add max_length parametrisation to encode #1518

Draft. Wants to merge 1 commit into base: master.
9 changes: 7 additions & 2 deletions src/cpp/include/openvino/genai/tokenizer.hpp
@@ -125,7 +125,9 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
/**
* @brief encode a single prompt
* @param prompt std::string with input prompt
* @param properties tokenization properties, e.g. ov::genai::add_special_tokens(false)
* @param add_special_tokens whether to add special tokens
* @param max_length maximum length to which output will be padded or truncated
* @param padding_mode whether to pad result, allowed values are ["truncate", "longest", "max_length", "do_not_pad"]
* @return pair of [input_ids, attention_mask]
*/
template <typename... Properties>
@@ -136,7 +138,9 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
/**
* @brief encode batch of prompts. Left padding will be applied by default
* @param prompts vector storing batch of prompts
* @param properties tokenization properties, e.g. ov::genai::add_special_tokens(false)
* @param add_special_tokens whether to add special tokens
* @param max_length maximum length to which output will be padded or truncated
* @param padding_mode whether to pad result, allowed values are ["truncate", "pad"]
* @return pair of [input_ids, attention_mask]
*/
template <typename... Properties>
@@ -240,6 +244,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {

static constexpr ov::Property<bool> add_special_tokens{"add_special_tokens"};
static constexpr ov::Property<bool> skip_special_tokens{"skip_special_tokens"};
static constexpr ov::Property<std::string> padding_mode{"padding_mode"};

} // namespace genai
} // namespace ov
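
For a concrete picture of the new parameters, here is a minimal usage sketch through the Python binding added later in this PR. The keyword names and defaults follow the diff below; the model directory path is a hypothetical placeholder.

import openvino_genai as ov_genai

# Hypothetical directory containing openvino_tokenizer.xml / .bin.
tokenizer = ov_genai.Tokenizer("TinyLlama-1.1B-Chat-v1.0/")

# Single prompt: truncate the result to at most 16 tokens.
single = tokenizer.encode("1+1=", add_special_tokens=True,
                          max_length=16, padding_mode="truncate")
print(single.input_ids.data.shape)

# Batch of prompts: pad every prompt up to max_length tokens.
batch = tokenizer.encode(["1+1=", "What is the previous answer?"],
                         max_length=16, padding_mode="max_length")
print(batch.input_ids.data.shape, batch.attention_mask.data.shape)
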
59 changes: 51 additions & 8 deletions src/cpp/src/tokenizer.cpp
@@ -97,6 +97,8 @@ class Tokenizer::TokenizerImpl {
// this flag holds the current state value of the CompiledModel.
bool m_add_special_tokens = true;
bool m_skip_special_tokens = true;
int m_max_pad_length = std::numeric_limits<int>::max();
int m_max_trunc_length = std::numeric_limits<int>::max();
bool m_older_than_24_5 = false;

int64_t m_pad_token_id = -1;
@@ -109,17 +111,47 @@

std::string m_chat_template = {};

std::pair<int, int> get_padding_values(std::string padding_mode, size_t max_length) {
if (padding_mode == "truncate") {
return {max_length, std::numeric_limits<int32_t>::max()};
} else if (padding_mode == "longest") {
return {std::numeric_limits<int32_t>::max(), std::numeric_limits<int32_t>::max()};
} else if (padding_mode == "max_length") {
return {std::numeric_limits<int32_t>::max(), max_length};
} else if (padding_mode == "do_not_pad") {

[Review comment (Contributor)] Why a string rather than an enum?

// bahves exactly as longest

[Review comment (Contributor)] Suggested change: "// bahves exactly as longest" -> "// behaves exactly as longest"

// TODO: need to find a way to disable padding automatically so that it matches HF behavior.
return {std::numeric_limits<int32_t>::max(), std::numeric_limits<int32_t>::max()};
} else {
OPENVINO_THROW("Unknown padding mode: " + padding_mode);
}
}

void set_state_if_necessary(CircularBufferQueueElementGuard<ov::InferRequest>& infer_request_guard, const ov::AnyMap& params) {
bool add_special_tokens_flag = m_add_special_tokens;
bool skip_special_tokens_flag = m_skip_special_tokens;

size_t max_length_val;
std::string padding_mode_val;

ov::genai::utils::read_anymap_param(params, add_special_tokens.name(), add_special_tokens_flag);
ov::genai::utils::read_anymap_param(params, skip_special_tokens.name(), skip_special_tokens_flag);
ov::genai::utils::read_anymap_param(params, padding_mode.name(), padding_mode_val);
ov::genai::utils::read_anymap_param(params, max_length.name(), max_length_val);

int max_trunc_length_val = m_max_trunc_length;
int max_pad_length_val = m_max_pad_length;

if (!padding_mode_val.empty()) {
std::tie(max_trunc_length_val, max_pad_length_val) = get_padding_values(padding_mode_val, max_length_val);
}

// If user requested add_special_tokens mode different from the current one,
// need to set state variable.
// If requested mode matches the stored state set, then don't touch states.
if (add_special_tokens_flag == m_add_special_tokens && skip_special_tokens_flag == m_skip_special_tokens) {
if (add_special_tokens_flag == m_add_special_tokens
&& skip_special_tokens_flag == m_skip_special_tokens
&& max_trunc_length_val == m_max_trunc_length
&& max_pad_length_val == m_max_pad_length) {
return;
}
if (m_older_than_24_5) {
@@ -137,15 +169,26 @@ class Tokenizer::TokenizerImpl {
ov::Tensor skip_special_tensor = ov::Tensor(ov::element::i32, {1});
*skip_special_tensor.data<int>() = skip_special_tokens_flag;

ov::Tensor max_trunc_length_tensor = ov::Tensor(ov::element::i32, {1});
*max_trunc_length_tensor.data<int>() = max_trunc_length_val;
ov::Tensor max_pad_length_tensor = ov::Tensor(ov::element::i32, {1});
*max_pad_length_tensor.data<int>() = max_pad_length_val;

for (auto& state: infer_request_guard.get().query_state()) {
if (state.get_name().find(ov::genai::ADD_SPECIAL_TOKENS_VAR_ID) != std::string::npos) {
if (state.get_name().find(add_special_tokens.name()) != std::string::npos) {
state.set_state(add_special_tensor);
} else if (state.get_name().find(ov::genai::SKIP_SPECIAL_TOKENS_VAR_ID) != std::string::npos) {
} else if (state.get_name().find(skip_special_tokens.name()) != std::string::npos) {
state.set_state(skip_special_tensor);
} else if (state.get_name().find("max_trunc_length") != std::string::npos) {
state.set_state(max_trunc_length_tensor);
} else if (state.get_name().find("max_pad_length") != std::string::npos) {
state.set_state(max_pad_length_tensor);
}
}
m_add_special_tokens = add_special_tokens_flag;
m_skip_special_tokens = skip_special_tokens_flag;
m_max_trunc_length = max_trunc_length_val;
m_max_pad_length = max_pad_length_val;
}

TokenizerImpl(const std::filesystem::path& models_path, const ov::AnyMap& properties) {
@@ -625,22 +668,22 @@ Tokenizer::Tokenizer(const std::string& model_str, ov::Tensor& weights_tensor, c
}

TokenizedInputs Tokenizer::encode(const std::string prompt, const ov::AnyMap& tokenization_params) {
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::padding_mode.name()});
return m_pimpl->encode(std::move(prompt), tokenization_params);
}

TokenizedInputs Tokenizer::encode(std::vector<std::string>& prompts, const ov::AnyMap& tokenization_params) {
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::padding_mode.name()});
return m_pimpl->encode(prompts, tokenization_params);
}

TokenizedInputs Tokenizer::encode(std::vector<std::string>&& prompts, const ov::AnyMap& tokenization_params) {
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::padding_mode.name()});
return m_pimpl->encode(prompts, tokenization_params);
}

TokenizedInputs Tokenizer::encode(std::initializer_list<std::string>& text, const ov::AnyMap& tokenization_params) {
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::padding_mode.name()});
return encode(std::vector<std::string>(text.begin(), text.end()), tokenization_params);
}

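
For readability, the mode dispatch in get_padding_values above boils down to a mapping from padding_mode to the pair (max_trunc_length, max_pad_length). The Python transcription below is illustrative only and is not part of the PR; INT32_MAX stands in for std::numeric_limits<int32_t>::max().

INT32_MAX = 2**31 - 1  # std::numeric_limits<int32_t>::max()

def get_padding_values(padding_mode: str, max_length: int) -> tuple[int, int]:
    """Return (max_trunc_length, max_pad_length) for a given padding mode."""
    if padding_mode == "truncate":
        return (max_length, INT32_MAX)   # truncate to max_length, no fixed padding target
    if padding_mode == "longest":
        return (INT32_MAX, INT32_MAX)    # no truncation limit, no fixed padding target
    if padding_mode == "max_length":
        return (INT32_MAX, max_length)   # no truncation limit, pad up to max_length
    if padding_mode == "do_not_pad":
        # Currently behaves exactly as "longest"; see the TODO in the C++ code above.
        return (INT32_MAX, INT32_MAX)
    raise ValueError(f"Unknown padding mode: {padding_mode}")
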
4 changes: 2 additions & 2 deletions src/python/openvino_genai/py_openvino_genai.pyi
@@ -1656,12 +1656,12 @@ class Tokenizer:
        Decode a batch of tokens into a list of string prompt.
        """
    @typing.overload
    def encode(self, prompts: list[str], add_special_tokens: bool = True) -> TokenizedInputs:
    def encode(self, prompts: list[str], add_special_tokens: bool = True, max_length: int = 2147483647, padding_mode: str = 'truncate') -> TokenizedInputs:
        """
        Encodes a list of prompts into tokenized inputs.
        """
    @typing.overload
    def encode(self, prompt: str, add_special_tokens: bool = True) -> TokenizedInputs:
    def encode(self, prompt: str, add_special_tokens: bool = True, max_length: int = 2147483647, padding_mode: str = 'truncate') -> TokenizedInputs:
        """
        Encodes a single prompt into tokenized input.
        """
20 changes: 17 additions & 3 deletions src/python/py_tokenizer.cpp
@@ -44,21 +44,35 @@ void init_tokenizer(py::module_& m) {
return std::make_unique<ov::genai::Tokenizer>(tokenizer_path, kwargs_properties);
}), py::arg("tokenizer_path"), py::arg("properties") = ov::AnyMap({}))

.def("encode", [](Tokenizer& tok, std::vector<std::string>& prompts, bool add_special_tokens) {
.def("encode", [](Tokenizer& tok, std::vector<std::string>& prompts,
bool add_special_tokens,
size_t max_length,
std::string padding_mode) {
ov::AnyMap tokenization_params;
tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens;
tokenization_params[ov::genai::max_length.name()] = max_length;
tokenization_params[ov::genai::padding_mode.name()] = padding_mode;
return tok.encode(prompts, tokenization_params);
},
py::arg("prompts"),
py::arg("add_special_tokens") = true,
py::arg("max_length") = std::numeric_limits<int>::max(),
py::arg("padding_mode") = "truncate",
R"(Encodes a list of prompts into tokenized inputs.)")

.def("encode", [](Tokenizer& tok, const std::string prompt, bool add_special_tokens) {
.def("encode", [](Tokenizer& tok, const std::string prompt,
bool add_special_tokens,
size_t max_length,
std::string padding_mode) {
ov::AnyMap tokenization_params;
tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens;
tokenization_params[ov::genai::max_length.name()] = max_length;
tokenization_params[ov::genai::padding_mode.name()] = padding_mode;
return tok.encode(prompt, tokenization_params);
},
py::arg("prompt"), py::arg("add_special_tokens") = true,
py::arg("prompt"), py::arg("add_special_tokens") = true,
py::arg("max_length") = std::numeric_limits<int>::max(),
py::arg("padding_mode") = "truncate",
R"(Encodes a single prompt into tokenized input.)")

.def(
20 changes: 20 additions & 0 deletions tests/python_tests/test_tokenizer.py
@@ -236,6 +236,26 @@ def test_encode_decode_with_special_tokens_option(add_special_tokens, skip_speci
    decoded_hf = hf_tokenizer.decode(hf_res[0], skip_special_tokens=skip_special_tokens)
    assert decoded_genai == decoded_hf

prompts = [
['1+1=', 'What is the previous answer?']
]
@pytest.mark.precommit
@pytest.mark.nightly
@pytest.mark.parametrize("add_special_tokens", [True, False])
@pytest.mark.parametrize("max_length", [10, 16, 64, 512])
@pytest.mark.parametrize("pad_mode", ["truncate", "longest", "max_length", "do_not_pad"])
@pytest.mark.parametrize("prompt", prompts)
def test_padding(add_special_tokens, max_length, pad_mode, prompt):
    import numpy as np
    model_descr = get_chat_models_list()[0]
    model_id, path, hf_tokenizer, model_opt, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
    genai_tokenizer = ov_pipe.get_tokenizer()

    # Calling encode with 'add_special_tokens' will set the state flag.
    ov_res = genai_tokenizer.encode(prompt, add_special_tokens=add_special_tokens, max_length=max_length, padding_mode=pad_mode).input_ids.data
    hf_res = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=add_special_tokens, max_length=max_length, padding=pad_mode)["input_ids"]
    assert np.all(ov_res == hf_res)

[Review comment (Contributor)] Do we need to test the attention mask as well? Is the attention_mask's tail filled with zeros in case of padding?
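
Regarding the question above, a hedged sketch of what an attention_mask check could look like, mirroring test_padding; it is not part of this PR, and whether the HF and GenAI masks match for every padding_mode is exactly what the reviewer is asking.

import numpy as np

def check_attention_mask(genai_tokenizer, hf_tokenizer, prompt,
                         add_special_tokens, max_length, pad_mode):
    # The padded tail of attention_mask is expected to be zeros on both sides.
    ov_mask = genai_tokenizer.encode(prompt, add_special_tokens=add_special_tokens,
                                     max_length=max_length,
                                     padding_mode=pad_mode).attention_mask.data
    hf_mask = hf_tokenizer(prompt, return_tensors="np",
                           add_special_tokens=add_special_tokens,
                           max_length=max_length, padding=pad_mode)["attention_mask"]
    assert np.all(ov_mask == hf_mask)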



@pytest.mark.precommit
@pytest.mark.nightly