 #include "group_beam_searcher.hpp"
 #include "openvino/openvino.hpp"
+#include <iostream>
+#include <fstream>
 
 namespace {
 
 std::pair<ov::Tensor, ov::Tensor> tokenize(ov::InferRequest& tokenizer, std::string&& prompt) {
@@ -39,15 +41,27 @@ int main(int argc, char* argv[]) try {
     // Compile models
     ov::Core core;
     core.add_extension(OPENVINO_TOKENIZERS_PATH);  // OPENVINO_TOKENIZERS_PATH is defined in CMakeLists.txt
+    auto tokenizer_model = core.read_model(std::string{argv[1]} + "/openvino_tokenizer.xml");
     // tokenizer and detokenizer work on CPU only
     ov::InferRequest tokenizer =
-        core.compile_model(std::string{argv[1]} + "/openvino_tokenizer.xml", "CPU").create_infer_request();
+        core.compile_model(tokenizer_model, "CPU").create_infer_request();
     ov::InferRequest detokenizer =
         core.compile_model(std::string{argv[1]} + "/openvino_detokenizer.xml", "CPU").create_infer_request();
     // The model can be compiled for GPU as well
     ov::InferRequest lm =
         core.compile_model(std::string{argv[1]} + "/openvino_model.xml", "CPU").create_infer_request();
 
+    // Get the EOS token ID from the runtime info of the tokenizer model that we read earlier
+    auto rt_info = tokenizer_model->get_rt_info();
+    int64_t SPECIAL_EOS_TOKEN;
+
+    if (rt_info.count("eos_token_id") > 0) {  // check whether the runtime information holds a valid EOS token ID
+        SPECIAL_EOS_TOKEN = rt_info["eos_token_id"].as<int64_t>();
+
+    } else {
+        throw std::runtime_error("EOS token ID not found in model's runtime information.");
+    }
+
     int64_t total_positions = 0;
     int32_t global_beam_idx = 0;
     std::string prompt;
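
The hunk above is the heart of the change: instead of trimming a hard-coded `<eos>` string from the output later, the sample now reads the end-of-sequence token ID from the tokenizer IR's runtime information. Here is a minimal standalone sketch of that lookup; the extension library path and the `openvino_tokenizer.xml` path are illustrative (the sample gets the former from CMake as `OPENVINO_TOKENIZERS_PATH`):

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>

#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    // The openvino_tokenizers extension must be registered before a tokenizer
    // IR can be read; the library path below is illustrative.
    core.add_extension("libopenvino_tokenizers.so");
    // read_model() keeps IR metadata accessible through get_rt_info().
    auto tokenizer_model = core.read_model("openvino_tokenizer.xml");  // illustrative path
    auto rt_info = tokenizer_model->get_rt_info();  // ov::AnyMap of runtime info
    if (rt_info.count("eos_token_id") == 0) {
        throw std::runtime_error("EOS token ID not found in model's runtime information.");
    }
    int64_t eos_token_id = rt_info["eos_token_id"].as<int64_t>();
    std::cout << "eos_token_id: " << eos_token_id << '\n';
}
```
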
@@ -84,12 +98,13 @@ int main(int argc, char* argv[]) try {
     lm.set_tensor("beam_idx", ov::Tensor{ov::element::i32, {1}, &global_beam_idx});
 
     const int64_t* prompt_data = input_ids.data<const int64_t>();
-    Parameters parameters{std::vector<int64_t>{prompt_data, prompt_data + input_ids.get_size()}};
+    Parameters parameters{{{prompt_data, prompt_data + input_ids.get_size()}}, SPECIAL_EOS_TOKEN};
     GroupBeamSearcher group_beam_searcher{parameters};
     std::vector<int64_t> next_tokens;
     std::vector<int32_t> next_beams;
+    lm.infer();
+
     for (size_t length_count = 0; length_count < parameters.max_new_tokens; ++length_count) {
-        lm.infer();
         std::tie(next_tokens, next_beams) = group_beam_searcher.select_next_tokens(lm.get_tensor("logits"));
         if (next_tokens.empty()) {
             break;
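
Note how `lm.infer()` moved: one call now runs before the loop (the prefill pass over the whole prompt) and one runs at the bottom of each iteration, so `select_next_tokens()` at the top always reads logits for the most recently submitted tokens. A toy stand-in makes the control flow visible; `StubModel` is purely illustrative and not part of the sample:

```cpp
#include <cstddef>
#include <iostream>

// StubModel mimics the call pattern of an ov::InferRequest; it is hypothetical.
struct StubModel {
    int step = 0;
    void infer() { ++step; }             // stands in for lm.infer()
    int logits() const { return step; }  // stands in for lm.get_tensor("logits")
};

int main() {
    StubModel lm;
    const std::size_t max_new_tokens = 4;

    lm.infer();  // prefill: produce logits for the whole prompt
    for (std::size_t i = 0; i < max_new_tokens; ++i) {
        std::cout << "selecting from logits of step " << lm.logits() << '\n';
        // ... append the selected tokens to the model inputs here ...
        lm.infer();  // decode: produce logits for the tokens just appended
    }
}
```
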
@@ -105,11 +120,13 @@ int main(int argc, char* argv[]) try {
         std::fill_n(attention_mask.data<int64_t>(), ov::shape_size(mask_shape), 1);
         lm.get_tensor("position_ids").set_shape({batch_size, 1});
         std::fill_n(lm.get_tensor("position_ids").data<int64_t>(), batch_size, total_positions++);
+        lm.infer();
     }
 
     Beam answer;
-    float highest_score = std::numeric_limits<float>().min();
-    for (const std::vector<Beam>& group : finalize(std::move(group_beam_searcher))) {
+    float highest_score = std::numeric_limits<float>().lowest();
+    auto all_groups = finalize(std::move(group_beam_searcher));
+    for (const std::vector<Beam>& group : all_groups[0]) {
         for (const Beam& beam : group) {
             if (beam.score > highest_score) {
                 highest_score = beam.score;
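
The `min()` to `lowest()` switch in this hunk fixes a real bug: for floating-point types, `std::numeric_limits<float>::min()` is the smallest *positive* normalized value, not the most negative float, and beam scores are log-probabilities (negative), so no beam could ever beat the initial `highest_score`. A quick check:

```cpp
#include <iostream>
#include <limits>

int main() {
    // min() is the smallest positive normalized float, not the most negative value.
    std::cout << std::numeric_limits<float>::min() << '\n';     // ~1.17549e-38
    std::cout << std::numeric_limits<float>::lowest() << '\n';  // ~-3.40282e+38
}
```
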
@@ -119,7 +136,7 @@ int main(int argc, char* argv[]) try {
     }
 
     auto answer_str = detokenize(detokenizer, answer.tokens);
-    answer_str = answer_str.substr(0, answer_str.find("<eos>"));
+    // answer_str = answer_str.substr(0, answer_str.find("<eos>"));
     std::cout << "Answer: " << answer_str << "\n_______\n";
     global_beam_idx = answer.global_beam_idx;