 #include "group_beam_searcher.hpp"
 #include "openvino/openvino.hpp"
+#include <iostream>
+#include <fstream>
 
 namespace {
 
 std::pair<ov::Tensor, ov::Tensor> tokenize(ov::InferRequest& tokenizer, std::string&& prompt) {
@@ -39,15 +41,27 @@ int main(int argc, char* argv[]) try {
     // Compile models
     ov::Core core;
     core.add_extension(OPENVINO_TOKENIZERS_PATH);  // OPENVINO_TOKENIZERS_PATH is defined in CMakeLists.txt
+    auto tokenizer_model = core.read_model(std::string{argv[1]} + "/openvino_tokenizer.xml");
     // tokenizer and detokenizer work on CPU only
     ov::InferRequest tokenizer =
-        core.compile_model(std::string{argv[1]} + "/openvino_tokenizer.xml", "CPU").create_infer_request();
+        core.compile_model(tokenizer_model, "CPU").create_infer_request();
     ov::InferRequest detokenizer =
         core.compile_model(std::string{argv[1]} + "/openvino_detokenizer.xml", "CPU").create_infer_request();
     // The model can be compiled for GPU as well
     ov::InferRequest lm =
         core.compile_model(std::string{argv[1]} + "/openvino_model.xml", "CPU").create_infer_request();
 
+    // Get the EOS token ID from the runtime info of the tokenizer model that we read earlier
+    auto rt_info = tokenizer_model->get_rt_info();
+    int64_t SPECIAL_EOS_TOKEN;
+
+    if (rt_info.count("eos_token_id") > 0) {  // check whether the runtime information holds a valid EOS token ID
+        SPECIAL_EOS_TOKEN = rt_info["eos_token_id"].as<int64_t>();
+
+    } else {
+        throw std::runtime_error("EOS token ID not found in model's runtime information.");
+    }
+
     int64_t total_positions = 0;
     int32_t global_beam_idx = 0;
     std::string prompt;
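
The hunk above is the heart of the change: instead of trimming a hard-coded `<eos>` string from the output later, the sample now reads the end-of-sequence token ID from the tokenizer IR's runtime information. Here is a minimal standalone sketch of that lookup; the extension library path and the `openvino_tokenizer.xml` path are illustrative (the sample gets the former from CMake as `OPENVINO_TOKENIZERS_PATH`):

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>

#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    // The openvino_tokenizers extension must be registered before a tokenizer
    // IR can be read; the library path below is illustrative.
    core.add_extension("libopenvino_tokenizers.so");
    // read_model() keeps IR metadata accessible through get_rt_info().
    auto tokenizer_model = core.read_model("openvino_tokenizer.xml");  // illustrative path
    auto rt_info = tokenizer_model->get_rt_info();  // ov::AnyMap of runtime info
    if (rt_info.count("eos_token_id") == 0) {
        throw std::runtime_error("EOS token ID not found in model's runtime information.");
    }
    int64_t eos_token_id = rt_info["eos_token_id"].as<int64_t>();
    std::cout << "eos_token_id: " << eos_token_id << '\n';
}
```
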
@@ -84,12 +98,13 @@ int main(int argc, char* argv[]) try {
     lm.set_tensor("beam_idx", ov::Tensor{ov::element::i32, {1}, &global_beam_idx});
 
     const int64_t* prompt_data = input_ids.data<const int64_t>();
-    Parameters parameters{std::vector<int64_t>{prompt_data, prompt_data + input_ids.get_size()}};
+    Parameters parameters{{{prompt_data, prompt_data + input_ids.get_size()}}, SPECIAL_EOS_TOKEN};
     GroupBeamSearcher group_beam_searcher{parameters};
     std::vector<int64_t> next_tokens;
     std::vector<int32_t> next_beams;
+    lm.infer();
+
     for (size_t length_count = 0; length_count < parameters.max_new_tokens; ++length_count) {
-        lm.infer();
         std::tie(next_tokens, next_beams) = group_beam_searcher.select_next_tokens(lm.get_tensor("logits"));
         if (next_tokens.empty()) {
             break;
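
Note how `lm.infer()` moved: one call now runs before the loop (the prefill pass over the whole prompt) and one runs at the bottom of each iteration, so `select_next_tokens()` at the top always reads logits for the most recently submitted tokens. A toy stand-in makes the control flow visible; `StubModel` is purely illustrative and not part of the sample:

```cpp
#include <cstddef>
#include <iostream>

// StubModel mimics the call pattern of an ov::InferRequest; it is hypothetical.
struct StubModel {
    int step = 0;
    void infer() { ++step; }             // stands in for lm.infer()
    int logits() const { return step; }  // stands in for lm.get_tensor("logits")
};

int main() {
    StubModel lm;
    const std::size_t max_new_tokens = 4;

    lm.infer();  // prefill: produce logits for the whole prompt
    for (std::size_t i = 0; i < max_new_tokens; ++i) {
        std::cout << "selecting from logits of step " << lm.logits() << '\n';
        // ... append the selected tokens to the model inputs here ...
        lm.infer();  // decode: produce logits for the tokens just appended
    }
}
```
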
@@ -105,11 +120,13 @@ int main(int argc, char* argv[]) try {
         std::fill_n(attention_mask.data<int64_t>(), ov::shape_size(mask_shape), 1);
         lm.get_tensor("position_ids").set_shape({batch_size, 1});
         std::fill_n(lm.get_tensor("position_ids").data<int64_t>(), batch_size, total_positions++);
+        lm.infer();
     }
 
     Beam answer;
-    float highest_score = std::numeric_limits<float>().min();
-    for (const std::vector<Beam>& group : finalize(std::move(group_beam_searcher))) {
+    float highest_score = std::numeric_limits<float>().lowest();
+    auto all_groups = finalize(std::move(group_beam_searcher));
+    for (const std::vector<Beam>& group : all_groups[0]) {
         for (const Beam& beam : group) {
             if (beam.score > highest_score) {
                 highest_score = beam.score;
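
The `min()` to `lowest()` switch in this hunk fixes a real bug: for floating-point types, `std::numeric_limits<float>::min()` is the smallest *positive* normalized value, not the most negative float, and beam scores are log-probabilities (negative), so no beam could ever beat the initial `highest_score`. A quick check:

```cpp
#include <iostream>
#include <limits>

int main() {
    // min() is the smallest positive normalized float, not the most negative value.
    std::cout << std::numeric_limits<float>::min() << '\n';     // ~1.17549e-38
    std::cout << std::numeric_limits<float>::lowest() << '\n';  // ~-3.40282e+38
}
```
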
@@ -119,7 +136,7 @@ int main(int argc, char* argv[]) try {
     }
 
     auto answer_str = detokenize(detokenizer, answer.tokens);
-    answer_str = answer_str.substr(0, answer_str.find("<eos>"));
+    // answer_str = answer_str.substr(0, answer_str.find("<eos>"));
     std::cout << "Answer: " << answer_str << "\n_______\n";
     global_beam_idx = answer.global_beam_idx;