From 09a542608b560959edb96e628915a1d6bd780c26 Mon Sep 17 00:00:00 2001
From: Ekaterina Aidova
Date: Tue, 7 Jan 2025 11:13:35 +0400
Subject: [PATCH 01/12] [llm_bench] add support for granite and granitemoe
 models (#1486)

Related to https://github.com/huggingface/optimum-intel/pull/1099.
Adds the ability to test these models via llm_bench.

Co-authored-by: Ilya Lavrenov
---
 tools/llm_bench/llm_bench_utils/config_class.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/llm_bench/llm_bench_utils/config_class.py b/tools/llm_bench/llm_bench_utils/config_class.py
index 7dd27b198b..9c149c98b6 100644
--- a/tools/llm_bench/llm_bench_utils/config_class.py
+++ b/tools/llm_bench/llm_bench_utils/config_class.py
@@ -102,7 +102,9 @@
         "olmo",
         "phi3",
         "starcoder",
-        "instruct-gpt"
+        "instruct-gpt",
+        "granite",
+        "granitemoe",
     ],
     'ldm_super_resolution': ['ldm-super-resolution'],
 }

From 9ac38f0d5c79c0864dafd8484b5a696261e3bfda Mon Sep 17 00:00:00 2001
From: Helena Kloosterman
Date: Tue, 7 Jan 2025 10:01:12 +0100
Subject: [PATCH 02/12] Update VLM example code in README (#1466)

Add `pipe.start_chat()` to the VLM example. Without this, inference with
several models results in empty outputs. This can be removed if it becomes
the default for VLM models, but at the moment the most basic example should
work with supported models.

Also changed the example to get the generated text out of the
VLMDecodedResults and print that (see comment from Ilya).
---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index be3de5e8ce..9d4543bed4 100644
--- a/README.md
+++ b/README.md
@@ -133,13 +133,15 @@
 from PIL import Image
 
 # Choose GPU instead of CPU in the line below to run the model on Intel integrated or discrete GPU
 pipe = ov_genai.VLMPipeline("./InternVL2-1B", "CPU")
+pipe.start_chat()
 
 image = Image.open("dog.jpg")
 image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)
 image_data = ov.Tensor(image_data)
 
 prompt = "Can you describe the image?"
-print(pipe.generate(prompt, image=image_data, max_new_tokens=100))
+result = pipe.generate(prompt, image=image_data, max_new_tokens=100)
+print(result.texts[0])
 ```
 
 ### Run generation using VLMPipeline in C++

From d7d117a4a6a47f024a07fb914d1ea3a1dd829c58 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Tue, 7 Jan 2025 10:01:25 +0100
Subject: [PATCH 03/12] Fix text streaming in samples (#1487)

Fix issue https://github.com/openvinotoolkit/openvino.genai/issues/1381

The old condition compared the three-character slice `text[-3:]` against the
single replacement character `chr(65533)`, which can never match, so
incomplete UTF-8 text was printed instead of being held back; checking only
the last character restores the intended behavior.

Co-authored-by: Ilya Lavrenov
---
 samples/python/multinomial_causal_lm/multinomial_causal_lm.py | 2 +-
 tools/llm_bench/llm_bench_utils/ov_utils.py                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/python/multinomial_causal_lm/multinomial_causal_lm.py b/samples/python/multinomial_causal_lm/multinomial_causal_lm.py
index 953388ed6a..5ec9d54601 100755
--- a/samples/python/multinomial_causal_lm/multinomial_causal_lm.py
+++ b/samples/python/multinomial_causal_lm/multinomial_causal_lm.py
@@ -90,7 +90,7 @@ def put(self, token_id: int) -> bool:
             word = text[self.print_len:]
             self.tokens_cache = []
             self.print_len = 0
-        elif len(text) >= 3 and text[-3:] == chr(65533):
+        elif len(text) >= 3 and text[-1] == chr(65533):
             # Don't print incomplete text.
pass elif len(text) > self.print_len: diff --git a/tools/llm_bench/llm_bench_utils/ov_utils.py b/tools/llm_bench/llm_bench_utils/ov_utils.py index 316c9d0b89..596da8cb3a 100644 --- a/tools/llm_bench/llm_bench_utils/ov_utils.py +++ b/tools/llm_bench/llm_bench_utils/ov_utils.py @@ -701,7 +701,7 @@ def put(self, token_id: int) -> bool: word = text[self.print_len:] self.tokens_cache = [] self.print_len = 0 - elif len(text) >= 3 and text[-3:] == chr(65533): + elif len(text) >= 3 and text[-1] == chr(65533): # Don't print incomplete text. pass elif len(text) > self.print_len: From 65e8362e85a887af22e105d97d2333db921a1766 Mon Sep 17 00:00:00 2001 From: Alexander Kozlov Date: Tue, 7 Jan 2025 12:01:45 +0300 Subject: [PATCH 04/12] Added ability to compare results vs. llama.cpp (#1461) Example: ```bash rm -rf results/smollm2_N_FP16/gt.csv mkdir -p results/smollm2_N_FP16 # References from PyTorch FP16 wwb --base-model HuggingFaceTB/SmolLM2-360M-Instruct --gt-data results/smollm2_N_FP16/gt.csv --hf --num-samples 4 #huggingface-cli download "bartowski/SmolLM2-360M-Instruct-GGUF" "SmolLM2-360M-Instruct-f16.gguf" wwb --target-model models/SmolLM2-360M-Instruct-f16.gguf --gt-data results/smollm2_N_FP16/gt.csv --llamacpp --output results/smollm2_N_L_FP16 --num-samples ``` --- .../whowhatbench/model_loaders.py | 20 +++++- .../whowhatbench/text_evaluator.py | 27 +++++--- tools/who_what_benchmark/whowhatbench/wwb.py | 61 ++++++++++++++++--- 3 files changed, 88 insertions(+), 20 deletions(-) diff --git a/tools/who_what_benchmark/whowhatbench/model_loaders.py b/tools/who_what_benchmark/whowhatbench/model_loaders.py index 8a00c70852..c792a3c0b2 100644 --- a/tools/who_what_benchmark/whowhatbench/model_loaders.py +++ b/tools/who_what_benchmark/whowhatbench/model_loaders.py @@ -41,8 +41,19 @@ def load_text_genai_pipeline(model_dir, device="CPU", ov_config=None): return GenAIModelWrapper(openvino_genai.LLMPipeline(model_dir, device=device, **ov_config), model_dir, "text") +def load_text_llamacpp_pipeline(model_dir): + try: + from llama_cpp import Llama + except ImportError: + logger.error( + "Failed to import llama_cpp package. 
Please install llama-cpp-python.") + exit(-1) + model = Llama(model_dir) + return model + + def load_text_model( - model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False + model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False, use_llamacpp=False, ): if use_hf: logger.info("Using HF Transformers API") @@ -53,6 +64,9 @@ def load_text_model( elif use_genai: logger.info("Using OpenVINO GenAI API") model = load_text_genai_pipeline(model_id, device, ov_config) + elif use_llamacpp: + logger.info("Using llama.cpp API") + model = load_text_llamacpp_pipeline(model_id) else: logger.info("Using Optimum API") from optimum.intel.openvino import OVModelForCausalLM @@ -276,7 +290,7 @@ def load_inpainting_model( def load_model( - model_type, model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False + model_type, model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False, use_llamacpp=False ): if model_id is None: return None @@ -288,7 +302,7 @@ def load_model( ov_options = {} if model_type == "text": - return load_text_model(model_id, device, ov_options, use_hf, use_genai) + return load_text_model(model_id, device, ov_options, use_hf, use_genai, use_llamacpp) elif model_type == "text-to-image": return load_text2image_model( model_id, device, ov_options, use_hf, use_genai diff --git a/tools/who_what_benchmark/whowhatbench/text_evaluator.py b/tools/who_what_benchmark/whowhatbench/text_evaluator.py index 50ce224def..433521a186 100644 --- a/tools/who_what_benchmark/whowhatbench/text_evaluator.py +++ b/tools/who_what_benchmark/whowhatbench/text_evaluator.py @@ -108,6 +108,7 @@ def __init__( generation_config=None, generation_config_base=None, seqs_per_request=None, + use_chat_template=None, ) -> None: assert ( base_model is not None or gt_data is not None @@ -123,6 +124,7 @@ def __init__( self.generation_config_base = generation_config self.seqs_per_request = seqs_per_request self.generation_fn = gen_answer_fn + self.use_chat_template = use_chat_template if self.generation_config is not None: assert self.seqs_per_request is not None @@ -202,15 +204,21 @@ def worst_examples(self, top_k: int = 5, metric="similarity"): return res def _generate_data(self, model, gen_answer_fn=None, generation_config=None): - def default_gen_answer(model, tokenizer, prompt, max_new_tokens, crop_question): - inputs = self.tokenizer(prompt, return_tensors="pt") - - tokens = model.generate(**inputs, do_sample=False, max_new_tokens=max_new_tokens) - - if crop_question: - tokens = tokens[:, inputs["input_ids"].shape[-1] :] - - return self.tokenizer.batch_decode(tokens, skip_special_tokens=True)[0] + def default_gen_answer(model, tokenizer, prompt, max_new_tokens, crop_question, use_chat_template=False): + if use_chat_template: + message = [{"role": "user", "content": prompt}] + inputs = tokenizer.apply_chat_template(message, tokenize=True, add_generation_prompt=True, return_tensors="pt") + tokens = model.generate(inputs, do_sample=False, max_new_tokens=max_new_tokens) + if crop_question: + tokens = tokens[:, inputs.shape[-1]:] + res = self.tokenizer.decode(tokens[0], skip_special_tokens=True) + return res + else: + inputs = self.tokenizer(prompt, return_tensors="pt") + tokens = model.generate(**inputs, do_sample=False, max_new_tokens=max_new_tokens) + if crop_question: + tokens = tokens[:, inputs["input_ids"].shape[-1] :] + return self.tokenizer.batch_decode(tokens, skip_special_tokens=True)[0] gen_answer_fn = gen_answer_fn or default_gen_answer @@ -250,6 +258,7 @@ def 
default_gen_answer(model, tokenizer, prompt, max_new_tokens, crop_question): p, self.max_new_tokens, self._crop_question, + self.use_chat_template ) ) else: diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py index 7acf3cf5aa..7d4354f846 100644 --- a/tools/who_what_benchmark/whowhatbench/wwb.py +++ b/tools/who_what_benchmark/whowhatbench/wwb.py @@ -40,6 +40,11 @@ def parse_args(): default=None, help="Tokenizer for divergency metric. If not provided, it will be load from base_model or target_model.", ) + parser.add_argument( + "--chat-template", + action="store_true", + help="Whether apply the default chat template.", + ) parser.add_argument( "--gt-data", default=None, @@ -137,6 +142,11 @@ def parse_args(): action="store_true", help="Use LLMPipeline from transformers library to instantiate the model.", ) + parser.add_argument( + "--llamacpp", + action="store_true", + help="Use llama-cpp-python to instantiate the model.", + ) parser.add_argument( "--image-size", type=int, @@ -190,9 +200,13 @@ def load_prompts(args): def load_tokenizer(args): tokenizer = None if args.tokenizer is not None: - tokenizer = AutoTokenizer.from_pretrained( - args.tokenizer, trust_remote_code=True - ) + if args.llamacpp: + from llama_cpp.llama_tokenizer import LlamaHFTokenizer + tokenizer = LlamaHFTokenizer.from_pretrained(args.tokenizer) + else: + tokenizer = AutoTokenizer.from_pretrained( + args.tokenizer, trust_remote_code=True + ) elif args.base_model is not None: tokenizer = AutoTokenizer.from_pretrained( args.base_model, trust_remote_code=True @@ -246,8 +260,29 @@ def diff_strings(a: str, b: str, *, use_loguru_colors: bool = False) -> str: return "".join(output) -def genai_gen_text(model, tokenizer, question, max_new_tokens, skip_question): - return model.generate(question, do_sample=False, max_new_tokens=max_new_tokens) +def genai_gen_text(model, tokenizer, question, max_new_tokens, skip_question, use_chat_template=False): + if use_chat_template: + model.start_chat() + result = model.generate(question, do_sample=False, max_new_tokens=max_new_tokens) + model.finish_chat() + return result + else: + return model.generate(question, do_sample=False, max_new_tokens=max_new_tokens) + + +def llamacpp_gen_text(model, tokenizer, question, max_new_tokens, skip_question, use_chat_template=False): + if use_chat_template: + output = model.create_chat_completion(messages=[{"role": "user", "content": question}], max_tokens=max_new_tokens, temperature=0.0) + text = output["choices"][0]["message"]["content"] + if skip_question: + text = text[len(question):] + return text + else: + output = model(question, max_tokens=max_new_tokens, echo=True, temperature=0.0) + text = output["choices"][0]["text"] + if skip_question: + text = text[len(question):] + return text def genai_gen_image(model, prompt, num_inference_steps, generator=None): @@ -322,7 +357,15 @@ def create_evaluator(base_model, args): prompts = load_prompts(args) if task == "text": - tokenizer = load_tokenizer(args) + tokenizer = load_tokenizer(args) if not args.llamacpp else None + + if args.genai: + gen_answer_fn = genai_gen_text + elif args.llamacpp: + gen_answer_fn = llamacpp_gen_text + else: + gen_answer_fn = None + return EvaluatorCLS( base_model=base_model, gt_data=args.gt_data, @@ -331,7 +374,8 @@ def create_evaluator(base_model, args): similarity_model_id=args.data_encoder, num_samples=args.num_samples, language=args.language, - gen_answer_fn=genai_gen_text if args.genai else None, + 
gen_answer_fn=gen_answer_fn, + use_chat_template=args.chat_template, ) elif task == "text-to-image": return EvaluatorCLS( @@ -467,10 +511,11 @@ def main(): args.ov_config, args.hf, args.genai, + args.llamacpp ) all_metrics_per_question, all_metrics = evaluator.score( target_model, - evaluator.get_generation_fn() if args.genai else None, + evaluator.get_generation_fn() if args.genai or args.llamacpp else None, output_dir=args.output ) logger.info("Metrics for model: %s", args.target_model) From db0fb9a27a18d1080bdb152c5c845e1a0a9b5941 Mon Sep 17 00:00:00 2001 From: "Anastasiya(Asya) Pronina" Date: Tue, 7 Jan 2025 10:02:01 +0100 Subject: [PATCH 05/12] Replace 'CACHE_DIR' with 'NPUW_CACHE_DIR' in StatefulLLMPipeline (#1489) Handle `CACHE_DIR` in `StatefulLLMPipeline` the same way as in `StatelessLLMPipeline` --- src/cpp/src/llm_pipeline_static.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp index 94aa6e19fe..c98b571179 100644 --- a/src/cpp/src/llm_pipeline_static.cpp +++ b/src/cpp/src/llm_pipeline_static.cpp @@ -739,7 +739,10 @@ std::shared_ptr StatefulLLMPipeline::setupAndCompileModel( rename_key(pipeline_config, "PREFILL_CONFIG", "NPUW_LLM_PREFILL_CONFIG"); rename_key(pipeline_config, "GENERATE_CONFIG", "NPUW_LLM_GENERATE_CONFIG"); - + + // Replace CACHE_DIR option if NPUW is enabled + set_npuw_cache_dir(pipeline_config); + return std::make_shared(genai::utils::singleton_core().compile_model(model, "NPU", pipeline_config)); } From 3e12db7a6f7e461d928abd0c8fcaca9a67db06bf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 18:43:59 +0400 Subject: [PATCH 06/12] Update datasets requirement from <3.2.0 to <3.3.0 in /tools/who_what_benchmark (#1491) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updates the requirements on [datasets](https://github.com/huggingface/datasets) to permit the latest version.
Release notes

Sourced from datasets's releases.

3.2.0

Dataset Features

- Faster parquet streaming + filters with predicate pushdown by @lhoestq in huggingface/datasets#7309
  - Up to +100% streaming speed
  - Fast filtering via predicate pushdown (skip files/row groups based on predicate instead of downloading the full data), e.g.

        from datasets import load_dataset
        filters = [('date', '>=', '2023')]
        ds = load_dataset("HuggingFaceFW/fineweb-2", "fra_Latn", streaming=True, filters=filters)

Other improvements and bug fixes

New Contributors

Full Changelog: https://github.com/huggingface/datasets/compare/3.1.0...3.2.0

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- tools/who_what_benchmark/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/who_what_benchmark/requirements.txt b/tools/who_what_benchmark/requirements.txt index d4b702de78..ab4192d56c 100644 --- a/tools/who_what_benchmark/requirements.txt +++ b/tools/who_what_benchmark/requirements.txt @@ -7,4 +7,4 @@ pandas>=2.0.3 numpy>=1.23.5 tqdm>=4.66.1 diffusers -datasets<3.2.0 +datasets<3.3.0 From 74fd08fa19e2cf7ffe0eaecb3f539f3f737ee002 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 7 Jan 2025 22:16:50 +0400 Subject: [PATCH 07/12] Revert "Update datasets requirement from <3.2.0 to <3.3.0 in /tools/who_what_benchmark" (#1495) Reverts openvinotoolkit/openvino.genai#1491 --- tools/who_what_benchmark/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/who_what_benchmark/requirements.txt b/tools/who_what_benchmark/requirements.txt index ab4192d56c..d4b702de78 100644 --- a/tools/who_what_benchmark/requirements.txt +++ b/tools/who_what_benchmark/requirements.txt @@ -7,4 +7,4 @@ pandas>=2.0.3 numpy>=1.23.5 tqdm>=4.66.1 diffusers -datasets<3.3.0 +datasets<3.2.0 From d48326b0ecdefb5dd2a758a3536c4e7011c82934 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 8 Jan 2025 02:04:44 +0400 Subject: [PATCH 08/12] Enable ov_add_api_validator_post_build_step (#1402) --- src/cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index 24367c17ce..ff804cd85a 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -101,7 +101,7 @@ endif() if(OpenVINODeveloperPackage_FOUND) # must be called after all target_link_libraries - # ov_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) + ov_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) ov_ncc_naming_style(FOR_TARGET ${TARGET_NAME} SOURCE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include") From cdf8118377b6654daeedf1634d6d157ac7668767 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 8 Jan 2025 08:06:57 +0400 Subject: [PATCH 09/12] [CB] Fix key cache shape for GPU (#1497) Regression after https://github.com/openvinotoolkit/openvino.genai/pull/1416 CVS-160158 --- src/cpp/src/device_config.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/cpp/src/device_config.hpp b/src/cpp/src/device_config.hpp index cc2e21b9a1..fee6c7abd1 100644 --- a/src/cpp/src/device_config.hpp +++ b/src/cpp/src/device_config.hpp @@ -117,22 +117,22 @@ class DeviceConfig { } for (size_t layer_id = 0; layer_id < m_num_decoder_layers; layer_id++) { - m_key_cache_shape.push_back(ov::PartialShape{ov::Dimension::dynamic(), - ov::Dimension(m_num_kv_heads[layer_id]), - ov::Dimension(m_block_size), - ov::Dimension(m_head_size)}); - m_value_cache_shape.push_back(ov::PartialShape{ov::Dimension::dynamic(), ov::Dimension(m_num_kv_heads[layer_id]), ov::Dimension(m_block_size), ov::Dimension(m_head_size)}); - if (m_device.find("GPU") != std::string::npos) { + if (m_device.find("GPU") == std::string::npos) { + m_key_cache_shape.push_back(ov::PartialShape{ov::Dimension::dynamic(), + ov::Dimension(m_num_kv_heads[layer_id]), + ov::Dimension(m_block_size), + ov::Dimension(m_head_size)}); + } else if (m_device.find("GPU") != std::string::npos) { // Update key shape, as the key's shape is different from the value's shape 
m_key_cache_shape.push_back(ov::PartialShape{ov::Dimension::dynamic(), - ov::Dimension(m_num_kv_heads[layer_id]), - ov::Dimension(m_head_size), - ov::Dimension(m_block_size)}); + ov::Dimension(m_num_kv_heads[layer_id]), + ov::Dimension(m_head_size), + ov::Dimension(m_block_size)}); } } } From fb16a71b3c5d8736d75f4201e33d398e967fa152 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 8 Jan 2025 14:47:39 +0400 Subject: [PATCH 10/12] Finally drop old LLM bench folder (#1498) --- llm_bench/python/README.md | 4 ---- llm_bench/python/who_what_benchmark/README.md | 4 ---- 2 files changed, 8 deletions(-) delete mode 100644 llm_bench/python/README.md delete mode 100644 llm_bench/python/who_what_benchmark/README.md diff --git a/llm_bench/python/README.md b/llm_bench/python/README.md deleted file mode 100644 index 272ed11d1b..0000000000 --- a/llm_bench/python/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Benchmarking Script for Large Language Models - -> [!IMPORTANT] -> LLM bench code was moved to [tools](../../tools/llm_bench/) directory. Please navigate to the new directory for continue of tool usage. \ No newline at end of file diff --git a/llm_bench/python/who_what_benchmark/README.md b/llm_bench/python/who_what_benchmark/README.md deleted file mode 100644 index 414b4d9342..0000000000 --- a/llm_bench/python/who_what_benchmark/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Simple Accuracy Benchmark for Generative AI models - -> [!IMPORTANT] -> Who What Benchmark code was moved to [tools](../../../tools/who_what_benchmark/) directory. Please navigate to the new directory for continue of tool usage. \ No newline at end of file From 5ab58ca70dd2774595ad82768074c7a497aa9377 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 8 Jan 2025 14:49:25 +0400 Subject: [PATCH 11/12] Add complete version information (#1500) CVS-160212 --- .github/workflows/genai-tools.yml | 2 +- .github/workflows/linux.yml | 2 +- .../workflows/stable_diffusion_1_5_cpp.yml | 4 +- CMakeLists.txt | 1 + cmake/templates/__version__.py.in | 5 -- cmake/templates/version.cpp.in | 19 +++++ cmake/templates/version.hpp.in | 34 +++++++++ cmake/version.cmake | 72 +++++++++++++++++++ src/cpp/CMakeLists.txt | 16 ++++- src/python/CMakeLists.txt | 16 ++--- src/python/clean_version.cmake | 21 ++++++ src/python/openvino_genai/__init__.py | 5 +- src/python/openvino_genai/__init__.pyi | 5 +- .../openvino_genai/py_openvino_genai.pyi | 6 +- src/python/py_openvino_genai.cpp | 7 ++ 15 files changed, 190 insertions(+), 25 deletions(-) delete mode 100644 cmake/templates/__version__.py.in create mode 100644 cmake/templates/version.cpp.in create mode 100644 cmake/templates/version.hpp.in create mode 100644 cmake/version.cmake create mode 100644 src/python/clean_version.cmake diff --git a/.github/workflows/genai-tools.yml b/.github/workflows/genai-tools.yml index 333bee3e11..bd6cb46362 100644 --- a/.github/workflows/genai-tools.yml +++ b/.github/workflows/genai-tools.yml @@ -44,7 +44,7 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels - revision: latest_available_commit + revision: 345163f87953fb0dd8dd590257eb7fc84378da8e llm_bench: name: 'LLM bench tests' diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 0a991e2a54..0d7a5b7bae 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -52,7 +52,7 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels - revision: latest_available_commit + revision: 345163f87953fb0dd8dd590257eb7fc84378da8e - name: Clone docker tag from OpenVINO 
repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml
index e0bf5371b3..3b01697f26 100644
--- a/.github/workflows/stable_diffusion_1_5_cpp.yml
+++ b/.github/workflows/stable_diffusion_1_5_cpp.yml
@@ -45,7 +45,7 @@ jobs:
         with:
           platform: ubuntu22
           commit_packages_to_provide: wheels
-          revision: latest_available_commit
+          revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
 
   openvino_download_windows:
     name: Download OpenVINO for Windows
@@ -71,7 +71,7 @@ jobs:
         with:
           platform: windows
           commit_packages_to_provide: wheels
-          revision: latest_available_commit
+          revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
 
   stable_diffusion_1_5_cpp-linux:
     runs-on: ubuntu-22.04-8-cores
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 181132e210..3a67a24bab 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,6 +60,7 @@ if(NOT OpenVINODeveloperPackage_FOUND)
 endif()
 
 include(cmake/features.cmake)
+include(cmake/version.cmake)
 
 if(ENABLE_PYTHON)
     # the following two calls are required for cross-compilation
diff --git a/cmake/templates/__version__.py.in b/cmake/templates/__version__.py.in
deleted file mode 100644
index ce8e01a246..0000000000
--- a/cmake/templates/__version__.py.in
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-# Will be overwritten by cmake.
-__version__ = "@OpenVINOGenAI_VERSION@"
diff --git a/cmake/templates/version.cpp.in b/cmake/templates/version.cpp.in
new file mode 100644
index 0000000000..f6015832f9
--- /dev/null
+++ b/cmake/templates/version.cpp.in
@@ -0,0 +1,19 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "openvino/genai/version.hpp"
+
+namespace ov {
+namespace genai {
+
+const Version get_version() {
+    const static Version version = {
+        "@OpenVINOGenAI_FULL_VERSION@",
+        "OpenVINO GenAI version",
+    };
+
+    return version;
+}
+
+} // namespace genai
+} // namespace ov
diff --git a/cmake/templates/version.hpp.in b/cmake/templates/version.hpp.in
new file mode 100644
index 0000000000..34120ef632
--- /dev/null
+++ b/cmake/templates/version.hpp.in
@@ -0,0 +1,34 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "openvino/core/version.hpp"
+#include "openvino/genai/visibility.hpp"
+
+/**
+ * OpenVINO GenAI major version
+ */
+#define OPENVINO_GENAI_VERSION_MAJOR @OpenVINOGenAI_VERSION_MAJOR@
+
+/**
+ * OpenVINO GenAI minor version
+ */
+#define OPENVINO_GENAI_VERSION_MINOR @OpenVINOGenAI_VERSION_MINOR@
+
+/**
+ * OpenVINO GenAI patch version
+ */
+#define OPENVINO_GENAI_VERSION_PATCH @OpenVINOGenAI_VERSION_PATCH@
+
+namespace ov {
+namespace genai {
+
+/**
+ * Returns OpenVINO GenAI full version including git commit and hash information in form of:
+ * <MAJOR>.<MINOR>.<PATCH>.<REVISION>-<COMMIT NUMBER>-<COMMIT HASH>[-<BRANCH SUFFIX>]
+ */
+OPENVINO_EXTERN_C OPENVINO_GENAI_EXPORTS const ov::Version OPENVINO_CDECL get_version();
+
+} // namespace genai
+} // namespace ov
diff --git a/cmake/version.cmake b/cmake/version.cmake
new file mode 100644
index 0000000000..b9b51e8fe2
--- /dev/null
+++ b/cmake/version.cmake
@@ -0,0 +1,72 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+find_package(Git QUIET)
+
+function(ov_genai_branch_name VAR)
+    if(GIT_FOUND)
+        execute_process(
+            COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD
+            WORKING_DIRECTORY ${OpenVINOGenAI_SOURCE_DIR}
+            OUTPUT_VARIABLE GIT_BRANCH
+            RESULT_VARIABLE
EXIT_CODE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(EXIT_CODE EQUAL 0) + set(${VAR} ${GIT_BRANCH} PARENT_SCOPE) + endif() + endif() +endfunction() + +function(ov_genai_commit_hash VAR) + if(GIT_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-parse --short=11 HEAD + WORKING_DIRECTORY ${OpenVINOGenAI_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_HASH + RESULT_VARIABLE EXIT_CODE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(EXIT_CODE EQUAL 0) + set(${VAR} ${GIT_COMMIT_HASH} PARENT_SCOPE) + endif() + endif() +endfunction() + +function(ov_genai_commit_number VAR) + set(GIT_COMMIT_NUMBER_FOUND OFF) + if(GIT_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD + WORKING_DIRECTORY ${OpenVINOGenAI_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_NUMBER + RESULT_VARIABLE EXIT_CODE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(EXIT_CODE EQUAL 0) + set(GIT_COMMIT_NUMBER_FOUND ON) + set(${VAR} ${GIT_COMMIT_NUMBER} PARENT_SCOPE) + endif() + endif() + if(NOT GIT_COMMIT_NUMBER_FOUND) + # set zeros since git is not available + set(${VAR} "000" PARENT_SCOPE) + endif() +endfunction() + +function(ov_genai_full_version full_version) + if(GIT_FOUND) + ov_genai_branch_name(GIT_BRANCH) + ov_genai_commit_hash(GIT_COMMIT_HASH) + ov_genai_commit_number(GIT_COMMIT_NUMBER) + + if(NOT GIT_BRANCH MATCHES "^(master|HEAD)$") + set(GIT_BRANCH_POSTFIX "-${GIT_BRANCH}") + endif() + + set(${full_version} "${OpenVINOGenAI_VERSION}-${GIT_COMMIT_NUMBER}-${GIT_COMMIT_HASH}${GIT_BRANCH_POSTFIX}" PARENT_SCOPE) + else() + set(${full_version} "${OpenVINOGenAI_VERSION}" PARENT_SCOPE) + endif() +endfunction() + +ov_genai_full_version(OpenVINOGenAI_FULL_VERSION) +message(STATUS "OpenVINO GenAI full version: ${OpenVINOGenAI_FULL_VERSION}") diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index ff804cd85a..e954037daf 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -54,9 +54,18 @@ FetchContent_MakeAvailable(safetensors.h) ov_genai_build_jinja2cpp() +# generate version files + +configure_file("${OpenVINOGenAI_SOURCE_DIR}/cmake/templates/version.hpp.in" + "${CMAKE_CURRENT_BINARY_DIR}/openvino/genai/version.hpp" @ONLY) + +configure_file("${OpenVINOGenAI_SOURCE_DIR}/cmake/templates/version.cpp.in" + "${CMAKE_CURRENT_BINARY_DIR}/version.cpp" @ONLY) + # Library file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c") +list(APPEND SOURCE_FILES "${CMAKE_CURRENT_BINARY_DIR}/version.cpp") set(TARGET_NAME openvino_genai) @@ -68,7 +77,9 @@ if(TARGET openvino_tokenizers) endif() target_include_directories(${TARGET_NAME} - PUBLIC "$" "$" + PUBLIC "$" + "$" + "$" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src") target_include_directories(${TARGET_NAME} SYSTEM PRIVATE "${safetensors.h_SOURCE_DIR}") @@ -145,6 +156,9 @@ install(TARGETS ${TARGET_NAME} EXPORT OpenVINOGenAITargets install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ DESTINATION runtime/include COMPONENT core_genai_dev) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/openvino/genai/version.hpp + DESTINATION runtime/include/openvino/genai COMPONENT core_genai_dev) + install(EXPORT OpenVINOGenAITargets FILE OpenVINOGenAITargets.cmake NAMESPACE openvino:: DESTINATION runtime/cmake COMPONENT core_genai_dev) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 75a2fd59a7..1293246260 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -34,9 +34,6 @@ file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.py" 
"${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/py_openvino_genai.pyi" DESTINATION "${CMAKE_BINARY_DIR}/openvino_genai/") -configure_file("${OpenVINOGenAI_SOURCE_DIR}/cmake/templates/__version__.py.in" - "${CMAKE_BINARY_DIR}/openvino_genai/__version__.py" @ONLY) - if(OpenVINODeveloperPackage_FOUND) # TODO: commit changes separately # ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) @@ -69,18 +66,12 @@ endif() install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.py" "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.pyi" "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/py_openvino_genai.pyi" - "${CMAKE_BINARY_DIR}/openvino_genai/__version__.py" DESTINATION python/openvino_genai COMPONENT pygenai_${Python3_VERSION_MAJOR}_${Python3_VERSION_MINOR}) install(TARGETS ${TARGET_NAME} LIBRARY DESTINATION python/openvino_genai COMPONENT pygenai_${Python3_VERSION_MAJOR}_${Python3_VERSION_MINOR}) -install(FILES "${CMAKE_BINARY_DIR}/openvino_genai/__version__.py" - DESTINATION openvino_genai - COMPONENT wheel_genai - EXCLUDE_FROM_ALL) - install(FILES "${OpenVINOGenAI_SOURCE_DIR}/LICENSE" "${OpenVINOGenAI_SOURCE_DIR}/third-party-programs.txt" "${OpenVINOGenAI_SOURCE_DIR}/SECURITY.md" @@ -154,7 +145,8 @@ if(pybind11_stubgen_AVAILABLE) endif() set(stub_files_location "${OpenVINOGenAI_BINARY_DIR}/src/python") - set(generated_files ${stub_files_location}/openvino_genai/__init__.pyi + set(init_pyi_file "${stub_files_location}/openvino_genai/__init__.pyi") + set(generated_files ${init_pyi_file} ${stub_files_location}/openvino_genai/py_openvino_genai.pyi) set_source_files_properties(${generated_files} PROPERTIES GENERATED ON) @@ -184,6 +176,9 @@ if(pybind11_stubgen_AVAILABLE) "${CMAKE_BINARY_DIR}/openvino_genai/py_openvino_genai.pyi" COMMAND "${CMAKE_COMMAND}" -E env PYTHONPATH=${CMAKE_BINARY_DIR}:${openvino_pythonpath}:$ENV{PYTHONPATH} ${pybind11_stubgen} --output-dir ${stub_files_location} openvino_genai + COMMAND "${CMAKE_COMMAND}" + -D init_pyi_file=${init_pyi_file} + -P "${CMAKE_CURRENT_SOURCE_DIR}/clean_version.cmake" ${validation_command} ${copy_to_source_command} COMMAND "${CMAKE_COMMAND}" -E copy ${generated_files} "${CMAKE_BINARY_DIR}/openvino_genai/" @@ -192,6 +187,7 @@ if(pybind11_stubgen_AVAILABLE) ${python_sources} ${validation_dependencies} "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.py" + "${CMAKE_CURRENT_SOURCE_DIR}/clean_version.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/compare_pyi.cmake" COMMENT "[${pybind11_stubgen_dep}] Generate .pyi files" VERBATIM) diff --git a/src/python/clean_version.cmake b/src/python/clean_version.cmake new file mode 100644 index 0000000000..f02e293493 --- /dev/null +++ b/src/python/clean_version.cmake @@ -0,0 +1,21 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +foreach(var IN ITEMS init_pyi_file) + if(NOT DEFINED ${var}) + message(FATAL_ERROR "Variable ${var} is not defined") + endif() +endforeach() + +file(STRINGS ${init_pyi_file} file_lines) + +foreach(file_line IN LISTS file_lines) + if(file_line MATCHES "^__version__.*") + set(file_line "__version__: str") + endif() + + set(file_content "${file_content}${file_line}\n") +endforeach() + +file(WRITE ${init_pyi_file} ${file_content}) diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index a0b0faf58c..0ad7ba3f12 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -5,8 +5,6 @@ import openvino # add_dll_directory for openvino lib import os -from .__version__ 
import __version__ - if hasattr(os, "add_dll_directory"): os.add_dll_directory(os.path.dirname(__file__)) @@ -17,8 +15,11 @@ RawPerfMetrics, PerfMetrics, StreamerBase, + get_version, ) +__version__ = get_version() + # VLM pipeline from .py_openvino_genai import ( diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 187e0a0a06..0a401ae958 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -42,7 +42,8 @@ from openvino_genai.py_openvino_genai import WhisperPerfMetrics from openvino_genai.py_openvino_genai import WhisperPipeline from openvino_genai.py_openvino_genai import WhisperRawPerfMetrics from openvino_genai.py_openvino_genai import draft_model +from openvino_genai.py_openvino_genai import get_version import os as os from . import py_openvino_genai -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'openvino', 'os', 'py_openvino_genai'] -__version__: str = '2025.0.0.0' +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index d405cd9bbf..5adde32db4 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import openvino._pyopenvino import os import typing -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 
'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -2204,3 +2204,7 @@ def draft_model(models_path: os.PathLike, device: str = '', **kwargs) -> openvin """ device on which inference will be performed """ +def get_version() -> str: + """ + OpenVINO GenAI version + """ diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index 429f48f30d..f8e577d5c8 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -11,6 +11,7 @@ #include #include "openvino/genai/llm_pipeline.hpp" +#include "openvino/genai/version.hpp" #include "py_utils.hpp" @@ -21,6 +22,7 @@ using ov::genai::DecodedResults; using ov::genai::EncodedResults; using ov::genai::StreamerBase; using ov::genai::StringInputs; +using ov::genai::get_version; void init_lora_adapter(py::module_& m); void init_perf_metrics(py::module_& m); @@ -82,7 +84,12 @@ class ConstructableStreamer: public StreamerBase { PYBIND11_MODULE(py_openvino_genai, m) { m.doc() = "Pybind11 binding for OpenVINO GenAI library"; + m.def("get_version", [] () -> py::str { + return get_version().buildNumber; + }, get_version().description); + init_perf_metrics(m); + py::class_(m, "DecodedResults", decoded_results_docstring) .def(py::init<>()) .def_property_readonly("texts", [](const DecodedResults &dr) -> py::typing::List { return pyutils::handle_utf8((std::vector)dr); }) From 3e5c8895650c64d73a9b15f5597c09f1a6b78fd3 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 8 Jan 2025 18:09:14 +0400 Subject: [PATCH 12/12] Added information about LoRA support (#1504) --- .github/workflows/mac.yml | 2 +- .github/workflows/windows.yml | 2 +- README.md | 2 +- ...SUPPORTED_MODELS.md => SUPPORTED_MODELS.md | 29 ++++++++++++++++++- samples/cpp/visual_language_chat/README.md | 2 +- .../cpp/whisper_speech_recognition/README.md | 2 +- .../whisper_speech_recognition/README.md | 2 +- 7 files changed, 34 insertions(+), 7 deletions(-) rename src/docs/SUPPORTED_MODELS.md => SUPPORTED_MODELS.md (95%) diff --git a/.github/workflows/mac.yml 
b/.github/workflows/mac.yml index 5402b79e70..062b83fc27 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -17,7 +17,7 @@ concurrency: env: PYTHON_VERSION: '3.10' - OV_BRANCH: master + OV_BRANCH: 345163f87953fb0dd8dd590257eb7fc84378da8e OV_TARBALL: '' jobs: diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index e396671b2c..95a713d7a1 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -17,7 +17,7 @@ concurrency: env: PYTHON_VERSION: '3.11' - OV_BRANCH: master + OV_BRANCH: 345163f87953fb0dd8dd590257eb7fc84378da8e OV_TARBALL: '' jobs: diff --git a/README.md b/README.md index 9d4543bed4..c5cf799973 100644 --- a/README.md +++ b/README.md @@ -394,7 +394,7 @@ See [here](https://openvinotoolkit.github.io/openvino_notebooks/?search=Automati ## Additional materials -- [List of supported models](https://github.com/openvinotoolkit/openvino.genai/blob/master/src/docs/SUPPORTED_MODELS.md) (NOTE: models can work, but were not tried yet) +- [List of supported models](https://github.com/openvinotoolkit/openvino.genai/blob/master/SUPPORTED_MODELS.md) (NOTE: models can work, but were not tried yet) - [OpenVINO Generative AI workflow](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html) - [Optimum-intel and OpenVINO](https://huggingface.co/docs/optimum/intel/openvino/export) diff --git a/src/docs/SUPPORTED_MODELS.md b/SUPPORTED_MODELS.md similarity index 95% rename from src/docs/SUPPORTED_MODELS.md rename to SUPPORTED_MODELS.md index 44da29ced4..6b45f47890 100644 --- a/src/docs/SUPPORTED_MODELS.md +++ b/SUPPORTED_MODELS.md @@ -147,6 +147,8 @@ +> [!NOTE] +> LoRA adapters are supported. The pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature. The model is required to have the following inputs after the conversion: 1. `input_ids` contains the tokens. @@ -165,12 +167,14 @@ The pipeline can work with other similar topologies produced by `optimum-intel` Architecture Text 2 image Image 2 image + LoRA support Example HuggingFace Models Latent Consistency Model Supported Supported + Supported
  • SimianLuo/LCM_Dreamshaper_v7
  • @@ -181,6 +185,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel` Stable Diffusion Supported Supported + Supported
    • CompVis/stable-diffusion-v1-1
    • @@ -213,6 +218,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel` Stable Diffusion XL Supported Supported + Supported
      • stabilityai/stable-diffusion-xl-base-0.9
      • @@ -225,6 +231,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel` Stable Diffusion 3 Supported Not supported + Not supported
        • stabilityai/stable-diffusion-3-medium-diffusers
        • @@ -237,6 +244,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel` Flux Supported Not supported + Not supported
          • black-forest-labs/FLUX.1-schnell
          • @@ -260,10 +268,12 @@ In addition to image generation models, `InpaintingPipeline` supports specialize Architecture + LoRA support Example HuggingFace Models Stable Diffusion + Supported
            • stabilityai/stable-diffusion-2-inpainting
            • @@ -275,13 +285,22 @@ In addition to image generation models, `InpaintingPipeline` supports specialize Stable Diffusion XL + Supported - + @@ -292,11 +311,13 @@ In addition to image generation models, `InpaintingPipeline` supports specialize Architecture Models + LoRA support Example HuggingFace Models InternVL2 InternVL2 + Not supported
              • OpenGVLab/InternVL2-1B
              • @@ -309,6 +330,7 @@ In addition to image generation models, `InpaintingPipeline` supports specialize LLaVA LLaVA-v1.5 + Not supported
                • llava-hf/llava-1.5-7b-hf
                • @@ -318,6 +340,7 @@ In addition to image generation models, `InpaintingPipeline` supports specialize LLaVA-NeXT LLaVa-v1.6 + Not supported
                  • llava-hf/llava-v1.6-mistral-7b-hf
                  • @@ -329,6 +352,7 @@ In addition to image generation models, `InpaintingPipeline` supports specialize MiniCPMV MiniCPM-V-2_6 + Not supported
                    • openbmb/MiniCPM-V-2_6
                    • @@ -345,11 +369,13 @@ In addition to image generation models, `InpaintingPipeline` supports specialize Architecture Models + LoRA support Example HuggingFace Models WhisperForConditionalGeneration Whisper + Not supported
                      • openai/whisper-tiny
                      • @@ -366,6 +392,7 @@ In addition to image generation models, `InpaintingPipeline` supports specialize Distil-Whisper + Not supported
                        • distil-whisper/distil-small.en
                        • diff --git a/samples/cpp/visual_language_chat/README.md b/samples/cpp/visual_language_chat/README.md index 39364d51ee..73baf0088a 100644 --- a/samples/cpp/visual_language_chat/README.md +++ b/samples/cpp/visual_language_chat/README.md @@ -29,7 +29,7 @@ Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/o Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model `llava-hf/llava-v1.6-mistral-7b-hf` can benefit from being run on a dGPU. Modify the source code to change the device for inference to the `GPU`. -See [SUPPORTED_MODELS.md](../../../src/docs/SUPPORTED_MODELS.md#visual-language-models) for the list of supported models. +See [SUPPORTED_MODELS.md](../../../SUPPORTED_MODELS.md#visual-language-models) for the list of supported models. ## Run benchmark: diff --git a/samples/cpp/whisper_speech_recognition/README.md b/samples/cpp/whisper_speech_recognition/README.md index d649266613..2ea3322dee 100644 --- a/samples/cpp/whisper_speech_recognition/README.md +++ b/samples/cpp/whisper_speech_recognition/README.md @@ -31,7 +31,7 @@ Output: timestamps: [0, 2] text: How are you doing today? ``` -See [SUPPORTED_MODELS.md](../../../src/docs/SUPPORTED_MODELS.md#whisper-models) for the list of supported models. +See [SUPPORTED_MODELS.md](../../../SUPPORTED_MODELS.md#whisper-models) for the list of supported models. # Whisper pipeline usage diff --git a/samples/python/whisper_speech_recognition/README.md b/samples/python/whisper_speech_recognition/README.md index aeb46444bf..5f373df2b7 100644 --- a/samples/python/whisper_speech_recognition/README.md +++ b/samples/python/whisper_speech_recognition/README.md @@ -38,7 +38,7 @@ Output: timestamps: [0, 2] text: How are you doing today? ``` -See [SUPPORTED_MODELS.md](../../../src/docs/SUPPORTED_MODELS.md#whisper-models) for the list of supported models. +See [SUPPORTED_MODELS.md](../../../SUPPORTED_MODELS.md#whisper-models) for the list of supported models. # Whisper pipeline usage