From 09a542608b560959edb96e628915a1d6bd780c26 Mon Sep 17 00:00:00 2001
From: Ekaterina Aidova
Date: Tue, 7 Jan 2025 11:13:35 +0400
Subject: [PATCH 01/12] [llm_bench] add support for granite and granitemoe
 models (#1486)

Related to https://github.com/huggingface/optimum-intel/pull/1099.
Adds the ability to test these models via llm_bench.

Co-authored-by: Ilya Lavrenov
---
 tools/llm_bench/llm_bench_utils/config_class.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/llm_bench/llm_bench_utils/config_class.py b/tools/llm_bench/llm_bench_utils/config_class.py
index 7dd27b198b..9c149c98b6 100644
--- a/tools/llm_bench/llm_bench_utils/config_class.py
+++ b/tools/llm_bench/llm_bench_utils/config_class.py
@@ -102,7 +102,9 @@
         "olmo",
         "phi3",
         "starcoder",
-        "instruct-gpt"
+        "instruct-gpt",
+        "granite",
+        "granitemoe",
     ],
     'ldm_super_resolution': ['ldm-super-resolution'],
 }

From 9ac38f0d5c79c0864dafd8484b5a696261e3bfda Mon Sep 17 00:00:00 2001
From: Helena Kloosterman
Date: Tue, 7 Jan 2025 10:01:12 +0100
Subject: [PATCH 02/12] Update VLM example code in README (#1466)

Add `pipe.start_chat()` to the VLM example. Without this, inference with
several models results in empty outputs. This can be removed if it becomes
the default for VLM models, but at the moment the most basic example should
work with supported models.

Also changed the example to get the generated text out of the
VLMDecodedResults and print that (see comment from Ilya).
---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index be3de5e8ce..9d4543bed4 100644
--- a/README.md
+++ b/README.md
@@ -133,13 +133,15 @@
 from PIL import Image
 
 # Choose GPU instead of CPU in the line below to run the model on Intel integrated or discrete GPU
 pipe = ov_genai.VLMPipeline("./InternVL2-1B", "CPU")
+pipe.start_chat()
 
 image = Image.open("dog.jpg")
 image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)
 image_data = ov.Tensor(image_data)
 
 prompt = "Can you describe the image?"
-print(pipe.generate(prompt, image=image_data, max_new_tokens=100))
+result = pipe.generate(prompt, image=image_data, max_new_tokens=100)
+print(result.texts[0])
 ```
 
 ### Run generation using VLMPipeline in C++

From d7d117a4a6a47f024a07fb914d1ea3a1dd829c58 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Tue, 7 Jan 2025 10:01:25 +0100
Subject: [PATCH 03/12] Fix text streaming in samples (#1487)

Fix issue https://github.com/openvinotoolkit/openvino.genai/issues/1381

The old condition compared the three-character slice `text[-3:]` against the
single replacement character `chr(65533)`, which can never match, so
incomplete UTF-8 text was printed instead of being held back; checking only
the last character restores the intended behavior.

Co-authored-by: Ilya Lavrenov
---
 samples/python/multinomial_causal_lm/multinomial_causal_lm.py | 2 +-
 tools/llm_bench/llm_bench_utils/ov_utils.py                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/python/multinomial_causal_lm/multinomial_causal_lm.py b/samples/python/multinomial_causal_lm/multinomial_causal_lm.py
index 953388ed6a..5ec9d54601 100755
--- a/samples/python/multinomial_causal_lm/multinomial_causal_lm.py
+++ b/samples/python/multinomial_causal_lm/multinomial_causal_lm.py
@@ -90,7 +90,7 @@ def put(self, token_id: int) -> bool:
             word = text[self.print_len:]
             self.tokens_cache = []
             self.print_len = 0
-        elif len(text) >= 3 and text[-3:] == chr(65533):
+        elif len(text) >= 3 and text[-1] == chr(65533):
             # Don't print incomplete text.
pass elif len(text) > self.print_len: diff --git a/tools/llm_bench/llm_bench_utils/ov_utils.py b/tools/llm_bench/llm_bench_utils/ov_utils.py index 316c9d0b89..596da8cb3a 100644 --- a/tools/llm_bench/llm_bench_utils/ov_utils.py +++ b/tools/llm_bench/llm_bench_utils/ov_utils.py @@ -701,7 +701,7 @@ def put(self, token_id: int) -> bool: word = text[self.print_len:] self.tokens_cache = [] self.print_len = 0 - elif len(text) >= 3 and text[-3:] == chr(65533): + elif len(text) >= 3 and text[-1] == chr(65533): # Don't print incomplete text. pass elif len(text) > self.print_len: From 65e8362e85a887af22e105d97d2333db921a1766 Mon Sep 17 00:00:00 2001 From: Alexander Kozlov Date: Tue, 7 Jan 2025 12:01:45 +0300 Subject: [PATCH 04/12] Added ability to compare results vs. llama.cpp (#1461) Example: ```bash rm -rf results/smollm2_N_FP16/gt.csv mkdir -p results/smollm2_N_FP16 # References from PyTorch FP16 wwb --base-model HuggingFaceTB/SmolLM2-360M-Instruct --gt-data results/smollm2_N_FP16/gt.csv --hf --num-samples 4 #huggingface-cli download "bartowski/SmolLM2-360M-Instruct-GGUF" "SmolLM2-360M-Instruct-f16.gguf" wwb --target-model models/SmolLM2-360M-Instruct-f16.gguf --gt-data results/smollm2_N_FP16/gt.csv --llamacpp --output results/smollm2_N_L_FP16 --num-samples ``` --- .../whowhatbench/model_loaders.py | 20 +++++- .../whowhatbench/text_evaluator.py | 27 +++++--- tools/who_what_benchmark/whowhatbench/wwb.py | 61 ++++++++++++++++--- 3 files changed, 88 insertions(+), 20 deletions(-) diff --git a/tools/who_what_benchmark/whowhatbench/model_loaders.py b/tools/who_what_benchmark/whowhatbench/model_loaders.py index 8a00c70852..c792a3c0b2 100644 --- a/tools/who_what_benchmark/whowhatbench/model_loaders.py +++ b/tools/who_what_benchmark/whowhatbench/model_loaders.py @@ -41,8 +41,19 @@ def load_text_genai_pipeline(model_dir, device="CPU", ov_config=None): return GenAIModelWrapper(openvino_genai.LLMPipeline(model_dir, device=device, **ov_config), model_dir, "text") +def load_text_llamacpp_pipeline(model_dir): + try: + from llama_cpp import Llama + except ImportError: + logger.error( + "Failed to import llama_cpp package. 
Please install llama-cpp-python.") + exit(-1) + model = Llama(model_dir) + return model + + def load_text_model( - model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False + model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False, use_llamacpp=False, ): if use_hf: logger.info("Using HF Transformers API") @@ -53,6 +64,9 @@ def load_text_model( elif use_genai: logger.info("Using OpenVINO GenAI API") model = load_text_genai_pipeline(model_id, device, ov_config) + elif use_llamacpp: + logger.info("Using llama.cpp API") + model = load_text_llamacpp_pipeline(model_id) else: logger.info("Using Optimum API") from optimum.intel.openvino import OVModelForCausalLM @@ -276,7 +290,7 @@ def load_inpainting_model( def load_model( - model_type, model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False + model_type, model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False, use_llamacpp=False ): if model_id is None: return None @@ -288,7 +302,7 @@ def load_model( ov_options = {} if model_type == "text": - return load_text_model(model_id, device, ov_options, use_hf, use_genai) + return load_text_model(model_id, device, ov_options, use_hf, use_genai, use_llamacpp) elif model_type == "text-to-image": return load_text2image_model( model_id, device, ov_options, use_hf, use_genai diff --git a/tools/who_what_benchmark/whowhatbench/text_evaluator.py b/tools/who_what_benchmark/whowhatbench/text_evaluator.py index 50ce224def..433521a186 100644 --- a/tools/who_what_benchmark/whowhatbench/text_evaluator.py +++ b/tools/who_what_benchmark/whowhatbench/text_evaluator.py @@ -108,6 +108,7 @@ def __init__( generation_config=None, generation_config_base=None, seqs_per_request=None, + use_chat_template=None, ) -> None: assert ( base_model is not None or gt_data is not None @@ -123,6 +124,7 @@ def __init__( self.generation_config_base = generation_config self.seqs_per_request = seqs_per_request self.generation_fn = gen_answer_fn + self.use_chat_template = use_chat_template if self.generation_config is not None: assert self.seqs_per_request is not None @@ -202,15 +204,21 @@ def worst_examples(self, top_k: int = 5, metric="similarity"): return res def _generate_data(self, model, gen_answer_fn=None, generation_config=None): - def default_gen_answer(model, tokenizer, prompt, max_new_tokens, crop_question): - inputs = self.tokenizer(prompt, return_tensors="pt") - - tokens = model.generate(**inputs, do_sample=False, max_new_tokens=max_new_tokens) - - if crop_question: - tokens = tokens[:, inputs["input_ids"].shape[-1] :] - - return self.tokenizer.batch_decode(tokens, skip_special_tokens=True)[0] + def default_gen_answer(model, tokenizer, prompt, max_new_tokens, crop_question, use_chat_template=False): + if use_chat_template: + message = [{"role": "user", "content": prompt}] + inputs = tokenizer.apply_chat_template(message, tokenize=True, add_generation_prompt=True, return_tensors="pt") + tokens = model.generate(inputs, do_sample=False, max_new_tokens=max_new_tokens) + if crop_question: + tokens = tokens[:, inputs.shape[-1]:] + res = self.tokenizer.decode(tokens[0], skip_special_tokens=True) + return res + else: + inputs = self.tokenizer(prompt, return_tensors="pt") + tokens = model.generate(**inputs, do_sample=False, max_new_tokens=max_new_tokens) + if crop_question: + tokens = tokens[:, inputs["input_ids"].shape[-1] :] + return self.tokenizer.batch_decode(tokens, skip_special_tokens=True)[0] gen_answer_fn = gen_answer_fn or default_gen_answer @@ -250,6 +258,7 @@ def 
default_gen_answer(model, tokenizer, prompt, max_new_tokens, crop_question): p, self.max_new_tokens, self._crop_question, + self.use_chat_template ) ) else: diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py index 7acf3cf5aa..7d4354f846 100644 --- a/tools/who_what_benchmark/whowhatbench/wwb.py +++ b/tools/who_what_benchmark/whowhatbench/wwb.py @@ -40,6 +40,11 @@ def parse_args(): default=None, help="Tokenizer for divergency metric. If not provided, it will be load from base_model or target_model.", ) + parser.add_argument( + "--chat-template", + action="store_true", + help="Whether apply the default chat template.", + ) parser.add_argument( "--gt-data", default=None, @@ -137,6 +142,11 @@ def parse_args(): action="store_true", help="Use LLMPipeline from transformers library to instantiate the model.", ) + parser.add_argument( + "--llamacpp", + action="store_true", + help="Use llama-cpp-python to instantiate the model.", + ) parser.add_argument( "--image-size", type=int, @@ -190,9 +200,13 @@ def load_prompts(args): def load_tokenizer(args): tokenizer = None if args.tokenizer is not None: - tokenizer = AutoTokenizer.from_pretrained( - args.tokenizer, trust_remote_code=True - ) + if args.llamacpp: + from llama_cpp.llama_tokenizer import LlamaHFTokenizer + tokenizer = LlamaHFTokenizer.from_pretrained(args.tokenizer) + else: + tokenizer = AutoTokenizer.from_pretrained( + args.tokenizer, trust_remote_code=True + ) elif args.base_model is not None: tokenizer = AutoTokenizer.from_pretrained( args.base_model, trust_remote_code=True @@ -246,8 +260,29 @@ def diff_strings(a: str, b: str, *, use_loguru_colors: bool = False) -> str: return "".join(output) -def genai_gen_text(model, tokenizer, question, max_new_tokens, skip_question): - return model.generate(question, do_sample=False, max_new_tokens=max_new_tokens) +def genai_gen_text(model, tokenizer, question, max_new_tokens, skip_question, use_chat_template=False): + if use_chat_template: + model.start_chat() + result = model.generate(question, do_sample=False, max_new_tokens=max_new_tokens) + model.finish_chat() + return result + else: + return model.generate(question, do_sample=False, max_new_tokens=max_new_tokens) + + +def llamacpp_gen_text(model, tokenizer, question, max_new_tokens, skip_question, use_chat_template=False): + if use_chat_template: + output = model.create_chat_completion(messages=[{"role": "user", "content": question}], max_tokens=max_new_tokens, temperature=0.0) + text = output["choices"][0]["message"]["content"] + if skip_question: + text = text[len(question):] + return text + else: + output = model(question, max_tokens=max_new_tokens, echo=True, temperature=0.0) + text = output["choices"][0]["text"] + if skip_question: + text = text[len(question):] + return text def genai_gen_image(model, prompt, num_inference_steps, generator=None): @@ -322,7 +357,15 @@ def create_evaluator(base_model, args): prompts = load_prompts(args) if task == "text": - tokenizer = load_tokenizer(args) + tokenizer = load_tokenizer(args) if not args.llamacpp else None + + if args.genai: + gen_answer_fn = genai_gen_text + elif args.llamacpp: + gen_answer_fn = llamacpp_gen_text + else: + gen_answer_fn = None + return EvaluatorCLS( base_model=base_model, gt_data=args.gt_data, @@ -331,7 +374,8 @@ def create_evaluator(base_model, args): similarity_model_id=args.data_encoder, num_samples=args.num_samples, language=args.language, - gen_answer_fn=genai_gen_text if args.genai else None, + 
gen_answer_fn=gen_answer_fn, + use_chat_template=args.chat_template, ) elif task == "text-to-image": return EvaluatorCLS( @@ -467,10 +511,11 @@ def main(): args.ov_config, args.hf, args.genai, + args.llamacpp ) all_metrics_per_question, all_metrics = evaluator.score( target_model, - evaluator.get_generation_fn() if args.genai else None, + evaluator.get_generation_fn() if args.genai or args.llamacpp else None, output_dir=args.output ) logger.info("Metrics for model: %s", args.target_model) From db0fb9a27a18d1080bdb152c5c845e1a0a9b5941 Mon Sep 17 00:00:00 2001 From: "Anastasiya(Asya) Pronina" Date: Tue, 7 Jan 2025 10:02:01 +0100 Subject: [PATCH 05/12] Replace 'CACHE_DIR' with 'NPUW_CACHE_DIR' in StatefulLLMPipeline (#1489) Handle `CACHE_DIR` in `StatefulLLMPipeline` the same way as in `StatelessLLMPipeline` --- src/cpp/src/llm_pipeline_static.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp index 94aa6e19fe..c98b571179 100644 --- a/src/cpp/src/llm_pipeline_static.cpp +++ b/src/cpp/src/llm_pipeline_static.cpp @@ -739,7 +739,10 @@ std::shared_ptr StatefulLLMPipeline::setupAndCompileModel( rename_key(pipeline_config, "PREFILL_CONFIG", "NPUW_LLM_PREFILL_CONFIG"); rename_key(pipeline_config, "GENERATE_CONFIG", "NPUW_LLM_GENERATE_CONFIG"); - + + // Replace CACHE_DIR option if NPUW is enabled + set_npuw_cache_dir(pipeline_config); + return std::make_shared(genai::utils::singleton_core().compile_model(model, "NPU", pipeline_config)); } From 3e12db7a6f7e461d928abd0c8fcaca9a67db06bf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 18:43:59 +0400 Subject: [PATCH 06/12] Update datasets requirement from <3.2.0 to <3.3.0 in /tools/who_what_benchmark (#1491) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updates the requirements on [datasets](https://github.com/huggingface/datasets) to permit the latest version.
Release notes

Sourced from datasets's releases.

3.2.0

Dataset Features

- Faster parquet streaming + filters with predicate pushdown by @lhoestq in huggingface/datasets#7309
  - Up to +100% streaming speed
  - Fast filtering via predicate pushdown (skip files/row groups based on predicate instead of downloading the full data), e.g.

        from datasets import load_dataset
        filters = [('date', '>=', '2023')]
        ds = load_dataset("HuggingFaceFW/fineweb-2", "fra_Latn", streaming=True, filters=filters)

Other improvements and bug fixes

New Contributors

Full Changelog: https://github.com/huggingface/datasets/compare/3.1.0...3.2.0

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- tools/who_what_benchmark/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/who_what_benchmark/requirements.txt b/tools/who_what_benchmark/requirements.txt index d4b702de78..ab4192d56c 100644 --- a/tools/who_what_benchmark/requirements.txt +++ b/tools/who_what_benchmark/requirements.txt @@ -7,4 +7,4 @@ pandas>=2.0.3 numpy>=1.23.5 tqdm>=4.66.1 diffusers -datasets<3.2.0 +datasets<3.3.0 From 74fd08fa19e2cf7ffe0eaecb3f539f3f737ee002 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 7 Jan 2025 22:16:50 +0400 Subject: [PATCH 07/12] Revert "Update datasets requirement from <3.2.0 to <3.3.0 in /tools/who_what_benchmark" (#1495) Reverts openvinotoolkit/openvino.genai#1491 --- tools/who_what_benchmark/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/who_what_benchmark/requirements.txt b/tools/who_what_benchmark/requirements.txt index ab4192d56c..d4b702de78 100644 --- a/tools/who_what_benchmark/requirements.txt +++ b/tools/who_what_benchmark/requirements.txt @@ -7,4 +7,4 @@ pandas>=2.0.3 numpy>=1.23.5 tqdm>=4.66.1 diffusers -datasets<3.3.0 +datasets<3.2.0 From d48326b0ecdefb5dd2a758a3536c4e7011c82934 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 8 Jan 2025 02:04:44 +0400 Subject: [PATCH 08/12] Enable ov_add_api_validator_post_build_step (#1402) --- src/cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index 24367c17ce..ff804cd85a 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -101,7 +101,7 @@ endif() if(OpenVINODeveloperPackage_FOUND) # must be called after all target_link_libraries - # ov_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) + ov_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) ov_ncc_naming_style(FOR_TARGET ${TARGET_NAME} SOURCE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include") From cdf8118377b6654daeedf1634d6d157ac7668767 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 8 Jan 2025 08:06:57 +0400 Subject: [PATCH 09/12] [CB] Fix key cache shape for GPU (#1497) Regression after https://github.com/openvinotoolkit/openvino.genai/pull/1416 CVS-160158 --- src/cpp/src/device_config.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/cpp/src/device_config.hpp b/src/cpp/src/device_config.hpp index cc2e21b9a1..fee6c7abd1 100644 --- a/src/cpp/src/device_config.hpp +++ b/src/cpp/src/device_config.hpp @@ -117,22 +117,22 @@ class DeviceConfig { } for (size_t layer_id = 0; layer_id < m_num_decoder_layers; layer_id++) { - m_key_cache_shape.push_back(ov::PartialShape{ov::Dimension::dynamic(), - ov::Dimension(m_num_kv_heads[layer_id]), - ov::Dimension(m_block_size), - ov::Dimension(m_head_size)}); - m_value_cache_shape.push_back(ov::PartialShape{ov::Dimension::dynamic(), ov::Dimension(m_num_kv_heads[layer_id]), ov::Dimension(m_block_size), ov::Dimension(m_head_size)}); - if (m_device.find("GPU") != std::string::npos) { + if (m_device.find("GPU") == std::string::npos) { + m_key_cache_shape.push_back(ov::PartialShape{ov::Dimension::dynamic(), + ov::Dimension(m_num_kv_heads[layer_id]), + ov::Dimension(m_block_size), + ov::Dimension(m_head_size)}); + } else if (m_device.find("GPU") != std::string::npos) { // Update key shape, as the key's shape is different from the value's shape 
m_key_cache_shape.push_back(ov::PartialShape{ov::Dimension::dynamic(), - ov::Dimension(m_num_kv_heads[layer_id]), - ov::Dimension(m_head_size), - ov::Dimension(m_block_size)}); + ov::Dimension(m_num_kv_heads[layer_id]), + ov::Dimension(m_head_size), + ov::Dimension(m_block_size)}); } } } From fb16a71b3c5d8736d75f4201e33d398e967fa152 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 8 Jan 2025 14:47:39 +0400 Subject: [PATCH 10/12] Finally drop old LLM bench folder (#1498) --- llm_bench/python/README.md | 4 ---- llm_bench/python/who_what_benchmark/README.md | 4 ---- 2 files changed, 8 deletions(-) delete mode 100644 llm_bench/python/README.md delete mode 100644 llm_bench/python/who_what_benchmark/README.md diff --git a/llm_bench/python/README.md b/llm_bench/python/README.md deleted file mode 100644 index 272ed11d1b..0000000000 --- a/llm_bench/python/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Benchmarking Script for Large Language Models - -> [!IMPORTANT] -> LLM bench code was moved to [tools](../../tools/llm_bench/) directory. Please navigate to the new directory for continue of tool usage. \ No newline at end of file diff --git a/llm_bench/python/who_what_benchmark/README.md b/llm_bench/python/who_what_benchmark/README.md deleted file mode 100644 index 414b4d9342..0000000000 --- a/llm_bench/python/who_what_benchmark/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Simple Accuracy Benchmark for Generative AI models - -> [!IMPORTANT] -> Who What Benchmark code was moved to [tools](../../../tools/who_what_benchmark/) directory. Please navigate to the new directory for continue of tool usage. \ No newline at end of file From 5ab58ca70dd2774595ad82768074c7a497aa9377 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 8 Jan 2025 14:49:25 +0400 Subject: [PATCH 11/12] Add complete version information (#1500) CVS-160212 --- .github/workflows/genai-tools.yml | 2 +- .github/workflows/linux.yml | 2 +- .../workflows/stable_diffusion_1_5_cpp.yml | 4 +- CMakeLists.txt | 1 + cmake/templates/__version__.py.in | 5 -- cmake/templates/version.cpp.in | 19 +++++ cmake/templates/version.hpp.in | 34 +++++++++ cmake/version.cmake | 72 +++++++++++++++++++ src/cpp/CMakeLists.txt | 16 ++++- src/python/CMakeLists.txt | 16 ++--- src/python/clean_version.cmake | 21 ++++++ src/python/openvino_genai/__init__.py | 5 +- src/python/openvino_genai/__init__.pyi | 5 +- .../openvino_genai/py_openvino_genai.pyi | 6 +- src/python/py_openvino_genai.cpp | 7 ++ 15 files changed, 190 insertions(+), 25 deletions(-) delete mode 100644 cmake/templates/__version__.py.in create mode 100644 cmake/templates/version.cpp.in create mode 100644 cmake/templates/version.hpp.in create mode 100644 cmake/version.cmake create mode 100644 src/python/clean_version.cmake diff --git a/.github/workflows/genai-tools.yml b/.github/workflows/genai-tools.yml index 333bee3e11..bd6cb46362 100644 --- a/.github/workflows/genai-tools.yml +++ b/.github/workflows/genai-tools.yml @@ -44,7 +44,7 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels - revision: latest_available_commit + revision: 345163f87953fb0dd8dd590257eb7fc84378da8e llm_bench: name: 'LLM bench tests' diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 0a991e2a54..0d7a5b7bae 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -52,7 +52,7 @@ jobs: with: platform: ubuntu22 commit_packages_to_provide: wheels - revision: latest_available_commit + revision: 345163f87953fb0dd8dd590257eb7fc84378da8e - name: Clone docker tag from OpenVINO 
repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml
index e0bf5371b3..3b01697f26 100644
--- a/.github/workflows/stable_diffusion_1_5_cpp.yml
+++ b/.github/workflows/stable_diffusion_1_5_cpp.yml
@@ -45,7 +45,7 @@ jobs:
         with:
           platform: ubuntu22
           commit_packages_to_provide: wheels
-          revision: latest_available_commit
+          revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
 
   openvino_download_windows:
     name: Download OpenVINO for Windows
@@ -71,7 +71,7 @@ jobs:
         with:
           platform: windows
           commit_packages_to_provide: wheels
-          revision: latest_available_commit
+          revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
 
   stable_diffusion_1_5_cpp-linux:
     runs-on: ubuntu-22.04-8-cores
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 181132e210..3a67a24bab 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,6 +60,7 @@ if(NOT OpenVINODeveloperPackage_FOUND)
 endif()
 
 include(cmake/features.cmake)
+include(cmake/version.cmake)
 
 if(ENABLE_PYTHON)
     # the following two calls are required for cross-compilation
diff --git a/cmake/templates/__version__.py.in b/cmake/templates/__version__.py.in
deleted file mode 100644
index ce8e01a246..0000000000
--- a/cmake/templates/__version__.py.in
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-# Will be overwritten by cmake.
-__version__ = "@OpenVINOGenAI_VERSION@"
diff --git a/cmake/templates/version.cpp.in b/cmake/templates/version.cpp.in
new file mode 100644
index 0000000000..f6015832f9
--- /dev/null
+++ b/cmake/templates/version.cpp.in
@@ -0,0 +1,19 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "openvino/genai/version.hpp"
+
+namespace ov {
+namespace genai {
+
+const Version get_version() {
+    const static Version version = {
+        "@OpenVINOGenAI_FULL_VERSION@",
+        "OpenVINO GenAI version",
+    };
+
+    return version;
+}
+
+} // namespace genai
+} // namespace ov
diff --git a/cmake/templates/version.hpp.in b/cmake/templates/version.hpp.in
new file mode 100644
index 0000000000..34120ef632
--- /dev/null
+++ b/cmake/templates/version.hpp.in
@@ -0,0 +1,34 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "openvino/core/version.hpp"
+#include "openvino/genai/visibility.hpp"
+
+/**
+ * OpenVINO GenAI major version
+ */
+#define OPENVINO_GENAI_VERSION_MAJOR @OpenVINOGenAI_VERSION_MAJOR@
+
+/**
+ * OpenVINO GenAI minor version
+ */
+#define OPENVINO_GENAI_VERSION_MINOR @OpenVINOGenAI_VERSION_MINOR@
+
+/**
+ * OpenVINO GenAI patch version
+ */
+#define OPENVINO_GENAI_VERSION_PATCH @OpenVINOGenAI_VERSION_PATCH@
+
+namespace ov {
+namespace genai {
+
+/**
+ * Returns OpenVINO GenAI full version including git commit and hash information in form of:
+ * <MAJOR>.<MINOR>.<PATCH>.<REVISION>-<COMMIT NUMBER>-<COMMIT HASH>[-<BRANCH SUFFIX>]
+ */
+OPENVINO_EXTERN_C OPENVINO_GENAI_EXPORTS const ov::Version OPENVINO_CDECL get_version();
+
+} // namespace genai
+} // namespace ov
diff --git a/cmake/version.cmake b/cmake/version.cmake
new file mode 100644
index 0000000000..b9b51e8fe2
--- /dev/null
+++ b/cmake/version.cmake
@@ -0,0 +1,72 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+find_package(Git QUIET)
+
+function(ov_genai_branch_name VAR)
+    if(GIT_FOUND)
+        execute_process(
+            COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD
+            WORKING_DIRECTORY ${OpenVINOGenAI_SOURCE_DIR}
+            OUTPUT_VARIABLE GIT_BRANCH
+            RESULT_VARIABLE
EXIT_CODE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(EXIT_CODE EQUAL 0) + set(${VAR} ${GIT_BRANCH} PARENT_SCOPE) + endif() + endif() +endfunction() + +function(ov_genai_commit_hash VAR) + if(GIT_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-parse --short=11 HEAD + WORKING_DIRECTORY ${OpenVINOGenAI_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_HASH + RESULT_VARIABLE EXIT_CODE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(EXIT_CODE EQUAL 0) + set(${VAR} ${GIT_COMMIT_HASH} PARENT_SCOPE) + endif() + endif() +endfunction() + +function(ov_genai_commit_number VAR) + set(GIT_COMMIT_NUMBER_FOUND OFF) + if(GIT_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD + WORKING_DIRECTORY ${OpenVINOGenAI_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_NUMBER + RESULT_VARIABLE EXIT_CODE + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(EXIT_CODE EQUAL 0) + set(GIT_COMMIT_NUMBER_FOUND ON) + set(${VAR} ${GIT_COMMIT_NUMBER} PARENT_SCOPE) + endif() + endif() + if(NOT GIT_COMMIT_NUMBER_FOUND) + # set zeros since git is not available + set(${VAR} "000" PARENT_SCOPE) + endif() +endfunction() + +function(ov_genai_full_version full_version) + if(GIT_FOUND) + ov_genai_branch_name(GIT_BRANCH) + ov_genai_commit_hash(GIT_COMMIT_HASH) + ov_genai_commit_number(GIT_COMMIT_NUMBER) + + if(NOT GIT_BRANCH MATCHES "^(master|HEAD)$") + set(GIT_BRANCH_POSTFIX "-${GIT_BRANCH}") + endif() + + set(${full_version} "${OpenVINOGenAI_VERSION}-${GIT_COMMIT_NUMBER}-${GIT_COMMIT_HASH}${GIT_BRANCH_POSTFIX}" PARENT_SCOPE) + else() + set(${full_version} "${OpenVINOGenAI_VERSION}" PARENT_SCOPE) + endif() +endfunction() + +ov_genai_full_version(OpenVINOGenAI_FULL_VERSION) +message(STATUS "OpenVINO GenAI full version: ${OpenVINOGenAI_FULL_VERSION}") diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index ff804cd85a..e954037daf 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -54,9 +54,18 @@ FetchContent_MakeAvailable(safetensors.h) ov_genai_build_jinja2cpp() +# generate version files + +configure_file("${OpenVINOGenAI_SOURCE_DIR}/cmake/templates/version.hpp.in" + "${CMAKE_CURRENT_BINARY_DIR}/openvino/genai/version.hpp" @ONLY) + +configure_file("${OpenVINOGenAI_SOURCE_DIR}/cmake/templates/version.cpp.in" + "${CMAKE_CURRENT_BINARY_DIR}/version.cpp" @ONLY) + # Library file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c") +list(APPEND SOURCE_FILES "${CMAKE_CURRENT_BINARY_DIR}/version.cpp") set(TARGET_NAME openvino_genai) @@ -68,7 +77,9 @@ if(TARGET openvino_tokenizers) endif() target_include_directories(${TARGET_NAME} - PUBLIC "$" "$" + PUBLIC "$" + "$" + "$" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src") target_include_directories(${TARGET_NAME} SYSTEM PRIVATE "${safetensors.h_SOURCE_DIR}") @@ -145,6 +156,9 @@ install(TARGETS ${TARGET_NAME} EXPORT OpenVINOGenAITargets install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ DESTINATION runtime/include COMPONENT core_genai_dev) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/openvino/genai/version.hpp + DESTINATION runtime/include/openvino/genai COMPONENT core_genai_dev) + install(EXPORT OpenVINOGenAITargets FILE OpenVINOGenAITargets.cmake NAMESPACE openvino:: DESTINATION runtime/cmake COMPONENT core_genai_dev) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 75a2fd59a7..1293246260 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -34,9 +34,6 @@ file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.py" 
"${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/py_openvino_genai.pyi" DESTINATION "${CMAKE_BINARY_DIR}/openvino_genai/") -configure_file("${OpenVINOGenAI_SOURCE_DIR}/cmake/templates/__version__.py.in" - "${CMAKE_BINARY_DIR}/openvino_genai/__version__.py" @ONLY) - if(OpenVINODeveloperPackage_FOUND) # TODO: commit changes separately # ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) @@ -69,18 +66,12 @@ endif() install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.py" "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.pyi" "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/py_openvino_genai.pyi" - "${CMAKE_BINARY_DIR}/openvino_genai/__version__.py" DESTINATION python/openvino_genai COMPONENT pygenai_${Python3_VERSION_MAJOR}_${Python3_VERSION_MINOR}) install(TARGETS ${TARGET_NAME} LIBRARY DESTINATION python/openvino_genai COMPONENT pygenai_${Python3_VERSION_MAJOR}_${Python3_VERSION_MINOR}) -install(FILES "${CMAKE_BINARY_DIR}/openvino_genai/__version__.py" - DESTINATION openvino_genai - COMPONENT wheel_genai - EXCLUDE_FROM_ALL) - install(FILES "${OpenVINOGenAI_SOURCE_DIR}/LICENSE" "${OpenVINOGenAI_SOURCE_DIR}/third-party-programs.txt" "${OpenVINOGenAI_SOURCE_DIR}/SECURITY.md" @@ -154,7 +145,8 @@ if(pybind11_stubgen_AVAILABLE) endif() set(stub_files_location "${OpenVINOGenAI_BINARY_DIR}/src/python") - set(generated_files ${stub_files_location}/openvino_genai/__init__.pyi + set(init_pyi_file "${stub_files_location}/openvino_genai/__init__.pyi") + set(generated_files ${init_pyi_file} ${stub_files_location}/openvino_genai/py_openvino_genai.pyi) set_source_files_properties(${generated_files} PROPERTIES GENERATED ON) @@ -184,6 +176,9 @@ if(pybind11_stubgen_AVAILABLE) "${CMAKE_BINARY_DIR}/openvino_genai/py_openvino_genai.pyi" COMMAND "${CMAKE_COMMAND}" -E env PYTHONPATH=${CMAKE_BINARY_DIR}:${openvino_pythonpath}:$ENV{PYTHONPATH} ${pybind11_stubgen} --output-dir ${stub_files_location} openvino_genai + COMMAND "${CMAKE_COMMAND}" + -D init_pyi_file=${init_pyi_file} + -P "${CMAKE_CURRENT_SOURCE_DIR}/clean_version.cmake" ${validation_command} ${copy_to_source_command} COMMAND "${CMAKE_COMMAND}" -E copy ${generated_files} "${CMAKE_BINARY_DIR}/openvino_genai/" @@ -192,6 +187,7 @@ if(pybind11_stubgen_AVAILABLE) ${python_sources} ${validation_dependencies} "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.py" + "${CMAKE_CURRENT_SOURCE_DIR}/clean_version.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/compare_pyi.cmake" COMMENT "[${pybind11_stubgen_dep}] Generate .pyi files" VERBATIM) diff --git a/src/python/clean_version.cmake b/src/python/clean_version.cmake new file mode 100644 index 0000000000..f02e293493 --- /dev/null +++ b/src/python/clean_version.cmake @@ -0,0 +1,21 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +foreach(var IN ITEMS init_pyi_file) + if(NOT DEFINED ${var}) + message(FATAL_ERROR "Variable ${var} is not defined") + endif() +endforeach() + +file(STRINGS ${init_pyi_file} file_lines) + +foreach(file_line IN LISTS file_lines) + if(file_line MATCHES "^__version__.*") + set(file_line "__version__: str") + endif() + + set(file_content "${file_content}${file_line}\n") +endforeach() + +file(WRITE ${init_pyi_file} ${file_content}) diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index a0b0faf58c..0ad7ba3f12 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -5,8 +5,6 @@ import openvino # add_dll_directory for openvino lib import os -from .__version__ 
import __version__ - if hasattr(os, "add_dll_directory"): os.add_dll_directory(os.path.dirname(__file__)) @@ -17,8 +15,11 @@ RawPerfMetrics, PerfMetrics, StreamerBase, + get_version, ) +__version__ = get_version() + # VLM pipeline from .py_openvino_genai import ( diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 187e0a0a06..0a401ae958 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -42,7 +42,8 @@ from openvino_genai.py_openvino_genai import WhisperPerfMetrics from openvino_genai.py_openvino_genai import WhisperPipeline from openvino_genai.py_openvino_genai import WhisperRawPerfMetrics from openvino_genai.py_openvino_genai import draft_model +from openvino_genai.py_openvino_genai import get_version import os as os from . import py_openvino_genai -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'openvino', 'os', 'py_openvino_genai'] -__version__: str = '2025.0.0.0' +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index d405cd9bbf..5adde32db4 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import openvino._pyopenvino import os import typing -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 
'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -2204,3 +2204,7 @@ def draft_model(models_path: os.PathLike, device: str = '', **kwargs) -> openvin """ device on which inference will be performed """ +def get_version() -> str: + """ + OpenVINO GenAI version + """ diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index 429f48f30d..f8e577d5c8 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -11,6 +11,7 @@ #include #include "openvino/genai/llm_pipeline.hpp" +#include "openvino/genai/version.hpp" #include "py_utils.hpp" @@ -21,6 +22,7 @@ using ov::genai::DecodedResults; using ov::genai::EncodedResults; using ov::genai::StreamerBase; using ov::genai::StringInputs; +using ov::genai::get_version; void init_lora_adapter(py::module_& m); void init_perf_metrics(py::module_& m); @@ -82,7 +84,12 @@ class ConstructableStreamer: public StreamerBase { PYBIND11_MODULE(py_openvino_genai, m) { m.doc() = "Pybind11 binding for OpenVINO GenAI library"; + m.def("get_version", [] () -> py::str { + return get_version().buildNumber; + }, get_version().description); + init_perf_metrics(m); + py::class_(m, "DecodedResults", decoded_results_docstring) .def(py::init<>()) .def_property_readonly("texts", [](const DecodedResults &dr) -> py::typing::List { return pyutils::handle_utf8((std::vector)dr); }) From 3e5c8895650c64d73a9b15f5597c09f1a6b78fd3 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 8 Jan 2025 18:09:14 +0400 Subject: [PATCH 12/12] Added information about LoRA support (#1504) --- .github/workflows/mac.yml | 2 +- .github/workflows/windows.yml | 2 +- README.md | 2 +- ...SUPPORTED_MODELS.md => SUPPORTED_MODELS.md | 29 ++++++++++++++++++- samples/cpp/visual_language_chat/README.md | 2 +- .../cpp/whisper_speech_recognition/README.md | 2 +- .../whisper_speech_recognition/README.md | 2 +- 7 files changed, 34 insertions(+), 7 deletions(-) rename src/docs/SUPPORTED_MODELS.md => SUPPORTED_MODELS.md (95%) diff --git a/.github/workflows/mac.yml 
b/.github/workflows/mac.yml index 5402b79e70..062b83fc27 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -17,7 +17,7 @@ concurrency: env: PYTHON_VERSION: '3.10' - OV_BRANCH: master + OV_BRANCH: 345163f87953fb0dd8dd590257eb7fc84378da8e OV_TARBALL: '' jobs: diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index e396671b2c..95a713d7a1 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -17,7 +17,7 @@ concurrency: env: PYTHON_VERSION: '3.11' - OV_BRANCH: master + OV_BRANCH: 345163f87953fb0dd8dd590257eb7fc84378da8e OV_TARBALL: '' jobs: diff --git a/README.md b/README.md index 9d4543bed4..c5cf799973 100644 --- a/README.md +++ b/README.md @@ -394,7 +394,7 @@ See [here](https://openvinotoolkit.github.io/openvino_notebooks/?search=Automati ## Additional materials -- [List of supported models](https://github.com/openvinotoolkit/openvino.genai/blob/master/src/docs/SUPPORTED_MODELS.md) (NOTE: models can work, but were not tried yet) +- [List of supported models](https://github.com/openvinotoolkit/openvino.genai/blob/master/SUPPORTED_MODELS.md) (NOTE: models can work, but were not tried yet) - [OpenVINO Generative AI workflow](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html) - [Optimum-intel and OpenVINO](https://huggingface.co/docs/optimum/intel/openvino/export) diff --git a/src/docs/SUPPORTED_MODELS.md b/SUPPORTED_MODELS.md similarity index 95% rename from src/docs/SUPPORTED_MODELS.md rename to SUPPORTED_MODELS.md index 44da29ced4..6b45f47890 100644 --- a/src/docs/SUPPORTED_MODELS.md +++ b/SUPPORTED_MODELS.md @@ -147,6 +147,8 @@ +> [!NOTE] +> LoRA adapters are supported. The pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature. The model is required to have the following inputs after the conversion: 1. `input_ids` contains the tokens. @@ -165,12 +167,14 @@ The pipeline can work with other similar topologies produced by `optimum-intel` Architecture Text 2 image Image 2 image + LoRA support Example HuggingFace Models Latent Consistency Model Supported Supported + Supported
  • SimianLuo/LCM_Dreamshaper_v7
  • @@ -181,6 +185,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel` Stable Diffusion Supported Supported + Supported
    • CompVis/stable-diffusion-v1-1
    • @@ -213,6 +218,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel` Stable Diffusion XL Supported Supported + Supported
      • stabilityai/stable-diffusion-xl-base-0.9
      • @@ -225,6 +231,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel` Stable Diffusion 3 Supported Not supported + Not supported
        • stabilityai/stable-diffusion-3-medium-diffusers
        • @@ -237,6 +244,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel` Flux Supported Not supported + Not supported
          • black-forest-labs/FLUX.1-schnell
          • @@ -260,10 +268,12 @@ In addition to image generation models, `InpaintingPipeline` supports specialize Architecture + LoRA support Example HuggingFace Models Stable Diffusion + Supported
            • stabilityai/stable-diffusion-2-inpainting
            • @@ -275,13 +285,22 @@ In addition to image generation models, `InpaintingPipeline` supports specialize Stable Diffusion XL + Supported - + @@ -292,11 +311,13 @@ In addition to image generation models, `InpaintingPipeline` supports specialize Architecture Models + LoRA support Example HuggingFace Models InternVL2 InternVL2 + Not supported
              • OpenGVLab/InternVL2-1B
              • @@ -309,6 +330,7 @@ In addition to image generation models, `InpaintingPipeline` supports specialize LLaVA LLaVA-v1.5 + Not supported
                • llava-hf/llava-1.5-7b-hf
                • @@ -318,6 +340,7 @@ In addition to image generation models, `InpaintingPipeline` supports specialize LLaVA-NeXT LLaVa-v1.6 + Not supported
                  • llava-hf/llava-v1.6-mistral-7b-hf
                  • @@ -329,6 +352,7 @@ In addition to image generation models, `InpaintingPipeline` supports specialize MiniCPMV MiniCPM-V-2_6 + Not supported
                    • openbmb/MiniCPM-V-2_6
                    • @@ -345,11 +369,13 @@ In addition to image generation models, `InpaintingPipeline` supports specialize Architecture Models + LoRA support Example HuggingFace Models WhisperForConditionalGeneration Whisper + Not supported
                      • openai/whisper-tiny
                      • @@ -366,6 +392,7 @@ In addition to image generation models, `InpaintingPipeline` supports specialize Distil-Whisper + Not supported
                        • distil-whisper/distil-small.en
                        • diff --git a/samples/cpp/visual_language_chat/README.md b/samples/cpp/visual_language_chat/README.md index 39364d51ee..73baf0088a 100644 --- a/samples/cpp/visual_language_chat/README.md +++ b/samples/cpp/visual_language_chat/README.md @@ -29,7 +29,7 @@ Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/o Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model `llava-hf/llava-v1.6-mistral-7b-hf` can benefit from being run on a dGPU. Modify the source code to change the device for inference to the `GPU`. -See [SUPPORTED_MODELS.md](../../../src/docs/SUPPORTED_MODELS.md#visual-language-models) for the list of supported models. +See [SUPPORTED_MODELS.md](../../../SUPPORTED_MODELS.md#visual-language-models) for the list of supported models. ## Run benchmark: diff --git a/samples/cpp/whisper_speech_recognition/README.md b/samples/cpp/whisper_speech_recognition/README.md index d649266613..2ea3322dee 100644 --- a/samples/cpp/whisper_speech_recognition/README.md +++ b/samples/cpp/whisper_speech_recognition/README.md @@ -31,7 +31,7 @@ Output: timestamps: [0, 2] text: How are you doing today? ``` -See [SUPPORTED_MODELS.md](../../../src/docs/SUPPORTED_MODELS.md#whisper-models) for the list of supported models. +See [SUPPORTED_MODELS.md](../../../SUPPORTED_MODELS.md#whisper-models) for the list of supported models. # Whisper pipeline usage diff --git a/samples/python/whisper_speech_recognition/README.md b/samples/python/whisper_speech_recognition/README.md index aeb46444bf..5f373df2b7 100644 --- a/samples/python/whisper_speech_recognition/README.md +++ b/samples/python/whisper_speech_recognition/README.md @@ -38,7 +38,7 @@ Output: timestamps: [0, 2] text: How are you doing today? ``` -See [SUPPORTED_MODELS.md](../../../src/docs/SUPPORTED_MODELS.md#whisper-models) for the list of supported models. +See [SUPPORTED_MODELS.md](../../../SUPPORTED_MODELS.md#whisper-models) for the list of supported models. # Whisper pipeline usage