From f7f98f35df6f6f62a90030435f33f5d89c49bb94 Mon Sep 17 00:00:00 2001 From: Aleksandr Suslov Date: Wed, 16 Oct 2024 23:11:13 +0400 Subject: [PATCH 1/5] migrate to wwb from genai --- .../openvino/tiny_llama_find_hyperparams/requirements.txt | 7 +++---- .../openvino/tiny_llama_synthetic_data/README.md | 6 +----- .../openvino/tiny_llama_synthetic_data/main.py | 1 - .../openvino/tiny_llama_synthetic_data/requirements.txt | 4 +--- tests/post_training/requirements.txt | 4 ++-- 5 files changed, 7 insertions(+), 15 deletions(-) diff --git a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt index e5e57118e2a..56ae6cccda2 100644 --- a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt +++ b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt @@ -1,8 +1,7 @@ datasets -whowhatbench @ git+https://github.com/andreyanufr/who_what_benchmark.git -numpy>=1.23.5 +whowhatbench @ git+https://github.com/openvinotoolkit/openvino.genai.git#subdirectory=tools/who_what_benchmark +numpy>=1.23.5,<2 openvino==2024.5 -optimum-intel[openvino]>=1.13.0 +optimum-intel[openvino]>=1.19.0 transformers>=4.35.2 onnx==1.17.0 -numpy<2 diff --git a/examples/llm_compression/openvino/tiny_llama_synthetic_data/README.md b/examples/llm_compression/openvino/tiny_llama_synthetic_data/README.md index da4556bfba3..0883f27b1bf 100644 --- a/examples/llm_compression/openvino/tiny_llama_synthetic_data/README.md +++ b/examples/llm_compression/openvino/tiny_llama_synthetic_data/README.md @@ -1,16 +1,12 @@ # Compress TinyLLama model using synthetic data -This example demonstrates how to optimize Large Language Models (LLMs) using NNCF weight compression API & synthetic data for the advanced algorithms usage. The example applies 4/8-bit mixed-precision quantization & Scale Estimation algorithm to weights of Linear (Fully-connected) layers of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) model. -To evaluate the accuracy of the compressed model we measure similarity between two texts generated by the baseline and compressed models using [WhoWhatBench](https://github.com/openvinotoolkit/openvino.genai/tree/master/llm_bench/python/who_what_benchmark) library. +This example demonstrates how to optimize Large Language Models (LLMs) using NNCF weight compression API & synthetic data for the advanced algorithms usage. The example applies 4/8-bit mixed-precision quantization & Scale Estimation algorithm to weights of Linear (Fully-connected) layers of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) model. This leads to a significant decrease in model footprint and performance improvement with OpenVINO. The example includes the following steps: -- Prepare `wikitext` dataset. - Prepare `TinyLlama/TinyLlama-1.1B-Chat-v1.0` text-generation model in OpenVINO representation using [Optimum-Intel](https://huggingface.co/docs/optimum/intel/inference). -- Compress weights of the model with NNCF Weight compression algorithm with Scale Estimation & `wikitext` dataset. - Prepare `synthetic` dataset using `nncf.data.generate_text_data` method. - Compress weights of the model with NNCF Weight compression algorithm with Scale Estimation & `synthetic` dataset. -- Measure the similarity of the two models optimized with different datasets. ## Install requirements diff --git a/examples/llm_compression/openvino/tiny_llama_synthetic_data/main.py b/examples/llm_compression/openvino/tiny_llama_synthetic_data/main.py index 86eaa04fa54..0feb47f70cc 100644 --- a/examples/llm_compression/openvino/tiny_llama_synthetic_data/main.py +++ b/examples/llm_compression/openvino/tiny_llama_synthetic_data/main.py @@ -77,7 +77,6 @@ def main(): scale_estimation=True, ) - # Verify the model output in comparison to floating-point one input_ids = tokenizer("What is Python? ", return_tensors="pt").to(device=hf_model.device) max_new_tokens = 100 diff --git a/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt b/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt index dfde1f7d619..a12f18358bb 100644 --- a/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt +++ b/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt @@ -1,8 +1,6 @@ torch==2.5.1 -datasets==3.0.1 -numpy>=1.23.5 +numpy>=1.23.5,<2 openvino==2024.5 optimum-intel[openvino]>=1.13.0 transformers>=4.35.2 onnx==1.17.0 -numpy<2 diff --git a/tests/post_training/requirements.txt b/tests/post_training/requirements.txt index 93103066aab..dbd664e22a3 100644 --- a/tests/post_training/requirements.txt +++ b/tests/post_training/requirements.txt @@ -9,7 +9,7 @@ pytest-forked librosa==0.10.0 memory-profiler==0.61.0 -optimum-intel==1.15.2 +#optimum-intel==1.15.2 optimum==1.17.1 scikit-learn>=1.2.2,<=1.5.0 soundfile==0.12.1 @@ -17,5 +17,5 @@ tensorboard==2.13.0 tensorflow-io==0.32.0 timm==0.9.2 transformers==4.38.2 -whowhatbench @ git+https://github.com/andreyanufr/who_what_benchmark@456d3584ce628f6c8605f37cd9a3ab2db1ebf933 +whowhatbench @ git+https://github.com/openvinotoolkit/openvino.genai.git@42096011f3fc822ccbefc8b70a73a599e154de6d#subdirectory=tools/who_what_benchmark datasets==2.21.0 From 87148abec0de555bf72a16b9219766844bfa9025 Mon Sep 17 00:00:00 2001 From: Alexander Suslov Date: Thu, 17 Oct 2024 07:48:31 +0400 Subject: [PATCH 2/5] Update requirements.txt --- tests/post_training/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/post_training/requirements.txt b/tests/post_training/requirements.txt index dbd664e22a3..e02d73c8cff 100644 --- a/tests/post_training/requirements.txt +++ b/tests/post_training/requirements.txt @@ -9,7 +9,7 @@ pytest-forked librosa==0.10.0 memory-profiler==0.61.0 -#optimum-intel==1.15.2 +optimum-intel==1.20.0 optimum==1.17.1 scikit-learn>=1.2.2,<=1.5.0 soundfile==0.12.1 From a98028d4d422ce9cd50d2d8754207d6dbf572038 Mon Sep 17 00:00:00 2001 From: Alexander Suslov Date: Fri, 18 Oct 2024 10:03:11 +0400 Subject: [PATCH 3/5] Update requirements.txt --- tests/post_training/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/post_training/requirements.txt b/tests/post_training/requirements.txt index e02d73c8cff..3442c610a2f 100644 --- a/tests/post_training/requirements.txt +++ b/tests/post_training/requirements.txt @@ -17,5 +17,5 @@ tensorboard==2.13.0 tensorflow-io==0.32.0 timm==0.9.2 transformers==4.38.2 -whowhatbench @ git+https://github.com/openvinotoolkit/openvino.genai.git@42096011f3fc822ccbefc8b70a73a599e154de6d#subdirectory=tools/who_what_benchmark +whowhatbench @ git+https://github.com/openvinotoolkit/openvino.genai.git@07b8f05e60e8ea3e0150307a4c91b3ed8f4c6188#subdirectory=tools/who_what_benchmark datasets==2.21.0 From 5654d0f4b5e1a67ddd14f81c5a04d3c12e04c1bf Mon Sep 17 00:00:00 2001 From: Alexander Suslov Date: Mon, 21 Oct 2024 15:26:28 +0400 Subject: [PATCH 4/5] Update requirements.txt --- tests/post_training/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/post_training/requirements.txt b/tests/post_training/requirements.txt index 3442c610a2f..c0d4a985f26 100644 --- a/tests/post_training/requirements.txt +++ b/tests/post_training/requirements.txt @@ -10,7 +10,7 @@ pytest-forked librosa==0.10.0 memory-profiler==0.61.0 optimum-intel==1.20.0 -optimum==1.17.1 +optimum==1.23.1 scikit-learn>=1.2.2,<=1.5.0 soundfile==0.12.1 tensorboard==2.13.0 From 94f1122f4fda8d4b8c643fc6ffe84b0d3123868b Mon Sep 17 00:00:00 2001 From: Aleksandr Suslov Date: Fri, 6 Dec 2024 10:54:40 +0400 Subject: [PATCH 5/5] update --- .../openvino/tiny_llama_find_hyperparams/requirements.txt | 2 +- .../openvino/tiny_llama_synthetic_data/requirements.txt | 2 +- tests/post_training/requirements.txt | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt index 56ae6cccda2..9aef312ebdf 100644 --- a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt +++ b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/requirements.txt @@ -2,6 +2,6 @@ datasets whowhatbench @ git+https://github.com/openvinotoolkit/openvino.genai.git#subdirectory=tools/who_what_benchmark numpy>=1.23.5,<2 openvino==2024.5 -optimum-intel[openvino]>=1.19.0 +optimum-intel>=1.13.0 transformers>=4.35.2 onnx==1.17.0 diff --git a/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt b/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt index a12f18358bb..5c81440aba7 100644 --- a/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt +++ b/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt @@ -1,6 +1,6 @@ torch==2.5.1 numpy>=1.23.5,<2 openvino==2024.5 -optimum-intel[openvino]>=1.13.0 +optimum-intel>=1.13.0 transformers>=4.35.2 onnx==1.17.0 diff --git a/tests/post_training/requirements.txt b/tests/post_training/requirements.txt index c0d4a985f26..9ef3ab3b680 100644 --- a/tests/post_training/requirements.txt +++ b/tests/post_training/requirements.txt @@ -9,13 +9,13 @@ pytest-forked librosa==0.10.0 memory-profiler==0.61.0 -optimum-intel==1.20.0 -optimum==1.23.1 +optimum-intel==1.20.1 +optimum==1.23.3 scikit-learn>=1.2.2,<=1.5.0 soundfile==0.12.1 tensorboard==2.13.0 tensorflow-io==0.32.0 timm==0.9.2 -transformers==4.38.2 -whowhatbench @ git+https://github.com/openvinotoolkit/openvino.genai.git@07b8f05e60e8ea3e0150307a4c91b3ed8f4c6188#subdirectory=tools/who_what_benchmark +transformers==4.46.3 +whowhatbench @ git+https://github.com/openvinotoolkit/openvino.genai.git@7d8912ff9df9bcfacf0044d108963cb7618bff69#subdirectory=tools/who_what_benchmark datasets==2.21.0