Add tests for Whisper static pipeline #1250

Merged: 11 commits, Jan 20, 2025
1 change: 1 addition & 0 deletions .github/labeler.yml
@@ -64,6 +64,7 @@
 - 'src/cpp/src/whisper_pipeline_static.hpp'
 - 'src/python/py_whisper_pipeline.cpp'
 - 'tests/python_tests/test_whisper_pipeline.py'
+- 'tests/python_tests/test_whisper_pipeline_static.py'

 'category: Python API':
 - 'src/python/**/*'
5 changes: 3 additions & 2 deletions .github/workflows/linux.yml
@@ -268,9 +268,10 @@ jobs:
 matrix:
   test:
     - name: 'Whisper'
-      cmd: 'tests/python_tests/test_whisper_pipeline.py'
+      cmd: 'tests/python_tests/test_whisper_pipeline.py tests/python_tests/test_whisper_pipeline_static.py'
     - name: 'LLM & VLM'
-      cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py'
+      cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py'

 defaults:
   run:
     shell: bash
2 changes: 1 addition & 1 deletion .github/workflows/mac.yml
@@ -235,7 +235,7 @@ jobs:
 python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
 python -c "from openvino_genai import LLMPipeline"
 python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
-python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
+python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"

 genai_python_lib_whisper:
   name: OpenVINO genai extension whisper tests (cmake + wheel)
4 changes: 2 additions & 2 deletions .github/workflows/windows.yml
@@ -245,7 +245,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"

 genai_python_lib_whisper:
   name: OpenVINO genai extension whisper tests (cmake + wheel)

@@ -315,7 +315,7 @@ jobs:
 # this check enabled for windows only. Ticket: 160205.
 python -m pip install transformers==4.46.3

-python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
+python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py ./tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke"

 genai_python_lib_vlm:
   name: OpenVINO genai VLM tests (cmake + wheel)
8 changes: 4 additions & 4 deletions src/cpp/src/whisper_pipeline_static.cpp
@@ -509,7 +509,7 @@ ov::InferRequest DecoderCache::get_model(uint8_t input_ids_size) {
         reshape_input_ids(m_decoder_model, input_ids_size);

         ov::Core core = utils::singleton_core();
-        ov::CompiledModel compiled_model = core.compile_model(m_decoder_model, "NPU");
+        ov::CompiledModel compiled_model = core.compile_model(m_decoder_model, "NPU", m_properties);
         ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper decoder model");
         m_cache.emplace(input_ids_size, compiled_model.create_infer_request());
     }
@@ -544,14 +544,14 @@ WhisperPipeline::StaticWhisperPipeline::StaticWhisperPipeline(const std::filesys
     preprocess_decoder(decoder_with_past_model);

     ov::CompiledModel compiled_model;
-    compiled_model = core.compile_model(encoder_model, "NPU");
+    compiled_model = core.compile_model(encoder_model, "NPU", properties);
     ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper encoder model");
     m_models.encoder = compiled_model.create_infer_request();

     // Will compile decoder model when it's needed
-    m_decoder_cache = DecoderCache(decoder_model);
+    m_decoder_cache = DecoderCache(decoder_model, properties);

-    compiled_model = core.compile_model(decoder_with_past_model, "NPU");
+    compiled_model = core.compile_model(decoder_with_past_model, "NPU", properties);
     ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper decoder with past model");
     m_models.decoder_with_past = compiled_model.create_infer_request();
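Taken together, these changes forward the pipeline's construction-time properties to every compiled submodel, including decoders compiled lazily by DecoderCache. A minimal usage sketch, in line with how the new tests drive the pipeline; the model path and the dummy audio below are placeholders:

    import openvino_genai as ov_genai

    # Same NPUW-on-CPU configuration the new tests use.
    config = {"NPU_USE_NPUW": "YES",
              "NPUW_DEVICES": "CPU",
              "NPUW_ONLINE_PIPELINE": "NONE"}

    # Properties passed here now reach the encoder, the decoder cache,
    # and the decoder-with-past model alike.
    pipe = ov_genai.WhisperPipeline("whisper-tiny-ov", "NPU", **config)

    audio = [0.0] * 16000  # placeholder: one second of 16 kHz audio samples
    print(pipe.generate(audio, max_new_tokens=30).texts[0])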
5 changes: 4 additions & 1 deletion src/cpp/src/whisper_pipeline_static.hpp
@@ -18,12 +18,15 @@ namespace genai {
 class DecoderCache {
 public:
     DecoderCache() = default;
-    DecoderCache(std::shared_ptr<ov::Model> model) : m_decoder_model(model) {}
+    DecoderCache(std::shared_ptr<ov::Model> model, ov::AnyMap properties)
+        : m_decoder_model(model)
+        , m_properties(properties) {}

     ov::InferRequest get_model(uint8_t input_ids_size);
 private:
     std::unordered_map<uint8_t, ov::InferRequest> m_cache;
     std::shared_ptr<ov::Model> m_decoder_model;
+    ov::AnyMap m_properties;
 };

 class WhisperPipeline::StaticWhisperPipeline : public WhisperPipeline::WhisperPipelineImplBase {
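DecoderCache compiles one decoder per input_ids size on first use; the new m_properties member simply carries the construction-time properties into each of those deferred compile_model calls. An illustrative Python analogue of the pattern (DecoderCacheSketch and compile_fn are hypothetical names, not part of the library):

    class DecoderCacheSketch:
        """Analogue of the C++ DecoderCache: lazily compile a decoder
        reshaped for a given input_ids size, reusing the properties
        captured at construction for every deferred compilation."""

        def __init__(self, model, properties, compile_fn):
            self._model = model
            self._properties = properties  # forwarded to every compile
            self._compile_fn = compile_fn  # stand-in for reshape + compile_model
            self._cache = {}               # input_ids_size -> infer request

        def get_model(self, input_ids_size):
            if input_ids_size not in self._cache:
                self._cache[input_ids_size] = self._compile_fn(
                    self._model, input_ids_size, self._properties)
            return self._cache[input_ids_size]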
150 changes: 150 additions & 0 deletions tests/python_tests/test_whisper_pipeline_static.py
@@ -0,0 +1,150 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from test_whisper_pipeline import get_whisper_models_list, get_samples_from_dataset
from transformers import WhisperProcessor, AutoTokenizer
from optimum.intel.openvino import OVModelForSpeechSeq2Seq
import openvino_genai as ov_genai
import openvino_tokenizers
import openvino
import pytest
import pathlib

# This test suite is designed specifically to validate the functionality
# and robustness of the WhisperStaticPipeline on NPUW:CPU.
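# NPU_USE_NPUW=YES turns on the NPUW wrapper, NPUW_DEVICES=CPU makes the
# NPUW submodels execute on CPU (so no NPU hardware is required), and
# NPUW_ONLINE_PIPELINE=NONE disables online partitioning.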
config = {"NPU_USE_NPUW" : "YES",
TolyaTalamanov marked this conversation as resolved.
Show resolved Hide resolved
"NPUW_DEVICES" : "CPU",
"NPUW_ONLINE_PIPELINE" : "NONE"}

def load_and_save_whisper_model(params, stateful=False, **tokenizer_kwargs):
    model_id, path = params

    processor = WhisperProcessor.from_pretrained(model_id, trust_remote_code=True)
    if not stateful:
        path = pathlib.Path(f"{path}_with_past")

    if not (path / "openvino_encoder_model.xml").exists():
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(
            tokenizer,
            with_detokenizer=True,
            clean_up_tokenization_spaces=False,
            **tokenizer_kwargs,
        )

        openvino.save_model(ov_tokenizer, path / "openvino_tokenizer.xml")
        openvino.save_model(ov_detokenizer, path / "openvino_detokenizer.xml")

        # to store tokenizer config jsons with special tokens
        tokenizer.save_pretrained(path)

        opt_model = OVModelForSpeechSeq2Seq.from_pretrained(
            model_id,
            export=True,
            trust_remote_code=True,
            stateful=stateful,
            compile=False,
            device="CPU",
            load_in_8bit=False,
        )
        opt_model.generation_config.save_pretrained(path)
        opt_model.config.save_pretrained(path)
        opt_model.save_pretrained(path)
        processor.save_pretrained(path)

    return model_id, path

def get_results_cpu_npu(model_path, audio_sample, **config_kwargs):
    cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
    expected = cpu_pipe.generate(audio_sample, **config_kwargs)

    npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
    actual_out = npu_pipe.generate(audio_sample, **config_kwargs)

    return expected, actual_out

def compare_results_with_assert(expected, actual_out):
    assert len(expected.texts) == len(actual_out.texts)

    for i in range(0, len(expected.texts)):
        if expected.texts[i] != actual_out.texts[i]:
            print(f'expected: {expected.texts[i]}\n')
            print(f'actual_out: {actual_out.texts[i]}')
        assert expected.texts[i] == actual_out.texts[i]


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1))
@pytest.mark.precommit
def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample):
    model_id, model_path = load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample",
[
*get_samples_from_dataset(language="fr", length=2),
*get_samples_from_dataset(language="de", length=2),
*get_samples_from_dataset(language="es", length=2),
],)
@pytest.mark.precommit
def test_static_whisper_autodetect(model_descr, test_sample):
    model_id, model_path = load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample", get_samples_from_dataset(language="de", length=3)
)
@pytest.mark.precommit
def test_static_whisper_language_de(model_descr, test_sample):
Collaborator: What does it actually check? How is it different from test_static_whisper_autodetect?

Contributor Author: Here we explicitly set the language in the generation config; in test_static_whisper_autodetect, an additional infer request is first run to detect the language of the audio.
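To illustrate the contrast, a minimal sketch reusing get_results_cpu_npu from this file:

    # Autodetect: no language is passed, so the pipeline first runs an
    # extra infer request to detect the spoken language.
    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

    # Explicit: the language token is given up front, so detection is skipped.
    expected, actual_out = get_results_cpu_npu(
        model_path, test_sample, max_new_tokens=30, language="<|de|>")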

    model_id, model_path = load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|de|>")

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample", get_samples_from_dataset(language="fr", length=3)
)
@pytest.mark.precommit
def test_static_whisper_language_fr(model_descr, test_sample):
Collaborator: Same question: how is it different from test_static_whisper_autodetect?

    model_id, model_path = load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|fr|>")

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample", get_samples_from_dataset(language="ru", length=3)
)
@pytest.mark.precommit
def test_static_whisper_language_ru(model_descr, test_sample):
    model_id, model_path = load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|ru|>")

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1, long_form=True))
@pytest.mark.precommit
def test_static_whisper_generation_long(model_descr, test_sample):
    model_id, model_path = load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

    compare_results_with_assert(expected, actual_out)