diff --git a/.github/labeler.yml b/.github/labeler.yml
index f618bdb7fc..2bfe4248c1 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -64,6 +64,7 @@
 - 'src/cpp/src/whisper_pipeline_static.hpp'
 - 'src/python/py_whisper_pipeline.cpp'
 - 'tests/python_tests/test_whisper_pipeline.py'
+- 'tests/python_tests/test_whisper_pipeline_static.py'
 
 'category: Python API':
 - 'src/python/**/*'
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 5fc5568853..6bfca03c55 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -268,9 +268,10 @@ jobs:
       matrix:
         test:
           - name: 'Whisper'
-            cmd: 'tests/python_tests/test_whisper_pipeline.py'
+            cmd: 'tests/python_tests/test_whisper_pipeline.py tests/python_tests/test_whisper_pipeline_static.py'
           - name: 'LLM & VLM'
-            cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py'
+            cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py'
+
     defaults:
       run:
         shell: bash
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index f377d3e6a5..57776be64b 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -235,7 +235,7 @@ jobs:
         python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
         python -c "from openvino_genai import LLMPipeline"
         python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
-        python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
+        python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
 
   genai_python_lib_whisper:
     name: OpenVINO genai extension whisper tests (cmake + wheel)
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index f3ff07b641..ab5515e857 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -245,7 +245,7 @@ jobs:
         . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
         python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
         python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
-        python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
+        python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
 
   genai_python_lib_whisper:
     name: OpenVINO genai extension whisper tests (cmake + wheel)
@@ -315,7 +315,7 @@ jobs:
         # this check enabled for windows only. Ticket: 160205.
         python -m pip install transformers==4.46.3
-        python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
+        python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py ./tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke"
 
   genai_python_lib_vlm:
     name: OpenVINO genai VLM tests (cmake + wheel)
diff --git a/src/cpp/src/whisper_pipeline_static.cpp b/src/cpp/src/whisper_pipeline_static.cpp
index 551774ec59..e49a25e2d2 100644
--- a/src/cpp/src/whisper_pipeline_static.cpp
+++ b/src/cpp/src/whisper_pipeline_static.cpp
@@ -509,7 +509,7 @@ ov::InferRequest DecoderCache::get_model(uint8_t input_ids_size) {
         reshape_input_ids(m_decoder_model, input_ids_size);
 
         ov::Core core = utils::singleton_core();
-        ov::CompiledModel compiled_model = core.compile_model(m_decoder_model, "NPU");
+        ov::CompiledModel compiled_model = core.compile_model(m_decoder_model, "NPU", m_properties);
         ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper decoder model");
         m_cache.emplace(input_ids_size, compiled_model.create_infer_request());
     }
@@ -544,14 +544,14 @@ WhisperPipeline::StaticWhisperPipeline::StaticWhisperPipeline(const std::filesys
     preprocess_decoder(decoder_with_past_model);
 
     ov::CompiledModel compiled_model;
-    compiled_model = core.compile_model(encoder_model, "NPU");
+    compiled_model = core.compile_model(encoder_model, "NPU", properties);
     ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper encoder model");
     m_models.encoder = compiled_model.create_infer_request();
 
     // Will compile decoder model when it's needed
-    m_decoder_cache = DecoderCache(decoder_model);
+    m_decoder_cache = DecoderCache(decoder_model, properties);
 
-    compiled_model = core.compile_model(decoder_with_past_model, "NPU");
+    compiled_model = core.compile_model(decoder_with_past_model, "NPU", properties);
     ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper decoder with past model");
     m_models.decoder_with_past = compiled_model.create_infer_request();
 
diff --git a/src/cpp/src/whisper_pipeline_static.hpp b/src/cpp/src/whisper_pipeline_static.hpp
index b0618452d4..48425356b2 100644
--- a/src/cpp/src/whisper_pipeline_static.hpp
+++ b/src/cpp/src/whisper_pipeline_static.hpp
@@ -18,12 +18,15 @@ namespace genai {
 class DecoderCache {
 public:
     DecoderCache() = default;
-    DecoderCache(std::shared_ptr<ov::Model> model) : m_decoder_model(model) {}
+    DecoderCache(std::shared_ptr<ov::Model> model, ov::AnyMap properties)
+        : m_decoder_model(model)
+        , m_properties(properties) {}
 
     ov::InferRequest get_model(uint8_t input_ids_size);
 private:
     std::unordered_map<uint8_t, ov::InferRequest> m_cache;
     std::shared_ptr<ov::Model> m_decoder_model;
+    ov::AnyMap m_properties;
 };
 
 class WhisperPipeline::StaticWhisperPipeline : public WhisperPipeline::WhisperPipelineImplBase {
diff --git a/tests/python_tests/test_whisper_pipeline_static.py b/tests/python_tests/test_whisper_pipeline_static.py
new file mode 100644
index 0000000000..75bd5e1d86
--- /dev/null
+++ b/tests/python_tests/test_whisper_pipeline_static.py
@@ -0,0 +1,150 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from test_whisper_pipeline import get_whisper_models_list, get_samples_from_dataset
+from transformers import WhisperProcessor, AutoTokenizer
+from optimum.intel.openvino import OVModelForSpeechSeq2Seq
+import openvino_genai as ov_genai
+import openvino_tokenizers
+import openvino
+import pytest
+import pathlib
+
+# This test suite is designed specifically to validate the functionality
+# and robustness of the WhisperStaticPipeline on NPUW:CPU.
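+# Best-effort reading of the NPUW options below (noted as a reader aid):
+# NPU_USE_NPUW=YES routes compilation through the NPUW wrapper,
+# NPUW_DEVICES=CPU executes the partitioned subgraphs on CPU (so CI hosts
+# need no NPU hardware), and NPUW_ONLINE_PIPELINE=NONE disables online
+# partitioning. The dict is forwarded as-is to compile_model().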
+config = {"NPU_USE_NPUW" : "YES",
+          "NPUW_DEVICES" : "CPU",
+          "NPUW_ONLINE_PIPELINE" : "NONE"}
+
+def load_and_save_whisper_model(params, stateful=False, **tokenizer_kwargs):
+    model_id, path = params
+
+    processor = WhisperProcessor.from_pretrained(model_id, trust_remote_code=True)
+
+    if not stateful:
+        path = pathlib.Path(f"{path}_with_past")
+
+    if not (path / "openvino_encoder_model.xml").exists():
+        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+        ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(
+            tokenizer,
+            with_detokenizer=True,
+            clean_up_tokenization_spaces=False,
+            **tokenizer_kwargs,
+        )
+
+        openvino.save_model(ov_tokenizer, path / "openvino_tokenizer.xml")
+        openvino.save_model(ov_detokenizer, path / "openvino_detokenizer.xml")
+
+        # Store the tokenizer config JSONs with special tokens
+        tokenizer.save_pretrained(path)
+
+        opt_model = OVModelForSpeechSeq2Seq.from_pretrained(
+            model_id,
+            export=True,
+            trust_remote_code=True,
+            stateful=stateful,
+            compile=False,
+            device="CPU",
+            load_in_8bit=False,
+        )
+        opt_model.generation_config.save_pretrained(path)
+        opt_model.config.save_pretrained(path)
+        opt_model.save_pretrained(path)
+        processor.save_pretrained(path)
+
+    return model_id, path
+
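+# Produces the reference transcription with the CPU pipeline and the output
+# under test with the NPU pipeline (configured with the NPUW:CPU properties
+# above), so the two can be compared for exact text equality.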
+def get_results_cpu_npu(model_path, audio_sample, **config_kwargs):
+    cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
+    expected = cpu_pipe.generate(audio_sample, **config_kwargs)
+
+    npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
+    actual_out = npu_pipe.generate(audio_sample, **config_kwargs)
+
+    return expected, actual_out
+
+def compare_results_with_assert(expected, actual_out):
+    assert len(expected.texts) == len(actual_out.texts)
+
+    for i in range(0, len(expected.texts)):
+        if expected.texts[i] != actual_out.texts[i]:
+            print(f'expected: {expected.texts[i]}\n')
+            print(f'actual_out: {actual_out.texts[i]}')
+        assert expected.texts[i] == actual_out.texts[i]
+
+
+@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
+@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1))
+@pytest.mark.precommit
+def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample):
+    model_id, model_path = load_and_save_whisper_model(model_descr)
+
+    expected, actual_out = get_results_cpu_npu(model_path, test_sample)
+
+    compare_results_with_assert(expected, actual_out)
+
+
+@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
+@pytest.mark.parametrize("test_sample",
+                         [
+                             *get_samples_from_dataset(language="fr", length=2),
+                             *get_samples_from_dataset(language="de", length=2),
+                             *get_samples_from_dataset(language="es", length=2),
+                         ],)
+@pytest.mark.precommit
+def test_static_whisper_autodetect(model_descr, test_sample):
+    model_id, model_path = load_and_save_whisper_model(model_descr)
+
+    expected, actual_out = get_results_cpu_npu(model_path, test_sample)
+
+    compare_results_with_assert(expected, actual_out)
+
+
+@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
+@pytest.mark.parametrize(
+    "test_sample", get_samples_from_dataset(language="de", length=3)
+)
+@pytest.mark.precommit
+def test_static_whisper_language_de(model_descr, test_sample):
+    model_id, model_path = load_and_save_whisper_model(model_descr)
+
+    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|de|>")
+
+    compare_results_with_assert(expected, actual_out)
+
+
+@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
+@pytest.mark.parametrize(
+    "test_sample", get_samples_from_dataset(language="fr", length=3)
+)
+@pytest.mark.precommit
+def test_static_whisper_language_fr(model_descr, test_sample):
+    model_id, model_path = load_and_save_whisper_model(model_descr)
+
+    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|fr|>")
+
+    compare_results_with_assert(expected, actual_out)
+
+
+@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
+@pytest.mark.parametrize(
+    "test_sample", get_samples_from_dataset(language="ru", length=3)
+)
+@pytest.mark.precommit
+def test_static_whisper_language_ru(model_descr, test_sample):
+    model_id, model_path = load_and_save_whisper_model(model_descr)
+
+    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|ru|>")
+
+    compare_results_with_assert(expected, actual_out)
+
+
+@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
+@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1, long_form=True))
+@pytest.mark.precommit
+def test_static_whisper_generation_long(model_descr, test_sample):
+    model_id, model_path = load_and_save_whisper_model(model_descr)
+
+    expected, actual_out = get_results_cpu_npu(model_path, test_sample)
+
+    compare_results_with_assert(expected, actual_out)
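+
+# For local runs outside CI, the suite can be invoked the same way the
+# workflow changes above do, e.g.:
+#   python -m pytest -v tests/python_tests/test_whisper_pipeline_static.py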