Add tests for Whisper static pipeline #1250

Merged: 11 commits, Jan 20, 2025
1 change: 1 addition & 0 deletions .github/labeler.yml
@@ -64,6 +64,7 @@
 - 'src/cpp/src/whisper_pipeline_static.hpp'
 - 'src/python/py_whisper_pipeline.cpp'
 - 'tests/python_tests/test_whisper_pipeline.py'
+- 'tests/python_tests/test_whisper_pipeline_static.py'

 'category: Python API':
 - 'src/python/**/*'
5 changes: 3 additions & 2 deletions .github/workflows/linux.yml
@@ -268,9 +268,10 @@ jobs:
 matrix:
   test:
     - name: 'Whisper'
-      cmd: 'tests/python_tests/test_whisper_pipeline.py'
+      cmd: 'tests/python_tests/test_whisper_pipeline.py tests/python_tests/test_whisper_pipeline_static.py'
     - name: 'LLM & VLM'
-      cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py'
+      cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py'

 defaults:
   run:
     shell: bash
2 changes: 1 addition & 1 deletion .github/workflows/mac.yml
@@ -235,7 +235,7 @@ jobs:
 python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
 python -c "from openvino_genai import LLMPipeline"
 python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
-python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
+python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"

 genai_python_lib_whisper:
   name: OpenVINO genai extension whisper tests (cmake + wheel)
4 changes: 2 additions & 2 deletions .github/workflows/windows.yml
@@ -245,7 +245,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"

 genai_python_lib_whisper:
   name: OpenVINO genai extension whisper tests (cmake + wheel)

@@ -315,7 +315,7 @@ jobs:
 # this check enabled for windows only. Ticket: 160205.
 python -m pip install transformers==4.46.3

-python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
+python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py ./tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke"

 genai_python_lib_vlm:
   name: OpenVINO genai VLM tests (cmake + wheel)
8 changes: 4 additions & 4 deletions src/cpp/src/whisper_pipeline_static.cpp
@@ -509,7 +509,7 @@ ov::InferRequest DecoderCache::get_model(uint8_t input_ids_size) {
         reshape_input_ids(m_decoder_model, input_ids_size);

         ov::Core core = utils::singleton_core();
-        ov::CompiledModel compiled_model = core.compile_model(m_decoder_model, "NPU");
+        ov::CompiledModel compiled_model = core.compile_model(m_decoder_model, "NPU", m_properties);
         ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper decoder model");
         m_cache.emplace(input_ids_size, compiled_model.create_infer_request());
     }
@@ -544,14 +544,14 @@ WhisperPipeline::StaticWhisperPipeline::StaticWhisperPipeline(const std::filesys
     preprocess_decoder(decoder_with_past_model);

     ov::CompiledModel compiled_model;
-    compiled_model = core.compile_model(encoder_model, "NPU");
+    compiled_model = core.compile_model(encoder_model, "NPU", properties);
     ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper encoder model");
     m_models.encoder = compiled_model.create_infer_request();

     // Will compile decoder model when it's needed
-    m_decoder_cache = DecoderCache(decoder_model);
+    m_decoder_cache = DecoderCache(decoder_model, properties);

-    compiled_model = core.compile_model(decoder_with_past_model, "NPU");
+    compiled_model = core.compile_model(decoder_with_past_model, "NPU", properties);
     ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper decoder with past model");
     m_models.decoder_with_past = compiled_model.create_infer_request();
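Taken together, these changes forward the pipeline's construction-time properties to every compiled submodel, including decoders compiled lazily by DecoderCache. A minimal usage sketch, in line with how the new tests drive the pipeline; the model path and the dummy audio below are placeholders:

    import openvino_genai as ov_genai

    # Same NPUW-on-CPU configuration the new tests use.
    config = {"NPU_USE_NPUW": "YES",
              "NPUW_DEVICES": "CPU",
              "NPUW_ONLINE_PIPELINE": "NONE"}

    # Properties passed here now reach the encoder, the decoder cache,
    # and the decoder-with-past model alike.
    pipe = ov_genai.WhisperPipeline("whisper-tiny-ov", "NPU", **config)

    audio = [0.0] * 16000  # placeholder: one second of 16 kHz audio samples
    print(pipe.generate(audio, max_new_tokens=30).texts[0])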
5 changes: 4 additions & 1 deletion src/cpp/src/whisper_pipeline_static.hpp
@@ -18,12 +18,15 @@ namespace genai {
 class DecoderCache {
 public:
     DecoderCache() = default;
-    DecoderCache(std::shared_ptr<ov::Model> model) : m_decoder_model(model) {}
+    DecoderCache(std::shared_ptr<ov::Model> model, ov::AnyMap properties)
+        : m_decoder_model(model)
+        , m_properties(properties) {}

     ov::InferRequest get_model(uint8_t input_ids_size);
 private:
     std::unordered_map<uint8_t, ov::InferRequest> m_cache;
     std::shared_ptr<ov::Model> m_decoder_model;
+    ov::AnyMap m_properties;
 };

 class WhisperPipeline::StaticWhisperPipeline : public WhisperPipeline::WhisperPipelineImplBase {
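DecoderCache compiles one decoder per input_ids size on first use; the new m_properties member simply carries the construction-time properties into each of those deferred compile_model calls. An illustrative Python analogue of the pattern (DecoderCacheSketch and compile_fn are hypothetical names, not part of the library):

    class DecoderCacheSketch:
        """Analogue of the C++ DecoderCache: lazily compile a decoder
        reshaped for a given input_ids size, reusing the properties
        captured at construction for every deferred compilation."""

        def __init__(self, model, properties, compile_fn):
            self._model = model
            self._properties = properties  # forwarded to every compile
            self._compile_fn = compile_fn  # stand-in for reshape + compile_model
            self._cache = {}               # input_ids_size -> infer request

        def get_model(self, input_ids_size):
            if input_ids_size not in self._cache:
                self._cache[input_ids_size] = self._compile_fn(
                    self._model, input_ids_size, self._properties)
            return self._cache[input_ids_size]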
150 changes: 150 additions & 0 deletions tests/python_tests/test_whisper_pipeline_static.py
@@ -0,0 +1,150 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from test_whisper_pipeline import get_whisper_models_list, get_samples_from_dataset
from transformers import WhisperProcessor, AutoTokenizer
from optimum.intel.openvino import OVModelForSpeechSeq2Seq
import openvino_genai as ov_genai
import openvino_tokenizers
import openvino
import pytest
import pathlib

# This test suite is designed specifically to validate the functionality
# and robustness of the WhisperStaticPipeline on NPUW:CPU.
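# NPU_USE_NPUW=YES turns on the NPUW wrapper, NPUW_DEVICES=CPU makes the
# NPUW submodels execute on CPU (so no NPU hardware is required), and
# NPUW_ONLINE_PIPELINE=NONE disables online partitioning.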
config = {"NPU_USE_NPUW" : "YES",
TolyaTalamanov marked this conversation as resolved.
Show resolved Hide resolved
"NPUW_DEVICES" : "CPU",
"NPUW_ONLINE_PIPELINE" : "NONE"}

def load_and_save_whisper_model(params, stateful=False, **tokenizer_kwargs):
    model_id, path = params

    processor = WhisperProcessor.from_pretrained(model_id, trust_remote_code=True)
    if not stateful:
        path = pathlib.Path(f"{path}_with_past")

    if not (path / "openvino_encoder_model.xml").exists():
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(
            tokenizer,
            with_detokenizer=True,
            clean_up_tokenization_spaces=False,
            **tokenizer_kwargs,
        )

        openvino.save_model(ov_tokenizer, path / "openvino_tokenizer.xml")
        openvino.save_model(ov_detokenizer, path / "openvino_detokenizer.xml")

        # to store tokenizer config jsons with special tokens
        tokenizer.save_pretrained(path)

        opt_model = OVModelForSpeechSeq2Seq.from_pretrained(
            model_id,
            export=True,
            trust_remote_code=True,
            stateful=stateful,
            compile=False,
            device="CPU",
            load_in_8bit=False,
        )
        opt_model.generation_config.save_pretrained(path)
        opt_model.config.save_pretrained(path)
        opt_model.save_pretrained(path)
        processor.save_pretrained(path)

    return model_id, path

def get_results_cpu_npu(model_path, audio_sample, **config_kwargs):
    cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
    expected = cpu_pipe.generate(audio_sample, **config_kwargs)

    npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
    actual_out = npu_pipe.generate(audio_sample, **config_kwargs)

    return expected, actual_out

def compare_results_with_assert(expected, actual_out):
    assert len(expected.texts) == len(actual_out.texts)

    for i in range(0, len(expected.texts)):
        if expected.texts[i] != actual_out.texts[i]:
            print(f'expected: {expected.texts[i]}\n')
            print(f'actual_out: {actual_out.texts[i]}')
        assert expected.texts[i] == actual_out.texts[i]


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1))
@pytest.mark.precommit
def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample):
    model_id, model_path = load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample",
[
*get_samples_from_dataset(language="fr", length=2),
*get_samples_from_dataset(language="de", length=2),
*get_samples_from_dataset(language="es", length=2),
],)
@pytest.mark.precommit
def test_static_whisper_autodetect(model_descr, test_sample):
    model_id, model_path = load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample", get_samples_from_dataset(language="de", length=3)
)
@pytest.mark.precommit
def test_static_whisper_language_de(model_descr, test_sample):
Collaborator: What does it actually check? How is it different from test_static_whisper_autodetect?

Contributor Author: Here we explicitly set the language in the generation config; in test_static_whisper_autodetect, an additional infer request is first run to detect the language of the audio.
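To illustrate the contrast, a minimal sketch reusing get_results_cpu_npu from this file:

    # Autodetect: no language is passed, so the pipeline first runs an
    # extra infer request to detect the spoken language.
    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

    # Explicit: the language token is given up front, so detection is skipped.
    expected, actual_out = get_results_cpu_npu(
        model_path, test_sample, max_new_tokens=30, language="<|de|>")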

    model_id, model_path = load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|de|>")

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample", get_samples_from_dataset(language="fr", length=3)
)
@pytest.mark.precommit
def test_static_whisper_language_fr(model_descr, test_sample):
Collaborator: Same question: how is it different from test_static_whisper_autodetect?

    model_id, model_path = load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|fr|>")

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample", get_samples_from_dataset(language="ru", length=3)
)
@pytest.mark.precommit
def test_static_whisper_language_ru(model_descr, test_sample):
    model_id, model_path = load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|ru|>")

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1, long_form=True))
@pytest.mark.precommit
def test_static_whisper_generation_long(model_descr, test_sample):
    model_id, model_path = load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

    compare_results_with_assert(expected, actual_out)