Skip to content

Commit

Permalink
[TESTS] Use FP32 inference precision, FP16 KV cache precision for all pipelines
Browse files Browse the repository at this point in the history
  • Loading branch information
ilya-lavrenov committed Jan 6, 2025
1 parent b04b28b commit f86a642
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 20 deletions.
2 changes: 1 addition & 1 deletion src/python/openvino_genai/py_openvino_genai.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ class ContinuousBatchingPipeline:
def __init__(self, models_path: os.PathLike, scheduler_config: SchedulerConfig, device: str, properties: dict[str, typing.Any] = {}, tokenizer_properties: dict[str, typing.Any] = {}) -> None:
...
@typing.overload
def __init__(self, models_path: os.PathLike, tokenizer: Tokenizer, scheduler_config: SchedulerConfig, device: str, properties: dict[str, typing.Any] = {}) -> None:
def __init__(self, models_path: os.PathLike, tokenizer: Tokenizer, scheduler_config: SchedulerConfig, device: str, **kwargs) -> None:
...
@typing.overload
def add_request(self, request_id: int, input_ids: openvino._pyopenvino.Tensor, generation_config: GenerationConfig) -> GenerationHandle:
Expand Down
7 changes: 3 additions & 4 deletions src/python/py_continuous_batching_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,15 +223,14 @@ void init_continuous_batching_pipeline(py::module_& m) {
py::arg("properties") = ov::AnyMap({}),
py::arg("tokenizer_properties") = ov::AnyMap({}))

.def(py::init([](const std::filesystem::path& models_path, const ov::genai::Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const std::map<std::string, py::object>& plugin_config) {
.def(py::init([](const std::filesystem::path& models_path, const ov::genai::Tokenizer& tokenizer, const SchedulerConfig& scheduler_config, const std::string& device, const py::kwargs& kwargs) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<ContinuousBatchingPipeline>(models_path, tokenizer, scheduler_config, device, pyutils::properties_to_any_map(plugin_config));
return std::make_unique<ContinuousBatchingPipeline>(models_path, tokenizer, scheduler_config, device, pyutils::kwargs_to_any_map(kwargs));
}),
py::arg("models_path"),
py::arg("tokenizer"),
py::arg("scheduler_config"),
py::arg("device"),
py::arg("properties") = ov::AnyMap({}))
py::arg("device"))

.def("get_tokenizer", &ContinuousBatchingPipeline::get_tokenizer)
.def("get_config", &ContinuousBatchingPipeline::get_config)
Expand Down
2 changes: 1 addition & 1 deletion tests/python_tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ def run_continuous_batching(
if type(generation_configs) is not list:
generation_configs = [generation_configs] * len(prompts)

cb_pipe = ContinuousBatchingPipeline(models_path, scheduler_config=scheduler_config, device='CPU')
cb_pipe = ContinuousBatchingPipeline(models_path, scheduler_config=scheduler_config, device='CPU', tokenizer_properties={}, properties=get_default_properties())
output = cb_pipe.generate(prompts, generation_configs)

del cb_pipe
Expand Down
12 changes: 6 additions & 6 deletions tests/python_tests/ov_genai_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import json

import openvino_genai as ov_genai

from common import get_default_properties

def get_models_list():
precommit_models = [
Expand Down Expand Up @@ -92,7 +92,7 @@ def read_model(params, **tokenizer_kwargs):

if (models_path / "openvino_model.xml").exists():
opt_model = OVModelForCausalLM.from_pretrained(models_path, trust_remote_code=True,
compile=False, device='CPU')
compile=False, device='CPU', ov_config=get_default_properties())
else:
ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(hf_tokenizer,
with_detokenizer=True,
Expand All @@ -104,7 +104,7 @@ def read_model(params, **tokenizer_kwargs):
hf_tokenizer.save_pretrained(models_path)

opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True,
compile=False, device='CPU', load_in_8bit=False)
compile=False, device='CPU', load_in_8bit=False, ov_config=get_default_properties())
opt_model.generation_config.save_pretrained(models_path)
opt_model.config.save_pretrained(models_path)
opt_model.save_pretrained(models_path)
Expand All @@ -114,7 +114,7 @@ def read_model(params, **tokenizer_kwargs):
models_path,
hf_tokenizer,
opt_model,
ov_genai.LLMPipeline(models_path, 'CPU', ENABLE_MMAP=False),
ov_genai.LLMPipeline(models_path, 'CPU', ENABLE_MMAP=False, **get_default_properties()),
)


Expand Down Expand Up @@ -178,7 +178,7 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path):
with (temp_path / config_name).open('w') as f:
json.dump(config_json, f)

ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU')
ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU', **get_default_properties())

for _, config_name in configs:
os.remove(temp_path / config_name)
Expand All @@ -188,4 +188,4 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path):

@functools.lru_cache(1)
def get_continuous_batching(path):
return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig())
return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig(), **get_default_properties())
4 changes: 2 additions & 2 deletions tests/python_tests/test_continuous_batching.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pathlib import Path
from openvino_genai import ContinuousBatchingPipeline, GenerationConfig, Tokenizer

from common import get_hugging_face_models, convert_models, generate_and_compare_with_reference_text, \
from common import get_default_properties, get_hugging_face_models, convert_models, generate_and_compare_with_reference_text, \
get_scheduler_config, get_greedy, run_cb_pipeline_with_ref, get_beam_search, get_greedy, \
get_multinomial_all_parameters, get_multinomial_temperature_and_num_return_sequence, \
get_multinomial_temperature_and_top_k, get_multinomial_temperature, get_multinomial_temperature_and_top_p
Expand Down Expand Up @@ -155,7 +155,7 @@ def test_post_oom_health(tmp_path, sampling_config):
models_path : Path = tmp_path / model_id
convert_models(opt_model, hf_tokenizer, models_path)

cb_pipe = ContinuousBatchingPipeline(models_path, Tokenizer(models_path), scheduler_config, "CPU")
cb_pipe = ContinuousBatchingPipeline(models_path, Tokenizer(models_path), scheduler_config, "CPU", **get_default_properties())

# First run should return incomplete response
output = cb_pipe.generate(["What is OpenVINO?"], [generation_config])
Expand Down
8 changes: 4 additions & 4 deletions tests/python_tests/test_kv_cache_eviction.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from openvino import serialize
from transformers import AutoTokenizer

from common import TESTS_ROOT, run_cb_pipeline_with_ref
from common import TESTS_ROOT, run_cb_pipeline_with_ref, get_default_properties


def load_prompts_dataset(file_name : str) -> Dict[str, List[str]]:
Expand All @@ -42,7 +42,7 @@ class ConvertedModel:
@pytest.fixture(scope='module')
def converted_model(tmp_path_factory):
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, load_in_8bit=False, compile=False)
model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, load_in_8bit=False, compile=False, ov_config=get_default_properties())
tokenizer = AutoTokenizer.from_pretrained(model_id)
models_path = tmp_path_factory.mktemp("cacheopt_test_models") / model_id
model.save_pretrained(models_path)
Expand Down Expand Up @@ -112,8 +112,8 @@ def test_cache_optimized_generation_is_similar_to_unoptimized(converted_model, t
scheduler_config_opt.enable_prefix_caching = enable_prefix_caching

models_path = converted_model.models_path
model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, "CPU")
model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, "CPU")
model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, "CPU", {}, get_default_properties())
model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, "CPU", {}, get_default_properties())

tokenizer = converted_model.tokenizer

Expand Down
4 changes: 2 additions & 2 deletions tests/python_tests/test_vlm_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import transformers
from optimum.intel.openvino import OVModelForVisualCausalLM
from openvino_genai import VLMPipeline, GenerationConfig
from common import get_image_by_link, get_beam_search, get_multinomial_all_parameters
from common import get_image_by_link, get_beam_search, get_multinomial_all_parameters, get_default_properties

def get_ov_model(cache):
model_dir = cache.mkdir("tiny-random-minicpmv-2_6")
Expand All @@ -19,7 +19,7 @@ def get_ov_model(cache):
ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(processor.tokenizer, with_detokenizer=True)
openvino.save_model(ov_tokenizer, model_dir / "openvino_tokenizer.xml")
openvino.save_model(ov_detokenizer, model_dir / "openvino_detokenizer.xml")
model = OVModelForVisualCausalLM.from_pretrained(model_id, compile=False, device="CPU", export=True, load_in_8bit=False, trust_remote_code=True)
model = OVModelForVisualCausalLM.from_pretrained(model_id, compile=False, device="CPU", export=True, load_in_8bit=False, trust_remote_code=True, ov_config=get_default_properties())
processor.save_pretrained(model_dir)
model.save_pretrained(model_dir)
return model_dir
Expand Down

0 comments on commit f86a642

Please sign in to comment.