Skip to content

Commit

Permalink
Disable KV-cache compression for floating-point (non-quantized) VLMs (#1080)
Browse files — browse the repository at this point in the history
  • Loading branch information
eaidova authored Dec 19, 2024
1 parent cda4908 commit 8ef3997
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
10 changes: 8 additions & 2 deletions optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,15 @@ def _set_runtime_options(
for model_name in models_and_export_configs.keys():
_, sub_export_config = models_and_export_configs[model_name]
sub_export_config.runtime_options = {}
if "diffusers" in library_name or "text-generation" in task:
if (
"diffusers" in library_name
or "text-generation" in task
or ("image-text-to-text" in task and model_name == "language_model")
):
sub_export_config.runtime_options["ACTIVATIONS_SCALE_FACTOR"] = "8.0"
if not quantized_model and "text-generation" in task:
if not quantized_model and (
"text-generation" in task or ("image-text-to-text" in task and model_name == "language_model")
):
sub_export_config.runtime_options["KV_CACHE_PRECISION"] = "f16"


Expand Down
8 changes: 8 additions & 0 deletions tests/openvino/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,14 @@ def _openvino_export(
self.assertTrue(ov_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"]))
self.assertTrue(ov_model.model.has_rt_info(["runtime_options", "KV_CACHE_PRECISION"]))

if task == "image-text-to-text":
self.assertTrue(
ov_model.language_model.model.has_rt_info(["runtime_options", "KV_CACHE_PRECISION"])
)
self.assertTrue(
ov_model.language_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
)

if library_name == "diffusers":
self.assertTrue(
ov_model.vae_encoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
Expand Down

0 comments on commit 8ef3997

Please sign in to comment.