From 8ef3997bf33ed7c33fc9b5d515f6aec8d62a51ff Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova Date: Thu, 19 Dec 2024 20:40:35 +0400 Subject: [PATCH] Disable kv cache compression for fp vlm (#1080) --- optimum/exporters/openvino/convert.py | 10 ++++++++-- tests/openvino/test_export.py | 8 ++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index c9e18cff6a..66e6c13a28 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -100,9 +100,15 @@ def _set_runtime_options( for model_name in models_and_export_configs.keys(): _, sub_export_config = models_and_export_configs[model_name] sub_export_config.runtime_options = {} - if "diffusers" in library_name or "text-generation" in task: + if ( + "diffusers" in library_name + or "text-generation" in task + or ("image-text-to-text" in task and model_name == "language_model") + ): sub_export_config.runtime_options["ACTIVATIONS_SCALE_FACTOR"] = "8.0" - if not quantized_model and "text-generation" in task: + if not quantized_model and ( + "text-generation" in task or ("image-text-to-text" in task and model_name == "language_model") + ): sub_export_config.runtime_options["KV_CACHE_PRECISION"] = "f16" diff --git a/tests/openvino/test_export.py b/tests/openvino/test_export.py index 2d57f92d0e..55500471ae 100644 --- a/tests/openvino/test_export.py +++ b/tests/openvino/test_export.py @@ -134,6 +134,14 @@ def _openvino_export( self.assertTrue(ov_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])) self.assertTrue(ov_model.model.has_rt_info(["runtime_options", "KV_CACHE_PRECISION"])) + if task == "image-text-to-text": + self.assertTrue( + ov_model.language_model.model.has_rt_info(["runtime_options", "KV_CACHE_PRECISION"]) + ) + self.assertTrue( + ov_model.language_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"]) + ) + if library_name == "diffusers": self.assertTrue( ov_model.vae_encoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])