diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 831bcc827..1316ed6b6 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -301,9 +301,11 @@ def main_export(
         and task.startswith("text-generation")
         and getattr(config, "torch_dtype", torch.float32) in [torch.float16, torch.bfloat16]
     ):
-        if is_openvino_version(">=", "2024.2") and config.torch_dtype == torch.float16:
+        if ov_config is not None and ov_config.dtype in {"fp16", "fp32"}:
+            dtype = torch.float16 if ov_config.dtype == "fp16" else torch.float32
+        elif is_openvino_version(">=", "2024.2") and config.torch_dtype == torch.float16:
             dtype = torch.float16
-        if is_openvino_version(">=", "2024.3") and config.torch_dtype == torch.bfloat16:
+        elif is_openvino_version(">=", "2024.3") and config.torch_dtype == torch.bfloat16:
             dtype = torch.bfloat16
 
         if dtype is not None:
diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
index febbce4ff..2306ec96b 100644
--- a/optimum/exporters/openvino/convert.py
+++ b/optimum/exporters/openvino/convert.py
@@ -385,6 +385,7 @@ def ts_patched_forward(*args, **kwargs):
         with patcher:
             if patch_16bit_model:
                 from openvino.frontend.pytorch.patch_model import __make_16bit_traceable
+
                 __make_16bit_traceable(model)
             check_dummy_inputs_are_allowed(model, dummy_inputs)
             sig = inspect.signature(model.forward) if hasattr(model, "forward") else inspect.signature(model.call)
diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index b83af1126..e8c834a39 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -66,8 +66,8 @@
     PersimmonModelPatcher,
     Phi3ModelPatcher,
     QwenModelPatcher,
-    UpdateCausalMaskModelPatcher,
     RotaryEmbPatcher,
+    UpdateCausalMaskModelPatcher,
     XverseModelPatcher,
 )
 
diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index 2c1a0f39c..642b40424 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -105,6 +105,7 @@ def patch_update_causal_mask(model, transformers_version):
     if is_transformers_version(">=", transformers_version):
         model.model._update_causal_mask = types.MethodType(_llama_gemma_update_causal_mask, model.model)
 
+
 # initialization of sin/cos cached in bf16/fp16 leads to accuracy loss
 # reinitialize them to save in float32 before export
 def _reinitialize_cos_sin_cached_fp32(rotary_emb):
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 26c412105..554fdee7c 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -281,7 +281,7 @@ def _from_transformers(
         if load_in_8bit is None and not quantization_config:
            ov_export_config = None
         else:
-            ov_export_config = OVConfig(dtype="fp32")
+            ov_export_config = OVConfig(dtype="auto")
 
         stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
 
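The behavioral core of this diff is the dtype-resolution order in the `__main__.py` hunk: an explicit `OVConfig` dtype (`"fp16"`/`"fp32"`) now takes precedence over the checkpoint's `torch_dtype`, which is otherwise only honoured when the installed OpenVINO version supports it (fp16 from 2024.2, bf16 from 2024.3). Below is a minimal standalone sketch of that precedence; `resolve_export_dtype`, `ov_version_at_least`, and the simplified arguments are illustrative stand-ins, not functions from optimum-intel.

```python
# Sketch of the dtype-resolution precedence introduced above (not optimum-intel code).
from typing import Optional

import torch


def ov_version_at_least(version: str, required: str) -> bool:
    """Crude comparison on the major.minor components of an OpenVINO version string."""
    def to_tuple(v: str):
        return tuple(int(part) for part in v.split(".")[:2])

    return to_tuple(version) >= to_tuple(required)


def resolve_export_dtype(
    ov_config_dtype: Optional[str],
    config_torch_dtype: torch.dtype,
    openvino_version: str,
) -> Optional[torch.dtype]:
    # 1. An explicit user request via OVConfig takes priority.
    if ov_config_dtype in {"fp16", "fp32"}:
        return torch.float16 if ov_config_dtype == "fp16" else torch.float32
    # 2. Otherwise fall back to the checkpoint dtype, gated on runtime support.
    if config_torch_dtype == torch.float16 and ov_version_at_least(openvino_version, "2024.2"):
        return torch.float16
    if config_torch_dtype == torch.bfloat16 and ov_version_at_least(openvino_version, "2024.3"):
        return torch.bfloat16
    # 3. Nothing requested and no supported 16-bit dtype: leave the export dtype unset.
    return None


if __name__ == "__main__":
    # fp32 requested explicitly -> the fp16 checkpoint dtype is overridden.
    print(resolve_export_dtype("fp32", torch.float16, "2024.3"))  # torch.float32
    # No explicit request, bf16 checkpoint, new enough OpenVINO -> bf16 kept.
    print(resolve_export_dtype(None, torch.bfloat16, "2024.3"))  # torch.bfloat16
```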