
Commit

force precision using --weight-format
eaidova committed Aug 13, 2024
1 parent c0ef027 commit afc5dc7
Showing 5 changed files with 8 additions and 4 deletions.
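For context, the net effect of this commit is that an explicit precision request (--weight-format fp16 or fp32 from the export CLI, carried through ov_config.dtype) now takes precedence over the checkpoint's torch_dtype when the export precision is chosen. Below is a minimal sketch of that resolution order; resolve_export_dtype is a hypothetical helper written only for illustration, not a function in the codebase.

# Illustrative sketch only: summarizes the dtype-selection order added to
# optimum/exporters/openvino/__main__.py by this commit.
import torch

def resolve_export_dtype(ov_config_dtype, checkpoint_dtype, ov_ge_2024_2=True, ov_ge_2024_3=True):
    # 1. An explicit fp16/fp32 request (e.g. via --weight-format) wins.
    if ov_config_dtype in {"fp16", "fp32"}:
        return torch.float16 if ov_config_dtype == "fp16" else torch.float32
    # 2. Otherwise fall back to the checkpoint's torch_dtype, gated on the OpenVINO version.
    if ov_ge_2024_2 and checkpoint_dtype == torch.float16:
        return torch.float16
    if ov_ge_2024_3 and checkpoint_dtype == torch.bfloat16:
        return torch.bfloat16
    return None

In practice this would be exercised from the CLI, e.g. optimum-cli export openvino --model <model_id> --weight-format fp16 <output_dir>; the flag name comes from the commit title, while the exact invocation shown here is assumed.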
6 changes: 4 additions & 2 deletions optimum/exporters/openvino/__main__.py
@@ -301,9 +301,11 @@ def main_export(
     and task.startswith("text-generation")
     and getattr(config, "torch_dtype", torch.float32) in [torch.float16, torch.bfloat16]
 ):
-    if is_openvino_version(">=", "2024.2") and config.torch_dtype == torch.float16:
+    if ov_config is not None and ov_config.dtype in {"fp16", "fp32"}:
+        dtype = torch.float16 if ov_config.dtype == "fp16" else torch.float32
+    elif is_openvino_version(">=", "2024.2") and config.torch_dtype == torch.float16:
         dtype = torch.float16
-    if is_openvino_version(">=", "2024.3") and config.torch_dtype == torch.bfloat16:
+    elif is_openvino_version(">=", "2024.3") and config.torch_dtype == torch.bfloat16:
         dtype = torch.bfloat16
 
     if dtype is not None:
1 change: 1 addition & 0 deletions optimum/exporters/openvino/convert.py
@@ -385,6 +385,7 @@ def ts_patched_forward(*args, **kwargs):
 with patcher:
     if patch_16bit_model:
         from openvino.frontend.pytorch.patch_model import __make_16bit_traceable
+
         __make_16bit_traceable(model)
     check_dummy_inputs_are_allowed(model, dummy_inputs)
     sig = inspect.signature(model.forward) if hasattr(model, "forward") else inspect.signature(model.call)
2 changes: 1 addition & 1 deletion optimum/exporters/openvino/model_configs.py
@@ -66,8 +66,8 @@
     PersimmonModelPatcher,
     Phi3ModelPatcher,
     QwenModelPatcher,
-    UpdateCausalMaskModelPatcher,
     RotaryEmbPatcher,
+    UpdateCausalMaskModelPatcher,
     XverseModelPatcher,
 )

1 change: 1 addition & 0 deletions optimum/exporters/openvino/model_patcher.py
@@ -105,6 +105,7 @@ def patch_update_causal_mask(model, transformers_version):
     if is_transformers_version(">=", transformers_version):
         model.model._update_causal_mask = types.MethodType(_llama_gemma_update_causal_mask, model.model)
 
+
 # initialization of sin/cos cached in bf16/fp16 leads to accuracy loss
 # reinitialize them to save in float32 before export
 def _reinitialize_cos_sin_cached_fp32(rotary_emb):
2 changes: 1 addition & 1 deletion optimum/intel/openvino/modeling_decoder.py
@@ -281,7 +281,7 @@ def _from_transformers(
 if load_in_8bit is None and not quantization_config:
     ov_export_config = None
 else:
-    ov_export_config = OVConfig(dtype="fp32")
+    ov_export_config = OVConfig(dtype="auto")
 
 stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
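The modeling_decoder.py change pairs with the exporter change above: when neither load_in_8bit nor a quantization config is requested, the fallback export config no longer pins fp32, so the checkpoint precision or an explicit --weight-format can take effect. A hedged illustration, assuming OVConfig is importable from optimum.intel and that "auto" defers the dtype choice to the exporter:

from optimum.intel import OVConfig  # import path assumed from the optimum-intel public API

# Before this commit the fallback export config forced full precision:
#     ov_export_config = OVConfig(dtype="fp32")
# After it, the precision decision is deferred:
ov_export_config = OVConfig(dtype="auto")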

