Force export precision using --weight-format

huggingface · Jul 29, 2024 · 8625e36
1 parent 0791c4c
commit 8625e36
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 3 deletions.
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
@@ -301,9 +301,11 @@ def main_export(
         and task.startswith("text-generation")
         and getattr(config, "torch_dtype", torch.float32) in [torch.float16, torch.bfloat16]
     ):
-        if is_openvino_version(">=", "2024.2") and config.torch_dtype == torch.float16:
+        if ov_config is not None and ov_config.dtype in {"fp16", "fp32"}:
+            dtype = torch.float16 if ov_config.dtype == "fp16" else torch.float32
+        elif is_openvino_version(">=", "2024.2") and config.torch_dtype == torch.float16:
             dtype = torch.float16
-        if is_openvino_version(">=", "2024.3") and config.torch_dtype == torch.bfloat16:
+        elif is_openvino_version(">=", "2024.3") and config.torch_dtype == torch.bfloat16:
             dtype = torch.bfloat16
 
     if dtype is not None:

diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
@@ -281,7 +281,7 @@ def _from_transformers(
         if load_in_8bit is None and not quantization_config:
             ov_export_config = None
         else:
-            ov_export_config = OVConfig(dtype="fp32")
+            ov_export_config = OVConfig(dtype="auto")
 
         stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)