From 8c95cae69b72547cce9caef4e018925593f220ed Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Thu, 7 Mar 2024 16:50:54 +0100 Subject: [PATCH] Fix default int8 quantization for CLI (#592) --- optimum/commands/export/openvino.py | 19 +++++++++---------- optimum/exporters/openvino/__main__.py | 17 ++--------------- 2 files changed, 11 insertions(+), 25 deletions(-) diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py index 255e2a7e13..997ec44aa5 100644 --- a/optimum/commands/export/openvino.py +++ b/optimum/commands/export/openvino.py @@ -157,13 +157,12 @@ def run(self): ) self.args.weight_format = "int8" - weight_format = self.args.weight_format or "fp32" - - ov_config = None - if weight_format in {"fp16", "fp32"}: - ov_config = OVConfig(dtype=weight_format) + if self.args.weight_format is None: + ov_config = None + elif self.args.weight_format in {"fp16", "fp32"}: + ov_config = OVConfig(dtype=self.args.weight_format) else: - is_int8 = weight_format == "int8" + is_int8 = self.args.weight_format == "int8" # For int4 quantization if not parameter is provided, then use the default config if exist if ( @@ -182,12 +181,12 @@ def run(self): "group_size": -1 if is_int8 else self.args.group_size, } - if weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}: + if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}: logger.warning( - f"--weight-format {weight_format} is deprecated, possible choices are fp32, fp16, int8, int4" + f"--weight-format {self.args.weight_format} is deprecated, possible choices are fp32, fp16, int8, int4" ) - quantization_config["sym"] = "asym" not in weight_format - quantization_config["group_size"] = 128 if "128" in weight_format else 64 + quantization_config["sym"] = "asym" not in self.args.weight_format + quantization_config["group_size"] = 128 if "128" in self.args.weight_format else 64 ov_config = OVConfig(quantization_config=quantization_config) # TODO : add input shapes diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index 24b65f9032..1c695e2f19 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -21,26 +21,13 @@ from optimum.exporters import TasksManager from optimum.exporters.onnx.base import OnnxConfig +from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED from optimum.utils.save_utils import maybe_load_preprocessors -from ...intel.utils.import_utils import ( - is_openvino_tokenizers_available, - is_optimum_version, - is_transformers_version, -) +from ...intel.utils.import_utils import is_openvino_tokenizers_available, is_transformers_version from .convert import export_from_model, export_tokenizer -if is_optimum_version(">=", "1.16.0"): - from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED -else: - # Copied from https://github.com/huggingface/optimum/blob/main/optimum/exporters/onnx/constants.py - SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED = [ - "bart", - "whisper", - ] - - if TYPE_CHECKING: from optimum.intel.openvino.configuration import OVConfig