Skip to content

Commit

Permalink
Fix default int8 quantization for CLI (huggingface#592)
Browse files Browse the repository at this point in the history
  • Loading branch information
echarlaix authored and PenghuiCheng committed Mar 13, 2024
1 parent 4c481e6 commit 126a581
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 25 deletions.
19 changes: 9 additions & 10 deletions optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,13 +157,12 @@ def run(self):
)
self.args.weight_format = "int8"

weight_format = self.args.weight_format or "fp32"

ov_config = None
if weight_format in {"fp16", "fp32"}:
ov_config = OVConfig(dtype=weight_format)
if self.args.weight_format is None:
ov_config = None
elif self.args.weight_format in {"fp16", "fp32"}:
ov_config = OVConfig(dtype=self.args.weight_format)
else:
is_int8 = weight_format == "int8"
is_int8 = self.args.weight_format == "int8"

# For int4 quantization, if no parameter is provided, use the default config if one exists
if (
Expand All @@ -182,12 +181,12 @@ def run(self):
"group_size": -1 if is_int8 else self.args.group_size,
}

if weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:
if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:
logger.warning(
f"--weight-format {weight_format} is deprecated, possible choices are fp32, fp16, int8, int4"
f"--weight-format {self.args.weight_format} is deprecated, possible choices are fp32, fp16, int8, int4"
)
quantization_config["sym"] = "asym" not in weight_format
quantization_config["group_size"] = 128 if "128" in weight_format else 64
quantization_config["sym"] = "asym" not in self.args.weight_format
quantization_config["group_size"] = 128 if "128" in self.args.weight_format else 64
ov_config = OVConfig(quantization_config=quantization_config)

# TODO : add input shapes
Expand Down
17 changes: 2 additions & 15 deletions optimum/exporters/openvino/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,13 @@

from optimum.exporters import TasksManager
from optimum.exporters.onnx.base import OnnxConfig
from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED
from optimum.utils.save_utils import maybe_load_preprocessors

from ...intel.utils.import_utils import (
is_openvino_tokenizers_available,
is_optimum_version,
is_transformers_version,
)
from ...intel.utils.import_utils import is_openvino_tokenizers_available, is_transformers_version
from .convert import export_from_model, export_tokenizer


if is_optimum_version(">=", "1.16.0"):
from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED
else:
# Copied from https://github.com/huggingface/optimum/blob/main/optimum/exporters/onnx/constants.py
SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED = [
"bart",
"whisper",
]


if TYPE_CHECKING:
from optimum.intel.openvino.configuration import OVConfig

Expand Down

0 comments on commit 126a581

Please sign in to comment.