Skip to content

Commit

Permalink
Set default 4-bit compression ratio to 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
echarlaix committed Jul 10, 2024
1 parent 01b069f commit efd6c0d
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
default=None,
help=(
"A parameter used when applying 4-bit quantization to control the ratio between 4-bit and 8-bit quantization. If set to 0.8, 80%% of the layers will be quantized to int4 "
- "while 20%% will be quantized to int8. This helps to achieve better accuracy at the sacrifice of the model size and inference latency. Default value is 0.8."
+ "while 20%% will be quantized to int8. This helps to achieve better accuracy at the sacrifice of the model size and inference latency. Default value is 1.0."
),
)
optional_group.add_argument(
Expand Down Expand Up @@ -277,7 +277,7 @@ def _get_default_int4_config(model_id_or_path, library_name):
else:
quantization_config = {
"bits": 8 if is_int8 else 4,
- "ratio": 1 if is_int8 else (self.args.ratio or 0.8),
+ "ratio": 1 if is_int8 else (self.args.ratio or 1.0),
"sym": self.args.sym or False,
"group_size": -1 if is_int8 else self.args.group_size,
"all_layers": None if is_int8 else self.args.all_layers,
Expand Down

0 comments on commit efd6c0d

Please sign in to comment.