Skip to content

Commit

Permalink
Increase default 4-bit compression ratio from 0.8 to 1.0 (#805)
Browse files: browse the repository at this point in the history
* Increase default 4-bit ratio from 0.8 to 1.0

* Style

* Fix test
  • Loading branch information
nikita-savelyevv authored Jul 8, 2024
1 parent 328259a commit eac1f6c
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 14 deletions.
10 changes: 2 additions & 8 deletions optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def parse_args(parser: "ArgumentParser"):

def run(self):
from ...exporters.openvino.__main__ import infer_task, main_export, maybe_convert_tokenizers
from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS, OVConfig
from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIG, _DEFAULT_4BIT_CONFIGS, OVConfig

def _get_default_int4_config(model_id_or_path, library_name):
if model_id_or_path in _DEFAULT_4BIT_CONFIGS:
Expand All @@ -233,13 +233,7 @@ def _get_default_int4_config(model_id_or_path, library_name):
if original_model_name in _DEFAULT_4BIT_CONFIGS:
return _DEFAULT_4BIT_CONFIGS[original_model_name]

return {
"bits": 4,
"ratio": 0.8,
"sym": False,
"group_size": None,
"all_layers": None,
}
return _DEFAULT_4BIT_CONFIG

library_name = TasksManager.infer_library_from_model(self.args.model, library_name=self.args.library)
if library_name == "sentence_transformers" and self.args.library is None:
Expand Down
8 changes: 8 additions & 0 deletions optimum/intel/openvino/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,14 @@
},
}

# Fallback 4-bit weight-quantization settings, used when a model has no entry
# in _DEFAULT_4BIT_CONFIGS.
# NOTE(review): callers receive this dict by reference, not a copy — confirm
# that none of them mutate it, or hand out a copy instead.
_DEFAULT_4BIT_CONFIG = {
"bits": 4,
"ratio": 1.0,
"sym": False,
"group_size": 128,
"all_layers": None,
}


class OVQuantizationMethod(str, Enum):
DEFAULT = "default"
Expand Down
10 changes: 8 additions & 2 deletions optimum/intel/openvino/modeling_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,13 @@
from ...exporters.openvino.stateful import model_has_state
from ..utils.import_utils import is_nncf_available, is_transformers_version
from ..utils.modeling_utils import MULTI_QUERY_ATTN_MODELS
from .configuration import _DEFAULT_4BIT_CONFIGS, OVConfig, OVWeightQuantizationConfig, _check_default_4bit_configs
from .configuration import (
_DEFAULT_4BIT_CONFIG,
_DEFAULT_4BIT_CONFIGS,
OVConfig,
OVWeightQuantizationConfig,
_check_default_4bit_configs,
)
from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel
from .utils import ONNX_WEIGHTS_NAME, OV_TO_NP_TYPE, OV_XML_FILE_NAME, STR_TO_OV_TYPE

Expand Down Expand Up @@ -775,7 +781,7 @@ def _from_pretrained(
init_cls = cls

if isinstance(quantization_config, dict) and quantization_config == {"bits": 4}:
quantization_config = _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, quantization_config)
quantization_config = _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, _DEFAULT_4BIT_CONFIG)
quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)

enable_compilation = kwargs.pop("compile", True) and not quantization_config
Expand Down
8 changes: 4 additions & 4 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ class OVCLIExportTestCase(unittest.TestCase):
)

TEST_4BIT_CONFIGURATONS = [
("text-generation-with-past", "opt125m", "int4_sym_g128", 62, 86),
("text-generation-with-past", "opt125m", "int4_asym_g128", 62, 86),
("text-generation-with-past", "opt125m", "int4_sym_g64", 62, 86),
("text-generation-with-past", "opt125m", "int4_asym_g64", 62, 86),
("text-generation-with-past", "opt125m", "int4_sym_g128", 4, 144),
("text-generation-with-past", "opt125m", "int4_asym_g128", 4, 144),
("text-generation-with-past", "opt125m", "int4_sym_g64", 4, 144),
("text-generation-with-past", "opt125m", "int4_asym_g64", 4, 144),
("text-generation-with-past", "llama_awq", "int4 --ratio 1.0 --sym --group-size 16 --all-layers", 0, 32),
(
"text-generation-with-past",
Expand Down

0 comments on commit eac1f6c

Please sign in to comment.