Skip to content

Commit 0a6075b

Browse files
authored
Try to resolve default int4 config for local models (#760)
* try to resolve default int4 config for local models
* Update optimum/commands/export/openvino.py
* apply review comment
* add test case
* improve test
* update tests
1 parent f20e5b8 commit 0a6075b

File tree

2 files changed

+62
-10
lines changed

2 files changed

+62
-10
lines changed

optimum/commands/export/openvino.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414
"""Defines the command line for the export with OpenVINO."""
1515

16+
import json
1617
import logging
1718
import sys
1819
from pathlib import Path
@@ -212,6 +213,32 @@ def run(self):
212213
from ...exporters.openvino.__main__ import infer_task, main_export, maybe_convert_tokenizers
213214
from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS, OVConfig
214215

216+
def _get_default_int4_config(model_id_or_path, library_name):
    """Resolve the default 4-bit weight-compression config for a model.

    Lookup order:
      1. ``model_id_or_path`` itself, if it is a key of ``_DEFAULT_4BIT_CONFIGS``.
      2. If ``library_name`` contains ``"transformers"`` and ``model_id_or_path``
         is a directory holding a ``config.json``, the ``_name_or_path`` value
         stored in that file, if it is a key of ``_DEFAULT_4BIT_CONFIGS``.
      3. A generic fallback config.

    Args:
        model_id_or_path: Model identifier or path to a local model directory.
        library_name: Name of the library used to load the model.

    Returns:
        A new ``dict`` with the quantization parameters. Table entries are
        copied so callers can modify the result without altering
        ``_DEFAULT_4BIT_CONFIGS``.
    """
    if model_id_or_path in _DEFAULT_4BIT_CONFIGS:
        # Shallow copy: never hand out the shared module-level entry.
        return dict(_DEFAULT_4BIT_CONFIGS[model_id_or_path])

    # Substring test, so e.g. "sentence_transformers" also takes this branch.
    if "transformers" in library_name:
        config_path = Path(model_id_or_path) / "config.json"
        # is_file() (rather than exists()) skips a directory named config.json.
        if config_path.is_file():
            # JSON is UTF-8 by specification; do not rely on the locale encoding.
            with config_path.open("r", encoding="utf-8") as config_f:
                config = json.load(config_f)
            original_model_name = config.get("_name_or_path", "")
            if original_model_name in _DEFAULT_4BIT_CONFIGS:
                return dict(_DEFAULT_4BIT_CONFIGS[original_model_name])

    # No model-specific entry found: generic int4 defaults.
    return {
        "bits": 4,
        "ratio": 0.8,
        "sym": False,
        "group_size": None,
        "all_layers": None,
    }
233+
234+
library_name = TasksManager.infer_library_from_model(self.args.model, library_name=self.args.library)
235+
if library_name == "sentence_transformers" and self.args.library is None:
236+
logger.warning(
237+
"Library name is not specified. There are multiple possible variants: `sentence_transformers`, `transformers`."
238+
"`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
239+
)
240+
library_name = "transformers"
241+
215242
if self.args.fp16:
216243
logger.warning(
217244
"`--fp16` option is deprecated and will be removed in a future version. Use `--weight-format` instead."
@@ -241,9 +268,8 @@ def run(self):
241268
and self.args.num_samples is None
242269
and self.args.awq is None
243270
and self.args.sensitivity_metric is None
244-
and self.args.model in _DEFAULT_4BIT_CONFIGS
245271
):
246-
quantization_config = _DEFAULT_4BIT_CONFIGS[self.args.model]
272+
quantization_config = _get_default_int4_config(self.args.model, library_name)
247273
else:
248274
quantization_config = {
249275
"bits": 8 if is_int8 else 4,
@@ -265,14 +291,6 @@ def run(self):
265291
quantization_config["group_size"] = 128 if "128" in self.args.weight_format else 64
266292
ov_config = OVConfig(quantization_config=quantization_config)
267293

268-
library_name = TasksManager.infer_library_from_model(self.args.model, library_name=self.args.library)
269-
if library_name == "sentence_transformers" and self.args.library is None:
270-
logger.warning(
271-
"Library name is not specified. There are multiple possible variants: `sentence_transformers`, `transformers`."
272-
"`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
273-
)
274-
library_name = "transformers"
275-
276294
if self.args.convert_tokenizer:
277295
logger.warning("`--convert-tokenizer` option is deprecated. Tokenizer will be converted by default.")
278296

tests/openvino/test_exporters_cli.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from tempfile import TemporaryDirectory
1818

1919
from parameterized import parameterized
20+
from transformers import AutoModelForCausalLM
2021
from utils_tests import (
2122
_ARCHITECTURES_TO_EXPECTED_INT8,
2223
MODEL_NAMES,
@@ -38,6 +39,7 @@
3839
OVStableDiffusionPipeline,
3940
OVStableDiffusionXLPipeline,
4041
)
42+
from optimum.intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS
4143
from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
4244
from optimum.intel.utils.import_utils import is_openvino_tokenizers_available
4345

@@ -219,6 +221,38 @@ def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expec
219221
self.assertEqual(expected_int4, num_int4)
220222
self.assertTrue("--awq" not in option or b"Applying AWQ" in result.stdout)
221223

224+
def test_exporters_cli_int4_with_local_model_and_default_config(self):
    """Check that a local model export picks up the default int4 config.

    A small model is saved locally with ``_name_or_path`` overridden to
    ``"bigscience/bloomz-7b1"``, exported through the CLI with
    ``--weight-format int4``, and the weight-compression parameters recorded
    in the exported model's runtime info are compared against the
    ``_DEFAULT_4BIT_CONFIGS`` entry for that name.
    """
    with TemporaryDirectory() as tmpdir:
        pt_model = AutoModelForCausalLM.from_pretrained(MODEL_NAMES["bloom"])
        # overload for matching with default configuration
        pt_model.config._name_or_path = "bigscience/bloomz-7b1"
        pt_model.save_pretrained(tmpdir)
        subprocess.run(
            f"optimum-cli export openvino --model {tmpdir} --task text-generation-with-past --weight-format int4 {tmpdir}",
            shell=True,
            check=True,
        )

        model = OVModelForCausalLM.from_pretrained(tmpdir)
        rt_info = model.model.get_rt_info()
        self.assertIn("nncf", rt_info)
        self.assertIn("weight_compression", rt_info["nncf"])
        # Work on a copy: popping from the module-level entry would strip
        # "sym"/"bits" from the shared defaults for the rest of the process.
        default_config = dict(_DEFAULT_4BIT_CONFIGS["bigscience/bloomz-7b1"])
        model_weight_compression_config = rt_info["nncf"]["weight_compression"]
        sym = default_config.pop("sym", False)
        bits = default_config.pop("bits", None)
        self.assertEqual(bits, 4)

        # "bits" and "sym" are compared through a single "mode" entry.
        mode = f'int{bits}_{"sym" if sym else "asym"}'
        default_config["mode"] = mode
        for key, value in default_config.items():
            self.assertIn(key, model_weight_compression_config)
            self.assertEqual(
                model_weight_compression_config[key].value,
                str(value),
                f"Parameter {key} not matched with expected, {model_weight_compression_config[key].value} != {value}",
            )
255+
222256
def test_exporters_cli_help(self):
223257
subprocess.run(
224258
"optimum-cli export openvino --help",

0 commit comments

Comments
 (0)