Skip to content

Commit

Permalink
Merge pull request #673 from nikita-savelyevv/nncf-210-update
Browse files (browse the repository at this point in the history)
Update NNCF to 2.10. Enable AWQ algorithm.
  • Loading branch information
AlexKoff88 authored Apr 26, 2024
2 parents b383ffb + a65c9e5 commit 920b237
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 7 deletions.
5 changes: 3 additions & 2 deletions optimum/intel/openvino/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from transformers import AutoTokenizer, DataCollator, PreTrainedModel, default_data_collator
from transformers.pytorch_utils import Conv1D
from transformers.utils import is_accelerate_available
from transformers.utils.quantization_config import QuantizationMethod

from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
from optimum.exporters.tasks import TasksManager
Expand Down Expand Up @@ -670,10 +671,10 @@ def _weight_only_quantization(
group_size=config.group_size,
all_layers=config.all_layers,
sensitivity_metric=sensitivity_metric,
# awq=config.quant_method == QuantizationMethod.AWQ, # TODO : enable from nncf v2.9.0
awq=config.quant_method == QuantizationMethod.AWQ or None,
ignored_scope=config.get_ignored_scope_instance(),
dataset=dataset,
# subset_size=config.num_samples if config.num_samples else 128, # TODO : enable from nncf v2.9.0
subset_size=config.num_samples if config.num_samples else 128,
)


Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@

EXTRAS_REQUIRE = {
"neural-compressor": ["neural-compressor>=2.2.0", "onnxruntime<1.15.0", "accelerate"],
"openvino": ["openvino>=2023.3", "nncf>=2.8.1", "openvino-tokenizers[transformers]"],
"nncf": ["nncf>=2.8.1"],
"openvino": ["openvino>=2023.3", "nncf>=2.10.0", "openvino-tokenizers[transformers]"],
"nncf": ["nncf>=2.10.0"],
"ipex": ["intel-extension-for-pytorch", "transformers>=4.36.0,<4.39.0"],
"diffusers": ["diffusers"],
"quality": QUALITY_REQUIRE,
Expand Down
12 changes: 9 additions & 3 deletions tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,17 +221,17 @@ class OVWeightCompressionTest(unittest.TestCase):
),
(
OVModelForCausalLM,
"opt",
"llama_awq",
dict(
bits=4,
sym=True,
group_size=-1,
group_size=16,
ratio=0.8,
sensitivity_metric="mean_activation_magnitude",
dataset="ptb",
quant_method=QuantizationMethod.AWQ,
),
14,
16,
),
)

Expand Down Expand Up @@ -452,6 +452,10 @@ def test_ovmodel_4bit_auto_compression_with_config(
with tempfile.TemporaryDirectory() as tmp_dir:
quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
model = model_cls.from_pretrained(model_id, export=True, quantization_config=quantization_config)
if quantization_config.quant_method == QuantizationMethod.AWQ:
# TODO: Check that AWQ was actually applied
pass

tokenizer = AutoTokenizer.from_pretrained(model_id)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
Expand Down Expand Up @@ -548,6 +552,8 @@ def test_ovmodel_load_large_model_with_additional_quantization_config(self):
"sensitivity_metric": None,
"dataset": None,
"ignored_scope": nncf.IgnoredScope(),
"awq": None,
"subset_size": 128,
}
compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params)

Expand Down
1 change: 1 addition & 0 deletions tests/openvino/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
"levit": "hf-internal-testing/tiny-random-LevitModel",
"longt5": "hf-internal-testing/tiny-random-longt5",
"llama": "fxmarty/tiny-llama-fast-tokenizer",
"llama_awq": "HuggingFaceH4/tiny-random-LlamaForCausalLM",
"llama_gptq": "hf-internal-testing/TinyLlama-1.1B-Chat-v0.3-GPTQ",
"m2m_100": "hf-internal-testing/tiny-random-m2m_100",
"opt": "hf-internal-testing/tiny-random-OPTModel",
Expand Down

0 comments on commit 920b237

Please sign in to comment.