Skip to content

Commit

Permalink
Enable AWQ; add AWQ test
Browse files (browse the repository at this point in the history)
  • Loading branch information
nikita-savelyevv committed Apr 22, 2024
1 parent 673b88b commit f696353
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 8 deletions.
6 changes: 4 additions & 2 deletions optimum/intel/openvino/quantization.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,6 +21,8 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union

import datasets
from transformers.utils.quantization_config import QuantizationMethod

import nncf
import openvino
import torch
Expand Down Expand Up @@ -677,10 +679,10 @@ def _weight_only_quantization(
group_size=config.group_size,
all_layers=config.all_layers,
sensitivity_metric=sensitivity_metric,
# awq=config.quant_method == QuantizationMethod.AWQ, # TODO : enable from nncf v2.9.0
awq=config.quant_method == QuantizationMethod.AWQ,
ignored_scope=config.get_ignored_scope_instance(),
dataset=dataset,
# subset_size=config.num_samples if config.num_samples else 128, # TODO : enable from nncf v2.9.0
subset_size=config.num_samples if config.num_samples else 128,
)


Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -60,8 +60,8 @@

EXTRAS_REQUIRE = {
"neural-compressor": ["neural-compressor>=2.2.0", "onnxruntime<1.15.0", "accelerate"],
"openvino": ["openvino>=2023.3", "nncf>=2.8.1", "openvino-tokenizers[transformers]"],
"nncf": ["nncf>=2.8.1"],
"openvino": ["openvino>=2023.3", "nncf>=2.10.0", "openvino-tokenizers[transformers]"],
"nncf": ["nncf>=2.10.0"],
"ipex": ["intel-extension-for-pytorch", "transformers>=4.36.0,<4.39.0"],
"diffusers": ["diffusers"],
"quality": QUALITY_REQUIRE,
Expand Down
21 changes: 17 additions & 4 deletions tests/openvino/test_quantization.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -224,17 +224,17 @@ class OVWeightCompressionTest(unittest.TestCase):
),
(
OVModelForCausalLM,
"hf-internal-testing/tiny-random-OPTForCausalLM",
"HuggingFaceH4/tiny-random-LlamaForCausalLM",
dict(
bits=4,
sym=True,
group_size=-1,
group_size=16,
ratio=0.8,
sensitivity_metric="mean_activation_magnitude",
dataset="ptb",
quant_method=QuantizationMethod.AWQ,
),
14,
16,
),
)

Expand Down Expand Up @@ -455,7 +455,20 @@ def test_ovmodel_4bit_auto_compression_with_config(
):
with tempfile.TemporaryDirectory() as tmp_dir:
quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
model = model_cls.from_pretrained(model_id, export=True, quantization_config=quantization_config)

from nncf.common.logging.track_progress import track

with unittest.mock.patch("nncf.common.logging.track_progress.track", wraps=track) as track_patch:
model = model_cls.from_pretrained(model_id, export=True, quantization_config=quantization_config)
if quantization_config.quant_method == QuantizationMethod.AWQ:
# Called at least once with description="Applying AWQ"
self.assertTrue(
any(
args.kwargs.get("description", None) == "Applying AWQ"
for args in track_patch.call_args_list
)
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
Expand Down

0 comments on commit f696353

Please sign in to comment.