From 28aac242454fab5ecdc51deb5c37c34a63e057e9 Mon Sep 17 00:00:00 2001
From: changwa1
Date: Tue, 26 Nov 2024 15:24:11 +0800
Subject: [PATCH] improve ut

Signed-off-by: changwa1
---
 tests/neural_compressor/test_optimization.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tests/neural_compressor/test_optimization.py b/tests/neural_compressor/test_optimization.py
index 7d54214ca..56054e480 100644
--- a/tests/neural_compressor/test_optimization.py
+++ b/tests/neural_compressor/test_optimization.py
@@ -467,12 +467,14 @@ def _compute_metrics(pred):
 
 class WeightOnlyQuantizationTest(INCTestMixin):
     WEIGHT_ONLY_CONFIG = (
-        ("rtn", 4),
-        ("gptq", 4),
+        ("rtn", 4, False),
+        ("rtn", 4, True),
+        ("gptq", 4, False),
+        ("gptq", 4, True),
     )
 
     @parameterized.expand(WEIGHT_ONLY_CONFIG)
-    def test_weight_only_quantization(self, methodology, bits):
+    def test_weight_only_quantization(self, methodology, bits, use_layer_wise):
         from neural_compressor.transformers import GPTQConfig, RtnConfig
 
         model_name = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"
@@ -489,10 +491,10 @@ def test_weight_only_quantization(self, methodology, bits):
                 batch_size=5,
                 seq_len=32,
                 block_size=16,
-                use_layer_wise=True,
+                use_layer_wise=use_layer_wise,
             )
         else:
-            quantization_config = RtnConfig(bits=bits, group_size=8, use_layer_wise=True)
+            quantization_config = RtnConfig(bits=bits, group_size=8, use_layer_wise=use_layer_wise)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
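
For context (outside the patch itself): a minimal sketch, assuming the `parameterized` package, of how `parameterized.expand` turns the widened `WEIGHT_ONLY_CONFIG` 3-tuples into four independent test cases, one per (methodology, bits, use_layer_wise) combination. The assertion body below is a stand-in, not the real test, which builds an INC `RtnConfig`/`GPTQConfig` and quantizes a tiny GPT-Neo model.

    # Sketch of the parameterization mechanism only; the real test body
    # lives in tests/neural_compressor/test_optimization.py.
    import unittest

    from parameterized import parameterized

    WEIGHT_ONLY_CONFIG = (
        ("rtn", 4, False),
        ("rtn", 4, True),
        ("gptq", 4, False),
        ("gptq", 4, True),
    )

    class WeightOnlyQuantizationSketch(unittest.TestCase):
        @parameterized.expand(WEIGHT_ONLY_CONFIG)
        def test_weight_only_quantization(self, methodology, bits, use_layer_wise):
            # Each tuple yields its own generated test method, so both the
            # layer-wise and non-layer-wise paths of RTN and GPTQ run.
            self.assertIn(methodology, ("rtn", "gptq"))
            self.assertEqual(bits, 4)
            self.assertIsInstance(use_layer_wise, bool)

    if __name__ == "__main__":
        unittest.main()

With the library's default naming, the generated cases appear as test_weight_only_quantization_0_rtn, ..._1_rtn, ..._2_gptq, and ..._3_gptq, which is why each (methodology, use_layer_wise) pair can pass or fail independently rather than only the use_layer_wise=True path being covered.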