From a907bd8759ae41b6cf1fe3c3fe858218c40aab10 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Mon, 5 Aug 2024 11:38:41 +0200 Subject: [PATCH] Update tests in accordance to NNCF 2.12 (#851) * Fix OV nncf-related tests * Fix auto-config test * Style tweaks * Tweak reference actual to OV 2024.3 * Change ref. number for newer transformers version. Add SDPA into ignored scope. * Create a special config for bert quantization * Missing comma --- tests/openvino/test_exporters_cli.py | 16 +++++++------- tests/openvino/test_quantization.py | 8 +++---- tests/openvino/test_training.py | 31 +++++++++++++++------------- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py index c11f3518e..20e126ac2 100644 --- a/tests/openvino/test_exporters_cli.py +++ b/tests/openvino/test_exporters_cli.py @@ -86,16 +86,16 @@ class OVCLIExportTestCase(unittest.TestCase): ) TEST_4BIT_CONFIGURATONS = [ - ("text-generation-with-past", "opt125m", "int4_sym_g128", 4, 144), + ("text-generation-with-past", "opt125m", "int4_sym_g128", 4, 72), ("text-generation-with-past", "opt125m", "int4_asym_g128", 4, 144), - ("text-generation-with-past", "opt125m", "int4_sym_g64", 4, 144), + ("text-generation-with-past", "opt125m", "int4_sym_g64", 4, 72), ("text-generation-with-past", "opt125m", "int4_asym_g64", 4, 144), ( "text-generation-with-past", "llama_awq", "int4 --ratio 1.0 --sym --group-size 8 --all-layers", 0, - 32, + 16, ), ( "text-generation-with-past", @@ -103,14 +103,14 @@ class OVCLIExportTestCase(unittest.TestCase): "int4 --ratio 1.0 --sym --group-size 16 --awq --dataset wikitext2 --num-samples 100 " "--sensitivity-metric max_activation_variance", 4, - 28, + 14, ), ( "text-generation-with-past", "llama_awq", "int4 --ratio 1.0 --sym --group-size 16 --scale-estimation --dataset wikitext2 --num-samples 100 ", 4, - 28, + 14, ), ] @@ -253,9 +253,9 @@ def test_exporters_cli_int4(self, task: str, model_type: str, 
option: str, expec def test_exporters_cli_int4_with_local_model_and_default_config(self): with TemporaryDirectory() as tmpdir: - pt_model = AutoModelForCausalLM.from_pretrained(MODEL_NAMES["bloom"]) + pt_model = AutoModelForCausalLM.from_pretrained(MODEL_NAMES["falcon-40b"]) # overload for matching with default configuration - pt_model.config._name_or_path = "bigscience/bloomz-7b1" + pt_model.config._name_or_path = "tiiuae/falcon-7b-instruct" pt_model.save_pretrained(tmpdir) subprocess.run( f"optimum-cli export openvino --model {tmpdir} --task text-generation-with-past --weight-format int4 {tmpdir}", @@ -267,7 +267,7 @@ def test_exporters_cli_int4_with_local_model_and_default_config(self): rt_info = model.model.get_rt_info() self.assertTrue("nncf" in rt_info) self.assertTrue("weight_compression" in rt_info["nncf"]) - default_config = _DEFAULT_4BIT_CONFIGS["bigscience/bloomz-7b1"] + default_config = _DEFAULT_4BIT_CONFIGS["tiiuae/falcon-7b-instruct"] model_weight_compression_config = rt_info["nncf"]["weight_compression"] sym = default_config.pop("sym", False) bits = default_config.pop("bits", None) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index c6da7e77b..dedc5d441 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -81,8 +81,8 @@ class OVQuantizerTest(unittest.TestCase): SUPPORTED_ARCHITECTURES_TORCH_MODEL = ( - (OVModelForSequenceClassification, "bert", 32 if is_transformers_version("<", "4.41.0") else 22, 35), - (OVModelForCausalLM, "gpt2", 41 if is_transformers_version("<", "4.42.0") else 21, 3), + (OVModelForSequenceClassification, "bert", 32, 35), + (OVModelForCausalLM, "gpt2", 41 if is_transformers_version("<", "4.42.0") else 31, 22), ) SUPPORTED_ARCHITECTURES_OV_MODEL = ( (OVModelForSequenceClassification, "bert", 32, 35), @@ -182,8 +182,8 @@ class OVWeightCompressionTest(unittest.TestCase): (OVModelForCausalLM, "gpt2", 44, 44), ) - 
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 62, 86),) - SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 148),) + SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 62, 43),) + SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 74),) SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "gpt2", 44, 44),) LOAD_IN_4_BITS_SCOPE = ( diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py index 9e8527445..1f88b501a 100644 --- a/tests/openvino/test_training.py +++ b/tests/openvino/test_training.py @@ -280,8 +280,11 @@ def tearDown(self): shutil.rmtree(self.output_dir) -CUSTOMIZED_QUANTIZATION_CONFIG = deepcopy(DEFAULT_QUANTIZATION_CONFIG) -CUSTOMIZED_QUANTIZATION_CONFIG.update( +QUANTIZATION_CONFIG_FOR_BERT = deepcopy(DEFAULT_QUANTIZATION_CONFIG) +QUANTIZATION_CONFIG_FOR_BERT["ignored_scopes"].append("{re}.*scaled_dot_product_attention_0") + +CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT = deepcopy(QUANTIZATION_CONFIG_FOR_BERT) +CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT.update( { "overflow_fix": "disable", "initializer": { @@ -325,7 +328,7 @@ def tearDown(self): ), "default_quantization": OVTrainerTestDescriptor( model_id=MODEL_NAMES["bert"], - nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, + nncf_compression_config=QUANTIZATION_CONFIG_FOR_BERT, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss"], @@ -333,14 +336,14 @@ def tearDown(self): "distillation,default_quantization": OVTrainerTestDescriptor( model_id=MODEL_NAMES["bert"], teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, + nncf_compression_config=QUANTIZATION_CONFIG_FOR_BERT, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss",
"distillation_loss", "task_loss"], ), "customized_quantization": OVTrainerTestDescriptor( model_id=MODEL_NAMES["bert"], - nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG, + nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss"], @@ -348,7 +351,7 @@ def tearDown(self): "distillation,customized_quantization": OVTrainerTestDescriptor( model_id=MODEL_NAMES["bert"], teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG, + nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -368,7 +371,7 @@ def tearDown(self): ), "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( model_id=MODEL_NAMES["bert"], - nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], + nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, expected_binary_masks=60, @@ -377,7 +380,7 @@ def tearDown(self): "customized_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( model_id=MODEL_NAMES["bert"], nncf_compression_config=[ - CUSTOMIZED_QUANTIZATION_CONFIG, + CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, ], expected_fake_quantize=22, @@ -388,7 +391,7 @@ def tearDown(self): "distillation,default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( model_id=MODEL_NAMES["bert"], teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], + nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, expected_binary_masks=60, @@ -398,7 +401,7 @@ def
tearDown(self): model_id=MODEL_NAMES["bert"], teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[ - CUSTOMIZED_QUANTIZATION_CONFIG, + CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, ], expected_fake_quantize=22, @@ -421,7 +424,7 @@ def tearDown(self): ), "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( model_id=MODEL_NAMES["bert"], - nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], + nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, expected_binary_masks=60, @@ -430,7 +433,7 @@ def tearDown(self): "customized_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( model_id=MODEL_NAMES["bert"], nncf_compression_config=[ - CUSTOMIZED_QUANTIZATION_CONFIG, + CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, ], expected_fake_quantize=22, @@ -441,7 +444,7 @@ def tearDown(self): "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( model_id=MODEL_NAMES["bert"], teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], + nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, expected_binary_masks=60, @@ -451,7 +454,7 @@ def tearDown(self): model_id=MODEL_NAMES["bert"], teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[ - CUSTOMIZED_QUANTIZATION_CONFIG, + CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, ], expected_fake_quantize=22,