Skip to content

Commit

Permalink
Update tests in accordance with NNCF 2.12 (#851)
Browse files Browse the repository at this point in the history
* Fix OV nncf-related tests

* Fix auto-config test

* Style tweaks

* Tweak reference values to match OV 2024.3

* Change reference number for newer transformers version. Add SDPA to the ignored scope.

* Create a special config for bert quantization

* Missing comma
  • Loading branch information
nikita-savelyevv authored Aug 5, 2024
1 parent 6388aeb commit a907bd8
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 26 deletions.
16 changes: 8 additions & 8 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,31 +86,31 @@ class OVCLIExportTestCase(unittest.TestCase):
)

TEST_4BIT_CONFIGURATONS = [
("text-generation-with-past", "opt125m", "int4_sym_g128", 4, 144),
("text-generation-with-past", "opt125m", "int4_sym_g128", 4, 72),
("text-generation-with-past", "opt125m", "int4_asym_g128", 4, 144),
("text-generation-with-past", "opt125m", "int4_sym_g64", 4, 144),
("text-generation-with-past", "opt125m", "int4_sym_g64", 4, 72),
("text-generation-with-past", "opt125m", "int4_asym_g64", 4, 144),
(
"text-generation-with-past",
"llama_awq",
"int4 --ratio 1.0 --sym --group-size 8 --all-layers",
0,
32,
16,
),
(
"text-generation-with-past",
"llama_awq",
"int4 --ratio 1.0 --sym --group-size 16 --awq --dataset wikitext2 --num-samples 100 "
"--sensitivity-metric max_activation_variance",
4,
28,
14,
),
(
"text-generation-with-past",
"llama_awq",
"int4 --ratio 1.0 --sym --group-size 16 --scale-estimation --dataset wikitext2 --num-samples 100 ",
4,
28,
14,
),
]

Expand Down Expand Up @@ -253,9 +253,9 @@ def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expec

def test_exporters_cli_int4_with_local_model_and_default_config(self):
with TemporaryDirectory() as tmpdir:
pt_model = AutoModelForCausalLM.from_pretrained(MODEL_NAMES["bloom"])
pt_model = AutoModelForCausalLM.from_pretrained(MODEL_NAMES["falcon-40b"])
# overload for matching with default configuration
pt_model.config._name_or_path = "bigscience/bloomz-7b1"
pt_model.config._name_or_path = "tiiuae/falcon-7b-instruct"
pt_model.save_pretrained(tmpdir)
subprocess.run(
f"optimum-cli export openvino --model {tmpdir} --task text-generation-with-past --weight-format int4 {tmpdir}",
Expand All @@ -267,7 +267,7 @@ def test_exporters_cli_int4_with_local_model_and_default_config(self):
rt_info = model.model.get_rt_info()
self.assertTrue("nncf" in rt_info)
self.assertTrue("weight_compression" in rt_info["nncf"])
default_config = _DEFAULT_4BIT_CONFIGS["bigscience/bloomz-7b1"]
default_config = _DEFAULT_4BIT_CONFIGS["tiiuae/falcon-7b-instruct"]
model_weight_compression_config = rt_info["nncf"]["weight_compression"]
sym = default_config.pop("sym", False)
bits = default_config.pop("bits", None)
Expand Down
8 changes: 4 additions & 4 deletions tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@

class OVQuantizerTest(unittest.TestCase):
SUPPORTED_ARCHITECTURES_TORCH_MODEL = (
(OVModelForSequenceClassification, "bert", 32 if is_transformers_version("<", "4.41.0") else 22, 35),
(OVModelForCausalLM, "gpt2", 41 if is_transformers_version("<", "4.42.0") else 21, 3),
(OVModelForSequenceClassification, "bert", 32, 35),
(OVModelForCausalLM, "gpt2", 41 if is_transformers_version("<", "4.42.0") else 31, 22),
)
SUPPORTED_ARCHITECTURES_OV_MODEL = (
(OVModelForSequenceClassification, "bert", 32, 35),
Expand Down Expand Up @@ -182,8 +182,8 @@ class OVWeightCompressionTest(unittest.TestCase):
(OVModelForCausalLM, "gpt2", 44, 44),
)

SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 62, 86),)
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 148),)
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 62, 43),)
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 74),)
SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "gpt2", 44, 44),)

LOAD_IN_4_BITS_SCOPE = (
Expand Down
31 changes: 17 additions & 14 deletions tests/openvino/test_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,11 @@ def tearDown(self):
shutil.rmtree(self.output_dir)


CUSTOMIZED_QUANTIZATION_CONFIG = deepcopy(DEFAULT_QUANTIZATION_CONFIG)
CUSTOMIZED_QUANTIZATION_CONFIG.update(
QUANTIZATION_CONFIG_FOR_BERT = deepcopy(DEFAULT_QUANTIZATION_CONFIG)
QUANTIZATION_CONFIG_FOR_BERT["ignored_scopes"].append("{re}.*scaled_dot_product_attention_0")

CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT = deepcopy(QUANTIZATION_CONFIG_FOR_BERT)
CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT.update(
{
"overflow_fix": "disable",
"initializer": {
Expand Down Expand Up @@ -325,30 +328,30 @@ def tearDown(self):
),
"default_quantization": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
nncf_compression_config=QUANTIZATION_CONFIG_FOR_BERT,
expected_fake_quantize=22,
expected_int8=32,
compression_metrics=["compression_loss"],
),
"distillation,default_quantization": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
teacher_model_id=MODEL_NAMES["bert"],
nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
nncf_compression_config=QUANTIZATION_CONFIG_FOR_BERT,
expected_fake_quantize=22,
expected_int8=32,
compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
),
"customized_quantization": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG,
nncf_compression_config=QUANTIZATION_CONFIG_FOR_BERT,
expected_fake_quantize=22,
expected_int8=32,
compression_metrics=["compression_loss"],
),
"distillation,customized_quantization": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
teacher_model_id=MODEL_NAMES["bert"],
nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG,
nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT,
expected_fake_quantize=22,
expected_int8=32,
compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
Expand All @@ -368,7 +371,7 @@ def tearDown(self):
),
"default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
expected_fake_quantize=22,
expected_int8=32,
expected_binary_masks=60,
Expand All @@ -377,7 +380,7 @@ def tearDown(self):
"customized_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
nncf_compression_config=[
CUSTOMIZED_QUANTIZATION_CONFIG,
CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT,
STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
],
expected_fake_quantize=22,
Expand All @@ -388,7 +391,7 @@ def tearDown(self):
"distillation,default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
teacher_model_id=MODEL_NAMES["bert"],
nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
expected_fake_quantize=22,
expected_int8=32,
expected_binary_masks=60,
Expand All @@ -398,7 +401,7 @@ def tearDown(self):
model_id=MODEL_NAMES["bert"],
teacher_model_id=MODEL_NAMES["bert"],
nncf_compression_config=[
CUSTOMIZED_QUANTIZATION_CONFIG,
CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT,
STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
],
expected_fake_quantize=22,
Expand All @@ -421,7 +424,7 @@ def tearDown(self):
),
"default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
expected_fake_quantize=22,
expected_int8=32,
expected_binary_masks=60,
Expand All @@ -430,7 +433,7 @@ def tearDown(self):
"customized_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
nncf_compression_config=[
CUSTOMIZED_QUANTIZATION_CONFIG,
CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT,
UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
],
expected_fake_quantize=22,
Expand All @@ -441,7 +444,7 @@ def tearDown(self):
"distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
teacher_model_id=MODEL_NAMES["bert"],
nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
expected_fake_quantize=22,
expected_int8=32,
expected_binary_masks=60,
Expand All @@ -451,7 +454,7 @@ def tearDown(self):
model_id=MODEL_NAMES["bert"],
teacher_model_id=MODEL_NAMES["bert"],
nncf_compression_config=[
CUSTOMIZED_QUANTIZATION_CONFIG,
CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT,
UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
],
expected_fake_quantize=22,
Expand Down

0 comments on commit a907bd8

Please sign in to comment.