Skip to content

Commit

Permalink
Update tests in accordance with NNCF 2.12 (#851)
Browse files Browse the repository at this point in the history
* Fix OV nncf-related tests

* Fix auto-config test

* Style tweaks

* Tweak reference values to match OV 2024.3

* Change reference number for newer transformers version. Add SDPA to the ignored scope.

* Create a special config for bert quantization

* Missing comma
  • Loading branch information
nikita-savelyevv authored Aug 5, 2024
1 parent 6388aeb commit a907bd8
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 26 deletions.
16 changes: 8 additions & 8 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,31 +86,31 @@ class OVCLIExportTestCase(unittest.TestCase):
)

TEST_4BIT_CONFIGURATONS = [
("text-generation-with-past", "opt125m", "int4_sym_g128", 4, 144),
("text-generation-with-past", "opt125m", "int4_sym_g128", 4, 72),
("text-generation-with-past", "opt125m", "int4_asym_g128", 4, 144),
("text-generation-with-past", "opt125m", "int4_sym_g64", 4, 144),
("text-generation-with-past", "opt125m", "int4_sym_g64", 4, 72),
("text-generation-with-past", "opt125m", "int4_asym_g64", 4, 144),
(
"text-generation-with-past",
"llama_awq",
"int4 --ratio 1.0 --sym --group-size 8 --all-layers",
0,
32,
16,
),
(
"text-generation-with-past",
"llama_awq",
"int4 --ratio 1.0 --sym --group-size 16 --awq --dataset wikitext2 --num-samples 100 "
"--sensitivity-metric max_activation_variance",
4,
28,
14,
),
(
"text-generation-with-past",
"llama_awq",
"int4 --ratio 1.0 --sym --group-size 16 --scale-estimation --dataset wikitext2 --num-samples 100 ",
4,
28,
14,
),
]

Expand Down Expand Up @@ -253,9 +253,9 @@ def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expec

def test_exporters_cli_int4_with_local_model_and_default_config(self):
with TemporaryDirectory() as tmpdir:
pt_model = AutoModelForCausalLM.from_pretrained(MODEL_NAMES["bloom"])
pt_model = AutoModelForCausalLM.from_pretrained(MODEL_NAMES["falcon-40b"])
# overload for matching with default configuration
pt_model.config._name_or_path = "bigscience/bloomz-7b1"
pt_model.config._name_or_path = "tiiuae/falcon-7b-instruct"
pt_model.save_pretrained(tmpdir)
subprocess.run(
f"optimum-cli export openvino --model {tmpdir} --task text-generation-with-past --weight-format int4 {tmpdir}",
Expand All @@ -267,7 +267,7 @@ def test_exporters_cli_int4_with_local_model_and_default_config(self):
rt_info = model.model.get_rt_info()
self.assertTrue("nncf" in rt_info)
self.assertTrue("weight_compression" in rt_info["nncf"])
default_config = _DEFAULT_4BIT_CONFIGS["bigscience/bloomz-7b1"]
default_config = _DEFAULT_4BIT_CONFIGS["tiiuae/falcon-7b-instruct"]
model_weight_compression_config = rt_info["nncf"]["weight_compression"]
sym = default_config.pop("sym", False)
bits = default_config.pop("bits", None)
Expand Down
8 changes: 4 additions & 4 deletions tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@

class OVQuantizerTest(unittest.TestCase):
SUPPORTED_ARCHITECTURES_TORCH_MODEL = (
(OVModelForSequenceClassification, "bert", 32 if is_transformers_version("<", "4.41.0") else 22, 35),
(OVModelForCausalLM, "gpt2", 41 if is_transformers_version("<", "4.42.0") else 21, 3),
(OVModelForSequenceClassification, "bert", 32, 35),
(OVModelForCausalLM, "gpt2", 41 if is_transformers_version("<", "4.42.0") else 31, 22),
)
SUPPORTED_ARCHITECTURES_OV_MODEL = (
(OVModelForSequenceClassification, "bert", 32, 35),
Expand Down Expand Up @@ -182,8 +182,8 @@ class OVWeightCompressionTest(unittest.TestCase):
(OVModelForCausalLM, "gpt2", 44, 44),
)

SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 62, 86),)
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 148),)
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 62, 43),)
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 74),)
SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "gpt2", 44, 44),)

LOAD_IN_4_BITS_SCOPE = (
Expand Down
31 changes: 17 additions & 14 deletions tests/openvino/test_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,11 @@ def tearDown(self):
shutil.rmtree(self.output_dir)


CUSTOMIZED_QUANTIZATION_CONFIG = deepcopy(DEFAULT_QUANTIZATION_CONFIG)
CUSTOMIZED_QUANTIZATION_CONFIG.update(
QUANTIZATION_CONFIG_FOR_BERT = deepcopy(DEFAULT_QUANTIZATION_CONFIG)
QUANTIZATION_CONFIG_FOR_BERT["ignored_scopes"].append("{re}.*scaled_dot_product_attention_0")

CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT = deepcopy(QUANTIZATION_CONFIG_FOR_BERT)
CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT.update(
{
"overflow_fix": "disable",
"initializer": {
Expand Down Expand Up @@ -325,30 +328,30 @@ def tearDown(self):
),
"default_quantization": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
nncf_compression_config=QUANTIZATION_CONFIG_FOR_BERT,
expected_fake_quantize=22,
expected_int8=32,
compression_metrics=["compression_loss"],
),
"distillation,default_quantization": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
teacher_model_id=MODEL_NAMES["bert"],
nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
nncf_compression_config=QUANTIZATION_CONFIG_FOR_BERT,
expected_fake_quantize=22,
expected_int8=32,
compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
),
"customized_quantization": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG,
nncf_compression_config=QUANTIZATION_CONFIG_FOR_BERT,
expected_fake_quantize=22,
expected_int8=32,
compression_metrics=["compression_loss"],
),
"distillation,customized_quantization": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
teacher_model_id=MODEL_NAMES["bert"],
nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG,
nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT,
expected_fake_quantize=22,
expected_int8=32,
compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
Expand All @@ -368,7 +371,7 @@ def tearDown(self):
),
"default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
expected_fake_quantize=22,
expected_int8=32,
expected_binary_masks=60,
Expand All @@ -377,7 +380,7 @@ def tearDown(self):
"customized_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
nncf_compression_config=[
CUSTOMIZED_QUANTIZATION_CONFIG,
CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT,
STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
],
expected_fake_quantize=22,
Expand All @@ -388,7 +391,7 @@ def tearDown(self):
"distillation,default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
teacher_model_id=MODEL_NAMES["bert"],
nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
expected_fake_quantize=22,
expected_int8=32,
expected_binary_masks=60,
Expand All @@ -398,7 +401,7 @@ def tearDown(self):
model_id=MODEL_NAMES["bert"],
teacher_model_id=MODEL_NAMES["bert"],
nncf_compression_config=[
CUSTOMIZED_QUANTIZATION_CONFIG,
CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT,
STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
],
expected_fake_quantize=22,
Expand All @@ -421,7 +424,7 @@ def tearDown(self):
),
"default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
expected_fake_quantize=22,
expected_int8=32,
expected_binary_masks=60,
Expand All @@ -430,7 +433,7 @@ def tearDown(self):
"customized_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
nncf_compression_config=[
CUSTOMIZED_QUANTIZATION_CONFIG,
CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT,
UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
],
expected_fake_quantize=22,
Expand All @@ -441,7 +444,7 @@ def tearDown(self):
"distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
model_id=MODEL_NAMES["bert"],
teacher_model_id=MODEL_NAMES["bert"],
nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
expected_fake_quantize=22,
expected_int8=32,
expected_binary_masks=60,
Expand All @@ -451,7 +454,7 @@ def tearDown(self):
model_id=MODEL_NAMES["bert"],
teacher_model_id=MODEL_NAMES["bert"],
nncf_compression_config=[
CUSTOMIZED_QUANTIZATION_CONFIG,
CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT,
UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
],
expected_fake_quantize=22,
Expand Down

0 comments on commit a907bd8

Please sign in to comment.