From 46624c39145058ffa03ae0e9d957ee711fa4e0f8 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 1 Jul 2024 16:08:14 +0200 Subject: [PATCH 01/17] fix transformers v4.42.0 compatibility --- .github/workflows/test_openvino.yml | 2 +- optimum/intel/ipex/modeling_base.py | 1 + optimum/intel/openvino/modeling_base.py | 1 + setup.py | 4 ++-- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index ff38fb41df..6f9675cde7 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8", "3.12"] - transformers-version: ["4.36.0", "4.41.*"] + transformers-version: ["4.36.0", "4.42.*"] os: [ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py index 9f4c0d1056..58fe359877 100644 --- a/optimum/intel/ipex/modeling_base.py +++ b/optimum/intel/ipex/modeling_base.py @@ -127,6 +127,7 @@ class IPEXModel(OptimizedModel): base_model_prefix = "ipex_model" main_input_name = "input_ids" output_name = "last_hidden_state" + _supports_cache_class = False def __init__( self, diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index c33fcdd44e..9fbc73e856 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -50,6 +50,7 @@ class OVBaseModel(OptimizedModel): auto_model_class = None export_feature = None + _supports_cache_class = False def __init__( self, diff --git a/setup.py b/setup.py index 6c35a264bc..196591cfe0 100644 --- a/setup.py +++ b/setup.py @@ -28,8 +28,8 @@ INSTALL_REQUIRE = [ "torch>=1.11", - "transformers>=4.36.0,<4.42.0", - "optimum~=1.20", + "transformers>=4.36.0,<4.43.0", + "optimum @ git+https://github.com/huggingface/optimum.git@transformer-442", "datasets>=1.4.0", "sentencepiece", "setuptools", From 4104598336fe5afb3e64122a1ae1a7c9a2417e5a Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 1 Jul 2024 16:18:08 +0200 Subject: [PATCH 02/17] fix inc modeling --- optimum/intel/neural_compressor/modeling_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/intel/neural_compressor/modeling_base.py b/optimum/intel/neural_compressor/modeling_base.py index bb3d2fe8c8..a12cfc84e5 100644 --- a/optimum/intel/neural_compressor/modeling_base.py +++ b/optimum/intel/neural_compressor/modeling_base.py @@ -71,6 +71,7 @@ class INCModel(OptimizedModel): auto_model_class = AutoModel export_feature = "feature-extraction" base_model_prefix = "inc_model" + _supports_cache_class = False def __init__( self, From 7abc2b0ea2eff95613f0d61758cc770c82e4a3fc Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 11:10:38 +0200 Subject: [PATCH 03/17] update setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 196591cfe0..b063b08c69 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.43.0", - "optimum @ git+https://github.com/huggingface/optimum.git@transformer-442", + "optimum @ git+https://github.com/huggingface/optimum.git@fix-sentence-trfs", "datasets>=1.4.0", "sentencepiece", "setuptools", From e261d2cb3fc1f0869d52d240fadb473240ee7c57 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 11:28:36 +0200 Subject: [PATCH 04/17] fix --- optimum/intel/generation/modeling.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/optimum/intel/generation/modeling.py b/optimum/intel/generation/modeling.py index 7d7e854311..22a4745f0c 100644 --- a/optimum/intel/generation/modeling.py +++ b/optimum/intel/generation/modeling.py @@ -90,6 +90,7 @@ class BaseModelForCausalLM(OptimizedModel, GenerationMixin): export_feature = "text-generation" main_input_name = "input_ids" base_model_prefix = "torch_script_model" + _supports_cache_class = False def __init__( self, From 1e28f4b1af7d9a40885fa55262fa547153a0ae79 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 12:03:03 +0200 Subject: [PATCH 05/17] add missing argument --- optimum/intel/neural_compressor/trainer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/optimum/intel/neural_compressor/trainer.py b/optimum/intel/neural_compressor/trainer.py index ea9fe9a1cd..8a16b30c87 100644 --- a/optimum/intel/neural_compressor/trainer.py +++ b/optimum/intel/neural_compressor/trainer.py @@ -681,6 +681,7 @@ def _inner_training_loop( def save_model( self, output_dir: Optional[str] = None, + _internal_call: bool = False, save_onnx_model: bool = False, ): """ @@ -695,6 +696,7 @@ def save_model( output_dir=output_dir, save_onnx_model=save_onnx_model, ) + # TODO: push to hub if self.args.push_to_hub and not _internal_call def _save( self, From 20f5c37b4d217333e2c9149949b289ec8a52f25d Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 15:01:01 +0200 Subject: [PATCH 06/17] fix patching --- optimum/exporters/openvino/model_patcher.py | 37 +++++++++++++-------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index d0aabfb2d5..21451d492e 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -1161,7 +1161,7 @@ def __exit__(self, exc_type, exc_value, traceback): block.attention.forward = block.attention._orig_forward -# Adapted from https://github.com/huggingface/transformers/blob/ccdabc5642bf84849af93f591e207dc625c8e1e1/src/transformers/models/phi3/modeling_phi3.py#L426 +# Adapted from https://github.com/huggingface/transformers/blob/ccdabc5642bf84849af93f591e207dc625c8e1e1/src/transformers/models/phi3/modeling_phi3.py#L729 def _phi3_self_attn_sdpa_forward( self, hidden_states: torch.Tensor, @@ -1170,6 +1170,7 @@ def _phi3_self_attn_sdpa_forward( past_key_value: Optional[Tuple[torch.Tensor]] = None, output_attentions: bool = False, use_cache: bool = False, + cache_position: Optional[torch.LongTensor] = None, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: if output_attentions: return self._orig_forward( @@ -1181,10 +1182,9 @@ def _phi3_self_attn_sdpa_forward( use_cache=use_cache, ) - # TO DO: remove llama imports when transformers with phi3 support will be released - try: + if is_transformers_version(">=", "4.41.0"): from transformers.models.phi3.modeling_phi3 import apply_rotary_pos_emb, repeat_kv - except ImportError: + else: from transformers.models.llama.modeling_llama import apply_rotary_pos_emb, repeat_kv bsz, q_len, _ = hidden_states.size() @@ -1206,17 +1206,15 @@ def _phi3_self_attn_sdpa_forward( query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids) if past_key_value is not None: - cache_kwargs = {"sin": sin, "cos": cos} # Specific to RoPE models + cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position} # Specific to RoPE models key_states, value_states = past_key_value.update(key_states, 
value_states, self.layer_idx, cache_kwargs) key_states = repeat_kv(key_states, self.num_key_value_groups) value_states = repeat_kv(value_states, self.num_key_value_groups) + causal_mask = attention_mask if attention_mask is not None: - if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - raise ValueError( - f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}" - ) + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] # SDPA with memory-efficient backend is currently (torch==2.1.2) bugged with non-contiguous inputs with custom attn_mask, # Reference: https://github.com/pytorch/pytorch/issues/112577. @@ -1229,7 +1227,7 @@ def _phi3_self_attn_sdpa_forward( query_states, key_states, value_states, - attn_mask=attention_mask, + attn_mask=causal_mask, dropout_p=self.attention_dropout if self.training else 0.0, # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. is_causal=self.is_causal and attention_mask is None and q_len > 1, @@ -1561,7 +1559,7 @@ def __exit__(self, exc_type, exc_value, traceback): layer.attn._attn = layer.attn._orig_attn -# adapted from https://github.com/huggingface/transformers/blob/v4.40.2/src/transformers/models/dbrx/modeling_dbrx.py#L763 +# Adapted from https://github.com/huggingface/transformers/blob/v4.40.2/src/transformers/models/dbrx/modeling_dbrx.py#L763 def _dbrx_experts_forward( self, x: torch.Tensor, weights: torch.Tensor, top_weights: torch.Tensor, top_experts: torch.LongTensor ): @@ -1606,7 +1604,7 @@ def _dbrx_experts_forward( return out -# adapted from https://github.com/huggingface/transformers/blob/v4.40.2/src/transformers/models/dbrx/modeling_dbrx.py#L1228 +# Adapted from https://github.com/huggingface/transformers/blob/v4.40.2/src/transformers/models/dbrx/modeling_dbrx.py#L1228 def _dbrx_update_causal_mask_legacy( self, attention_mask: Optional[torch.Tensor], input_tensor: torch.Tensor, cache_position: torch.Tensor ) -> Optional[torch.Tensor]: @@ -1803,6 +1801,7 @@ def __exit__(self, exc_type, exc_value, traceback): block.ffn.experts.forward = block.ffn.experts._orig_forward +# Adapted from https://github.com/huggingface/transformers/blob/v4.41.0/src/transformers/models/persimmon/modeling_persimmon.py#L264 def _persimmon_self_attn_sdpa_forward( self, hidden_states: torch.Tensor, @@ -1811,6 +1810,7 @@ def _persimmon_self_attn_sdpa_forward( past_key_value: Optional["Cache"] = None, output_attentions: bool = False, use_cache: bool = False, + cache_position: Optional[torch.LongTensor] = None, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: from transformers.models.persimmon.modeling_persimmon import apply_rotary_pos_emb @@ -1865,14 +1865,23 @@ def _persimmon_self_attn_sdpa_forward( if past_key_value is not None: # Specific to RoPE models with partial rotation - cache_kwargs = {"sin": sin, "cos": cos, "partial_rotation_size": self.rotary_emb.dim} + cache_kwargs = { + "sin": sin, + "cos": cos, + "partial_rotation_size": self.rotary_emb.dim, + "cache_position": cache_position, + } key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs) + causal_mask = attention_mask + if attention_mask is not None: # no matter the length, we just slice it + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] + attn_output = F.scaled_dot_product_attention( query_states, key_states, value_states, - attention_mask, + causal_mask, scale=1 / 
math.sqrt(self.head_dim), dropout_p=self.attention_dropout.p, ) attn_output = attn_output.transpose(1, 2).contiguous() attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) attn_output = self.dense(attn_output) if not output_attentions: attn_weights = None return attn_output, attn_weights, past_key_value From e7c072f934e72414203c89e0047c30ad2108eb3e Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 15:02:10 +0200 Subject: [PATCH 07/17] format --- optimum/exporters/openvino/model_patcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 21451d492e..7812682b8b 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -1876,7 +1876,7 @@ def _persimmon_self_attn_sdpa_forward( causal_mask = attention_mask if attention_mask is not None: # no matter the length, we just slice it causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] - + attn_output = F.scaled_dot_product_attention( query_states, key_states, From 1b89adba844d338600c9fa271afcd2f09c428479 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 16:44:43 +0200 Subject: [PATCH 08/17] fix num quant op --- tests/openvino/test_quantization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 67970fbbcb..0d9329ecdc 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -76,7 +76,7 @@ class OVQuantizerTest(unittest.TestCase): SUPPORTED_ARCHITECTURES_TORCH_MODEL = ( (OVModelForSequenceClassification, "bert", 22, 35), - (OVModelForCausalLM, "gpt2", 41, 3), + (OVModelForCausalLM, "gpt2", 21, 3), ) SUPPORTED_ARCHITECTURES_OV_MODEL = ( (OVModelForSequenceClassification, "bert", 32, 35), From f3b704e4194d54db3b6badc4f326098d02bc6e28 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 16:48:08 +0200 Subject: [PATCH 09/17] remove incompatible transformers generation --- tests/openvino/test_modeling.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index c7a381a0e2..a4fbba6ca2 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -697,7 +697,6 @@ def test_compare_to_transformers(self, model_arch): ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG, **model_kwargs) self.assertIsInstance(ov_model.config, PretrainedConfig) self.assertTrue(ov_model.use_cache) - self.assertEqual(ov_model.stateful, ov_model.config.model_type not in not_stateful) tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS) tokens = tokenizer("This is a sample output", return_tensors="pt") tokens.pop("token_type_ids", None) @@ -749,6 +748,11 @@ def test_compare_to_transformers(self, model_arch): ) ov_outputs = ov_model.generate(**tokens, generation_config=gen_config) + + # TODO: update _update_model_kwargs_for_generation so that it's compatible with transformers >= v4.42.0 + if model_arch not in ["chatglm", "glm4"] and is_transformers_version(">=", "4.42.0"): + return + transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config) self.assertTrue(torch.allclose(ov_outputs, transformers_outputs)) From 9f6767a077f5500d61094b8afda4ab2b55d25576 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 17:03:32 +0200 Subject: [PATCH 10/17] update setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b063b08c69..e475e59dca 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7
@@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.43.0", - "optimum @ git+https://github.com/huggingface/optimum.git@fix-sentence-trfs", + "optimum~=1.21", "datasets>=1.4.0", "sentencepiece", "setuptools", From 31c93103cfd6a581912f503fd363b2c5aeecd4f3 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 18:25:13 +0200 Subject: [PATCH 11/17] fix op --- tests/openvino/test_quantization.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 0d9329ecdc..f0f18830e3 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -90,8 +90,10 @@ def test_automodel_static_quantization(self, model_cls, model_name, expected_fak dataset_name, dataset_config_name, column_name = _TASK_TO_DATASET[task] file_name = "openvino_quantized_model.xml" - if model_name == "bert" and is_transformers_version("<", "4.41.0"): + if is_transformers_version("<", "4.41.0") and model_name == "bert": expected_fake_quantize = 32 + if is_transformers_version("<", "4.42.0") and model_name == "gpt2": + expected_fake_quantize = 41 def preprocess_function(examples, tokenizer): return tokenizer(examples[column_name], padding="max_length", max_length=128, truncation=True) From abc14284ecb69da1206826e332d8a27d5e02444d Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 3 Jul 2024 11:54:37 +0200 Subject: [PATCH 12/17] fix test --- tests/openvino/test_training.py | 111 ++++++++++++++++---------------- tests/openvino/utils_tests.py | 1 + 2 files changed, 57 insertions(+), 55 deletions(-) diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py index 375fc6e4a1..475299e18a 100644 --- a/tests/openvino/test_training.py +++ b/tests/openvino/test_training.py @@ -58,6 +58,7 @@ from optimum.intel.openvino.utils import OV_XML_FILE_NAME from optimum.intel.utils.import_utils import is_transformers_version +from utils_tests import MODEL_NAMES F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"} @@ -317,56 +318,56 @@ def tearDown(self): # TODO: Uncomment failes tests after NNCF 2.8.1 patch release OVTRAINER_TEXT_CLASSIFICATION_TEST_DESCRIPTORS = { "distillation": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[], compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "default_quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss"], ), "distillation,default_quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "customized_quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss"], ), "distillation,customized_quantization": 
OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, expected_binary_masks=60, compression_metrics=["compression_loss"], ), "distillation,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, expected_binary_masks=60, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, @@ -374,7 +375,7 @@ def tearDown(self): compression_metrics=["compression_loss"], ), "customized_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=[ CUSTOMIZED_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, @@ -385,8 +386,8 @@ def tearDown(self): compression_metrics=["compression_loss"], ), "distillation,default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, @@ -394,8 +395,8 @@ def tearDown(self): compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,customized_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[ CUSTOMIZED_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, @@ -406,20 +407,20 @@ def tearDown(self): compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, expected_binary_masks=60, compression_metrics=["compression_loss"], ), "distillation,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, expected_binary_masks=60, 
compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, @@ -427,7 +428,7 @@ def tearDown(self): compression_metrics=["compression_loss"], ), "customized_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=[ CUSTOMIZED_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, @@ -438,8 +439,8 @@ def tearDown(self): compression_metrics=["compression_loss"], ), "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, @@ -447,8 +448,8 @@ def tearDown(self): compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,customized_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[ CUSTOMIZED_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, @@ -552,62 +553,62 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): } UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN = deepcopy(STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN) UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN["params"]["enable_structured_masking"] = False - OVTRAINER_IMAGE_CLASSIFICATION_TEST_DESCRIPTORS = { "default_quantization": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, compression_metrics=["compression_loss"], ), "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, expected_binary_masks=48, compression_metrics=["compression_loss"], ), "unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, expected_binary_masks=48, compression_metrics=["compression_loss"], ), "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, expected_binary_masks=48, compression_metrics=["compression_loss"], ), "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - 
model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, expected_binary_masks=48, compression_metrics=["compression_loss"], ), "distillation,default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", - teacher_model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], + teacher_model_id=MODEL_NAMES["swin"], nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", - teacher_model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], + teacher_model_id=MODEL_NAMES["swin"], nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), } +# TODO : can be moved to MODEL_NAMES["swin-window"] after transformers v4.42.3 class OVTrainerImageClassificationTrainingTest(OVTrainerBaseTrainingTest): @@ -735,26 +736,26 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): OVTRAINER_AUDIO_CLASSIFICATION_TEST_DESCRIPTORS = { "quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, compression_metrics=["compression_loss"], ), "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"] nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_binary_masks=48, compression_metrics=["compression_loss"], ), "unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_binary_masks=48, compression_metrics=["compression_loss"], ), "quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, @@ -762,7 +763,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): compression_metrics=["compression_loss"], ), "quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, @@ -770,8 +771,8 @@ def 
check_ovmodel_reshaping(self, ovmodel: OVModel): compression_metrics=["compression_loss"], ), "distillation,quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", - teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], + teacher_model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, @@ -779,8 +780,8 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", - teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], + teacher_model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 590beefb36..1f9b051d7d 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -118,6 +118,7 @@ "sew_d": "asapp/sew-d-tiny-100k-ft-ls100h", "arctic": "katuni4ka/tiny-random-snowflake", "swin": "hf-internal-testing/tiny-random-SwinModel", + "swin-window": "yujiepan/tiny-random-swin-patch4-window7-224", "t5": "hf-internal-testing/tiny-random-t5", "trocr": "microsoft/trocr-small-handwritten", "unispeech": "hf-internal-testing/tiny-random-unispeech", From 5e9be54e56f1362d218f20c2dcdbc7fbacad5e07 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 3 Jul 2024 11:57:03 +0200 Subject: [PATCH 13/17] tmp for test --- .github/workflows/test_openvino.yml | 2 +- setup.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index 6f9675cde7..c73b5ca3a8 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8", "3.12"] - transformers-version: ["4.36.0", "4.42.*"] + transformers-version: ["4.36.0", "4.41.*","4.42.*"] os: [ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/setup.py b/setup.py index e475e59dca..7d958a8a0b 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,8 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.43.0", - "optimum~=1.21", + "optimum @ git+https://github.com/huggingface/optimum.git", + #"optimum>=1.21.2,<1.22.0", "datasets>=1.4.0", "sentencepiece", "setuptools", From 4ee9c995a2a81b9396fdd2194f5a2a558ad11aa5 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 3 Jul 2024 12:13:35 +0200 Subject: [PATCH 14/17] format --- setup.py | 2 +- tests/openvino/test_training.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 7d958a8a0b..f4d9997298 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ "torch>=1.11", "transformers>=4.36.0,<4.43.0", "optimum @ git+https://github.com/huggingface/optimum.git", - #"optimum>=1.21.2,<1.22.0", + # "optimum>=1.21.2,<1.22.0", "datasets>=1.4.0", "sentencepiece", "setuptools", diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py index 475299e18a..9e85274454 100644 --- a/tests/openvino/test_training.py +++ 
b/tests/openvino/test_training.py @@ -45,6 +45,7 @@ from transformers.testing_utils import slow from transformers.trainer_utils import EvalPrediction, TrainOutput from transformers.utils import WEIGHTS_NAME +from utils_tests import MODEL_NAMES from optimum.intel.openvino import OVTrainingArguments from optimum.intel.openvino.configuration import OVConfig @@ -58,7 +59,6 @@ from optimum.intel.openvino.utils import OV_XML_FILE_NAME from optimum.intel.utils.import_utils import is_transformers_version -from utils_tests import MODEL_NAMES F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"} @@ -743,7 +743,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): compression_metrics=["compression_loss"], ), "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["wav2vec2-hf"] + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_binary_masks=48, compression_metrics=["compression_loss"], ), From d8fed913ea39cd82b3b38566bc56fc48acd6beb1 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 4 Jul 2024 10:46:27 +0200 Subject: [PATCH 15/17] update test --- .github/workflows/test_openvino.yml | 2 +- tests/openvino/test_modeling.py | 5 ----- tests/openvino/test_quantization.py | 9 ++------- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index c73b5ca3a8..1ab2d5296f 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8", "3.12"] - transformers-version: ["4.36.0", "4.41.*","4.42.*"] + transformers-version: ["4.36.0","4.42.*"] os: [ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index a4fbba6ca2..bdc1e9afee 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -748,11 +748,6 @@ def test_compare_to_transformers(self, model_arch): ) ov_outputs = ov_model.generate(**tokens, generation_config=gen_config) - - # TODO: update _update_model_kwargs_for_generation so that it's compatible with transformers >= v4.42.0 - if model_arch not in ["chatglm", "glm4"] and is_transformers_version(">=", "4.42.0"): - return - transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config) self.assertTrue(torch.allclose(ov_outputs, transformers_outputs)) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index f0f18830e3..1f71c9a9ed 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -75,8 +75,8 @@ class OVQuantizerTest(unittest.TestCase): SUPPORTED_ARCHITECTURES_TORCH_MODEL = ( - (OVModelForSequenceClassification, "bert", 22, 35), - (OVModelForCausalLM, "gpt2", 21, 3), + (OVModelForSequenceClassification, "bert", 32 if is_transformers_version("<", "4.41.0") else 22, 35), + (OVModelForCausalLM, "gpt2", 41 if is_transformers_version("<", "4.42.0") else 21, 3), ) SUPPORTED_ARCHITECTURES_OV_MODEL = ( (OVModelForSequenceClassification, "bert", 32, 35), @@ -90,11 +90,6 @@ def test_automodel_static_quantization(self, model_cls, model_name, expected_fak dataset_name, dataset_config_name, column_name = _TASK_TO_DATASET[task] file_name = "openvino_quantized_model.xml" - if is_transformers_version("<", "4.41.0") and model_name == "bert": - expected_fake_quantize = 32 - if is_transformers_version("<", "4.42.0") and model_name == "gpt2": - expected_fake_quantize = 41 - def preprocess_function(examples, tokenizer): return tokenizer(examples[column_name], padding="max_length", max_length=128, truncation=True)
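The last hunk above folds the version-dependent reference counts directly into the test parametrization via `is_transformers_version`. Below is a minimal sketch of how such a version gate can be implemented, assuming a helper equivalent to the one exposed by `optimum.intel.utils.import_utils`; the actual implementation may differ.

```python
# Minimal sketch of a transformers version gate; the real helper is
# optimum.intel.utils.import_utils.is_transformers_version, whose exact
# implementation may differ from this illustration.
import operator

import transformers
from packaging import version

_OPERATORS = {"<": operator.lt, "<=": operator.le, "==": operator.eq, ">=": operator.ge, ">": operator.gt}


def is_transformers_version(comparison: str, reference: str) -> bool:
    """Return whether the installed transformers version satisfies the comparison."""
    return _OPERATORS[comparison](version.parse(transformers.__version__), version.parse(reference))


# Mirrors the parametrization above: older releases produce more fake-quantize ops for gpt2.
expected_fake_quantize = 41 if is_transformers_version("<", "4.42.0") else 21
```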
From 65e1b040645840f3c008ac1daac0bca282b73900 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 4 Jul 2024 11:38:21 +0200 Subject: [PATCH 16/17] update setup --- .github/workflows/test_openvino.yml | 2 +- setup.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index 1ab2d5296f..6f9675cde7 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8", "3.12"] - transformers-version: ["4.36.0","4.42.*"] + transformers-version: ["4.36.0", "4.42.*"] os: [ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/setup.py b/setup.py index f4d9997298..8abcbfaf08 100644 --- a/setup.py +++ b/setup.py @@ -29,8 +29,7 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.43.0", - "optimum @ git+https://github.com/huggingface/optimum.git", - # "optimum>=1.21.2,<1.22.0", + "optimum>=1.21.2,<1.22.0", "datasets>=1.4.0", "sentencepiece", "setuptools", From 52c3fc07ca5e114ab22698ed3f47de83181960c3 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 5 Jul 2024 11:13:13 +0200 Subject: [PATCH 17/17] update setup --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8abcbfaf08..a1c83259a6 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,8 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.43.0", - "optimum>=1.21.2,<1.22.0", + "optimum~=1.21", + # "optimum>=1.21.2,<1.22.0", "datasets>=1.4.0", "sentencepiece", "setuptools",
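A recurring change in this series (patches 01, 02, and 04) is declaring `_supports_cache_class = False` on model classes that do not subclass transformers' `PreTrainedModel`. The sketch below illustrates the assumed mechanism: generation utilities in transformers v4.42 read this attribute to decide whether a model can consume the new `Cache` classes, falling back to the legacy tuple format otherwise. The class and function names here are illustrative stand-ins, not the actual transformers source.

```python
# Hedged sketch of the assumed mechanism behind `_supports_cache_class`;
# simplified for illustration, not the actual transformers implementation.


class OptimizedModelStub:
    # Same default value transformers' PreTrainedModel declares. Classes that do
    # not subclass PreTrainedModel (as in the patches above) must define it
    # themselves, otherwise generation utilities fail when reading it.
    _supports_cache_class = False


def select_cache_format(model) -> str:
    # Simplified stand-in for the check performed while preparing generation
    # inputs: only models flagging support receive new-style Cache objects.
    if getattr(model, "_supports_cache_class", False):
        return "DynamicCache"
    return "legacy tuple past_key_values"


print(select_cache_format(OptimizedModelStub()))  # -> legacy tuple past_key_values
```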