diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index ff38fb41df..6f9675cde7 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8", "3.12"] - transformers-version: ["4.36.0", "4.41.*"] + transformers-version: ["4.36.0", "4.42.*"] os: [ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index d0aabfb2d5..7812682b8b 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -1161,7 +1161,7 @@ def __exit__(self, exc_type, exc_value, traceback): block.attention.forward = block.attention._orig_forward -# Adapted from https://github.com/huggingface/transformers/blob/ccdabc5642bf84849af93f591e207dc625c8e1e1/src/transformers/models/phi3/modeling_phi3.py#L426 +# Adapted from https://github.com/huggingface/transformers/blob/ccdabc5642bf84849af93f591e207dc625c8e1e1/src/transformers/models/phi3/modeling_phi3.py#L729 def _phi3_self_attn_sdpa_forward( self, hidden_states: torch.Tensor, @@ -1170,6 +1170,7 @@ def _phi3_self_attn_sdpa_forward( past_key_value: Optional[Tuple[torch.Tensor]] = None, output_attentions: bool = False, use_cache: bool = False, + cache_position: Optional[torch.LongTensor] = None, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: if output_attentions: return self._orig_forward( @@ -1181,10 +1182,9 @@ def _phi3_self_attn_sdpa_forward( use_cache=use_cache, ) - # TO DO: remove llama imports when transformers with phi3 support will be released - try: + if is_transformers_version(">=", "4.41.0"): from transformers.models.phi3.modeling_phi3 import apply_rotary_pos_emb, repeat_kv - except ImportError: + else: from transformers.models.llama.modeling_llama import apply_rotary_pos_emb, repeat_kv bsz, q_len, _ = hidden_states.size() @@ -1206,17 +1206,15 @@ def _phi3_self_attn_sdpa_forward( query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids) if past_key_value is not None: - cache_kwargs = {"sin": sin, "cos": cos} # Specific to RoPE models + cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position} # Specific to RoPE models key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs) key_states = repeat_kv(key_states, self.num_key_value_groups) value_states = repeat_kv(value_states, self.num_key_value_groups) + causal_mask = attention_mask if attention_mask is not None: - if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - raise ValueError( - f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}" - ) + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] # SDPA with memory-efficient backend is currently (torch==2.1.2) bugged with non-contiguous inputs with custom attn_mask, # Reference: https://github.com/pytorch/pytorch/issues/112577. @@ -1229,7 +1227,7 @@ def _phi3_self_attn_sdpa_forward( query_states, key_states, value_states, - attn_mask=attention_mask, + attn_mask=causal_mask, dropout_p=self.attention_dropout if self.training else 0.0, # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. is_causal=self.is_causal and attention_mask is None and q_len > 1, @@ -1561,7 +1559,7 @@ def __exit__(self, exc_type, exc_value, traceback): layer.attn._attn = layer.attn._orig_attn -# adapted from https://github.com/huggingface/transformers/blob/v4.40.2/src/transformers/models/dbrx/modeling_dbrx.py#L763 +# Adapted from https://github.com/huggingface/transformers/blob/v4.40.2/src/transformers/models/dbrx/modeling_dbrx.py#L763 def _dbrx_experts_forward( self, x: torch.Tensor, weights: torch.Tensor, top_weights: torch.Tensor, top_experts: torch.LongTensor ): @@ -1606,7 +1604,7 @@ def _dbrx_experts_forward( return out -# adapted from https://github.com/huggingface/transformers/blob/v4.40.2/src/transformers/models/dbrx/modeling_dbrx.py#L1228 +# Adapted from https://github.com/huggingface/transformers/blob/v4.40.2/src/transformers/models/dbrx/modeling_dbrx.py#L1228 def _dbrx_update_causal_mask_legacy( self, attention_mask: Optional[torch.Tensor], input_tensor: torch.Tensor, cache_position: torch.Tensor ) -> Optional[torch.Tensor]: @@ -1803,6 +1801,7 @@ def __exit__(self, exc_type, exc_value, traceback): block.ffn.experts.forward = block.ffn.experts._orig_forward +# Adapted from https://github.com/huggingface/transformers/blob/v4.41.0/src/transformers/models/persimmon/modeling_persimmon.py#L264 def _persimmon_self_attn_sdpa_forward( self, hidden_states: torch.Tensor, @@ -1811,6 +1810,7 @@ def _persimmon_self_attn_sdpa_forward( past_key_value: Optional["Cache"] = None, output_attentions: bool = False, use_cache: bool = False, + cache_position: Optional[torch.LongTensor] = None, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: from transformers.models.persimmon.modeling_persimmon import apply_rotary_pos_emb @@ -1865,14 +1865,23 @@ def _persimmon_self_attn_sdpa_forward( if past_key_value is not None: # Specific to RoPE models with partial rotation - cache_kwargs = {"sin": sin, "cos": cos, "partial_rotation_size": self.rotary_emb.dim} + cache_kwargs = { + "sin": sin, + "cos": cos, + "partial_rotation_size": self.rotary_emb.dim, + "cache_position": cache_position, + } key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs) + causal_mask = attention_mask + if attention_mask is not None: # no matter the length, we just slice it + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] + attn_output = F.scaled_dot_product_attention( query_states, key_states, value_states, - attention_mask, + causal_mask, scale=1 / math.sqrt(self.head_dim), dropout_p=self.attention_dropout.p, ) diff --git a/optimum/intel/generation/modeling.py b/optimum/intel/generation/modeling.py index 054ef44bfe..8b1bbaa266 100644 --- a/optimum/intel/generation/modeling.py +++ b/optimum/intel/generation/modeling.py @@ -90,6 +90,7 @@ class BaseModelForCausalLM(OptimizedModel, GenerationMixin): export_feature = "text-generation" main_input_name = "input_ids" base_model_prefix = "torch_script_model" + _supports_cache_class = False def __init__( self, diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py index 000ccfa3fe..5a53445469 100644 --- a/optimum/intel/ipex/modeling_base.py +++ b/optimum/intel/ipex/modeling_base.py @@ -128,6 +128,7 @@ class IPEXModel(OptimizedModel): base_model_prefix = "ipex_model" main_input_name = "input_ids" output_name = "last_hidden_state" + _supports_cache_class = False def __init__( self, diff --git a/optimum/intel/neural_compressor/modeling_base.py b/optimum/intel/neural_compressor/modeling_base.py index bb3d2fe8c8..a12cfc84e5 100644 --- a/optimum/intel/neural_compressor/modeling_base.py +++ b/optimum/intel/neural_compressor/modeling_base.py @@ -71,6 +71,7 @@ class INCModel(OptimizedModel): auto_model_class = AutoModel export_feature = "feature-extraction" base_model_prefix = "inc_model" + _supports_cache_class = False def __init__( self, diff --git a/optimum/intel/neural_compressor/trainer.py b/optimum/intel/neural_compressor/trainer.py index ff6dd95947..e4be3ca2dc 100644 --- a/optimum/intel/neural_compressor/trainer.py +++ b/optimum/intel/neural_compressor/trainer.py @@ -682,6 +682,7 @@ def _inner_training_loop( def save_model( self, output_dir: Optional[str] = None, + _internal_call: bool = False, save_onnx_model: bool = False, ): """ @@ -696,6 +697,7 @@ def save_model( output_dir=output_dir, save_onnx_model=save_onnx_model, ) + # TODO: push to hub if self.args.push_to_hub and not _internal_call def _save( self, diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index c33fcdd44e..9fbc73e856 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -50,6 +50,7 @@ class OVBaseModel(OptimizedModel): auto_model_class = None export_feature = None + _supports_cache_class = False def __init__( self, diff --git a/setup.py b/setup.py index 6c35a264bc..a1c83259a6 100644 --- a/setup.py +++ b/setup.py @@ -28,8 +28,9 @@ INSTALL_REQUIRE = [ "torch>=1.11", - "transformers>=4.36.0,<4.42.0", - "optimum~=1.20", + "transformers>=4.36.0,<4.43.0", + "optimum~=1.21", + # "optimum>=1.21.2,<1.22.0", "datasets>=1.4.0", "sentencepiece", "setuptools", diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index c7a381a0e2..bdc1e9afee 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -697,7 +697,6 @@ def test_compare_to_transformers(self, model_arch): ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG, **model_kwargs) self.assertIsInstance(ov_model.config, PretrainedConfig) self.assertTrue(ov_model.use_cache) - self.assertEqual(ov_model.stateful, ov_model.config.model_type not in not_stateful) tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS) tokens = tokenizer("This is a sample output", return_tensors="pt") tokens.pop("token_type_ids", None) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 67970fbbcb..1f71c9a9ed 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -75,8 +75,8 @@ class OVQuantizerTest(unittest.TestCase): SUPPORTED_ARCHITECTURES_TORCH_MODEL = ( - (OVModelForSequenceClassification, "bert", 22, 35), - (OVModelForCausalLM, "gpt2", 41, 3), + (OVModelForSequenceClassification, "bert", 32 if is_transformers_version("<", "4.41.0") else 22, 35), + (OVModelForCausalLM, "gpt2", 41 if is_transformers_version("<", "4.42.0") else 21, 3), ) SUPPORTED_ARCHITECTURES_OV_MODEL = ( (OVModelForSequenceClassification, "bert", 32, 35), @@ -90,9 +90,6 @@ def test_automodel_static_quantization(self, model_cls, model_name, expected_fak dataset_name, dataset_config_name, column_name = _TASK_TO_DATASET[task] file_name = "openvino_quantized_model.xml" - if model_name == "bert" and is_transformers_version("<", "4.41.0"): - expected_fake_quantize = 32 - def preprocess_function(examples, tokenizer): return tokenizer(examples[column_name], padding="max_length", max_length=128, truncation=True) diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py index 375fc6e4a1..9e85274454 100644 --- a/tests/openvino/test_training.py +++ b/tests/openvino/test_training.py @@ -45,6 +45,7 @@ from transformers.testing_utils import slow from transformers.trainer_utils import EvalPrediction, TrainOutput from transformers.utils import WEIGHTS_NAME +from utils_tests import MODEL_NAMES from optimum.intel.openvino import OVTrainingArguments from optimum.intel.openvino.configuration import OVConfig @@ -317,56 +318,56 @@ def tearDown(self): # TODO: Uncomment failes tests after NNCF 2.8.1 patch release OVTRAINER_TEXT_CLASSIFICATION_TEST_DESCRIPTORS = { "distillation": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[], compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "default_quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss"], ), "distillation,default_quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "customized_quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss"], ), "distillation,customized_quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, expected_binary_masks=60, compression_metrics=["compression_loss"], ), "distillation,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, expected_binary_masks=60, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, @@ -374,7 +375,7 @@ def tearDown(self): compression_metrics=["compression_loss"], ), "customized_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=[ CUSTOMIZED_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, @@ -385,8 +386,8 @@ def tearDown(self): compression_metrics=["compression_loss"], ), "distillation,default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, @@ -394,8 +395,8 @@ def tearDown(self): compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,customized_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[ CUSTOMIZED_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, @@ -406,20 +407,20 @@ def tearDown(self): compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, expected_binary_masks=60, compression_metrics=["compression_loss"], ), "distillation,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, expected_binary_masks=60, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, @@ -427,7 +428,7 @@ def tearDown(self): compression_metrics=["compression_loss"], ), "customized_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=[ CUSTOMIZED_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, @@ -438,8 +439,8 @@ def tearDown(self): compression_metrics=["compression_loss"], ), "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, @@ -447,8 +448,8 @@ def tearDown(self): compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,customized_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[ CUSTOMIZED_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, @@ -552,62 +553,62 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): } UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN = deepcopy(STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN) UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN["params"]["enable_structured_masking"] = False - OVTRAINER_IMAGE_CLASSIFICATION_TEST_DESCRIPTORS = { "default_quantization": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, compression_metrics=["compression_loss"], ), "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, expected_binary_masks=48, compression_metrics=["compression_loss"], ), "unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, expected_binary_masks=48, compression_metrics=["compression_loss"], ), "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, expected_binary_masks=48, compression_metrics=["compression_loss"], ), "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, expected_binary_masks=48, compression_metrics=["compression_loss"], ), "distillation,default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", - teacher_model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], + teacher_model_id=MODEL_NAMES["swin"], nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", - teacher_model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], + teacher_model_id=MODEL_NAMES["swin"], nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), } +# TODO : can be moved to MODEL_NAMES["swin-window"] after transformers v4.42.3 class OVTrainerImageClassificationTrainingTest(OVTrainerBaseTrainingTest): @@ -735,26 +736,26 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): OVTRAINER_AUDIO_CLASSIFICATION_TEST_DESCRIPTORS = { "quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, compression_metrics=["compression_loss"], ), "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_binary_masks=48, compression_metrics=["compression_loss"], ), "unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_binary_masks=48, compression_metrics=["compression_loss"], ), "quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, @@ -762,7 +763,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): compression_metrics=["compression_loss"], ), "quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, @@ -770,8 +771,8 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): compression_metrics=["compression_loss"], ), "distillation,quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", - teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], + teacher_model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, @@ -779,8 +780,8 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", - teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], + teacher_model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 590beefb36..1f9b051d7d 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -118,6 +118,7 @@ "sew_d": "asapp/sew-d-tiny-100k-ft-ls100h", "arctic": "katuni4ka/tiny-random-snowflake", "swin": "hf-internal-testing/tiny-random-SwinModel", + "swin-window": "yujiepan/tiny-random-swin-patch4-window7-224", "t5": "hf-internal-testing/tiny-random-t5", "trocr": "microsoft/trocr-small-handwritten", "unispeech": "hf-internal-testing/tiny-random-unispeech",