From 46624c39145058ffa03ae0e9d957ee711fa4e0f8 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 1 Jul 2024 16:08:14 +0200 Subject: [PATCH 01/17] fix transformers v4.42.0 compatibility --- .github/workflows/test_openvino.yml | 2 +- optimum/intel/ipex/modeling_base.py | 1 + optimum/intel/openvino/modeling_base.py | 1 + setup.py | 4 ++-- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index ff38fb41df..6f9675cde7 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8", "3.12"] - transformers-version: ["4.36.0", "4.41.*"] + transformers-version: ["4.36.0", "4.42.*"] os: [ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py index 9f4c0d1056..58fe359877 100644 --- a/optimum/intel/ipex/modeling_base.py +++ b/optimum/intel/ipex/modeling_base.py @@ -127,6 +127,7 @@ class IPEXModel(OptimizedModel): base_model_prefix = "ipex_model" main_input_name = "input_ids" output_name = "last_hidden_state" + _supports_cache_class = False def __init__( self, diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index c33fcdd44e..9fbc73e856 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -50,6 +50,7 @@ class OVBaseModel(OptimizedModel): auto_model_class = None export_feature = None + _supports_cache_class = False def __init__( self, diff --git a/setup.py b/setup.py index 6c35a264bc..196591cfe0 100644 --- a/setup.py +++ b/setup.py @@ -28,8 +28,8 @@ INSTALL_REQUIRE = [ "torch>=1.11", - "transformers>=4.36.0,<4.42.0", - "optimum~=1.20", + "transformers>=4.36.0,<4.43.0", + "optimum @ git+https://github.com/huggingface/optimum.git@transformer-442", "datasets>=1.4.0", "sentencepiece", "setuptools", From 4104598336fe5afb3e64122a1ae1a7c9a2417e5a Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 1 Jul 2024 16:18:08 +0200 Subject: [PATCH 02/17] fix inc modeling --- optimum/intel/neural_compressor/modeling_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/intel/neural_compressor/modeling_base.py b/optimum/intel/neural_compressor/modeling_base.py index bb3d2fe8c8..a12cfc84e5 100644 --- a/optimum/intel/neural_compressor/modeling_base.py +++ b/optimum/intel/neural_compressor/modeling_base.py @@ -71,6 +71,7 @@ class INCModel(OptimizedModel): auto_model_class = AutoModel export_feature = "feature-extraction" base_model_prefix = "inc_model" + _supports_cache_class = False def __init__( self, From 7abc2b0ea2eff95613f0d61758cc770c82e4a3fc Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 11:10:38 +0200 Subject: [PATCH 03/17] update setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 196591cfe0..b063b08c69 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.43.0", - "optimum @ git+https://github.com/huggingface/optimum.git@transformer-442", + "optimum @ git+https://github.com/huggingface/optimum.git@fix-sentence-trfs", "datasets>=1.4.0", "sentencepiece", "setuptools", From e261d2cb3fc1f0869d52d240fadb473240ee7c57 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 11:28:36 +0200 Subject: [PATCH 04/17] fix --- optimum/intel/generation/modeling.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/optimum/intel/generation/modeling.py b/optimum/intel/generation/modeling.py index 7d7e854311..22a4745f0c 100644 --- a/optimum/intel/generation/modeling.py +++ b/optimum/intel/generation/modeling.py @@ -90,6 +90,7 @@ class BaseModelForCausalLM(OptimizedModel, GenerationMixin): export_feature = "text-generation" main_input_name = "input_ids" base_model_prefix = "torch_script_model" + _supports_cache_class = False def __init__( self, From 1e28f4b1af7d9a40885fa55262fa547153a0ae79 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 12:03:03 +0200 Subject: [PATCH 05/17] add missing argument --- optimum/intel/neural_compressor/trainer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/optimum/intel/neural_compressor/trainer.py b/optimum/intel/neural_compressor/trainer.py index ea9fe9a1cd..8a16b30c87 100644 --- a/optimum/intel/neural_compressor/trainer.py +++ b/optimum/intel/neural_compressor/trainer.py @@ -681,6 +681,7 @@ def _inner_training_loop( def save_model( self, output_dir: Optional[str] = None, + _internal_call: bool = False, save_onnx_model: bool = False, ): """ @@ -695,6 +696,7 @@ def save_model( output_dir=output_dir, save_onnx_model=save_onnx_model, ) + # TODO: push to hub if self.args.push_to_hub and not _internal_call def _save( self, From 20f5c37b4d217333e2c9149949b289ec8a52f25d Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 15:01:01 +0200 Subject: [PATCH 06/17] fix patching --- optimum/exporters/openvino/model_patcher.py | 37 +++++++++++++-------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index d0aabfb2d5..21451d492e 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -1161,7 +1161,7 @@ def __exit__(self, exc_type, exc_value, traceback): block.attention.forward = block.attention._orig_forward -# Adapted from https://github.com/huggingface/transformers/blob/ccdabc5642bf84849af93f591e207dc625c8e1e1/src/transformers/models/phi3/modeling_phi3.py#L426 +# Adapted from https://github.com/huggingface/transformers/blob/ccdabc5642bf84849af93f591e207dc625c8e1e1/src/transformers/models/phi3/modeling_phi3.py#L729 def _phi3_self_attn_sdpa_forward( self, hidden_states: torch.Tensor, @@ -1170,6 +1170,7 @@ def _phi3_self_attn_sdpa_forward( past_key_value: Optional[Tuple[torch.Tensor]] = None, output_attentions: bool = False, use_cache: bool = False, + cache_position: Optional[torch.LongTensor] = None, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: if output_attentions: return self._orig_forward( @@ -1181,10 +1182,9 @@ def _phi3_self_attn_sdpa_forward( use_cache=use_cache, ) - # TO DO: remove llama imports when transformers with phi3 support will be released - try: + if is_transformers_version(">=", "4.41.0"): from transformers.models.phi3.modeling_phi3 import apply_rotary_pos_emb, repeat_kv - except ImportError: + else: from transformers.models.llama.modeling_llama import apply_rotary_pos_emb, repeat_kv bsz, q_len, _ = hidden_states.size() @@ -1206,17 +1206,15 @@ def _phi3_self_attn_sdpa_forward( query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids) if past_key_value is not None: - cache_kwargs = {"sin": sin, "cos": cos} # Specific to RoPE models + cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position} # Specific to RoPE models key_states, value_states = past_key_value.update(key_states, 
value_states, self.layer_idx, cache_kwargs) key_states = repeat_kv(key_states, self.num_key_value_groups) value_states = repeat_kv(value_states, self.num_key_value_groups) + causal_mask = attention_mask if attention_mask is not None: - if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - raise ValueError( - f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}" - ) + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] # SDPA with memory-efficient backend is currently (torch==2.1.2) bugged with non-contiguous inputs with custom attn_mask, # Reference: https://github.com/pytorch/pytorch/issues/112577. @@ -1229,7 +1227,7 @@ def _phi3_self_attn_sdpa_forward( query_states, key_states, value_states, - attn_mask=attention_mask, + attn_mask=causal_mask, dropout_p=self.attention_dropout if self.training else 0.0, # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1. is_causal=self.is_causal and attention_mask is None and q_len > 1, @@ -1561,7 +1559,7 @@ def __exit__(self, exc_type, exc_value, traceback): layer.attn._attn = layer.attn._orig_attn -# adapted from https://github.com/huggingface/transformers/blob/v4.40.2/src/transformers/models/dbrx/modeling_dbrx.py#L763 +# Adapted from https://github.com/huggingface/transformers/blob/v4.40.2/src/transformers/models/dbrx/modeling_dbrx.py#L763 def _dbrx_experts_forward( self, x: torch.Tensor, weights: torch.Tensor, top_weights: torch.Tensor, top_experts: torch.LongTensor ): @@ -1606,7 +1604,7 @@ def _dbrx_experts_forward( return out -# adapted from https://github.com/huggingface/transformers/blob/v4.40.2/src/transformers/models/dbrx/modeling_dbrx.py#L1228 +# Adapted from https://github.com/huggingface/transformers/blob/v4.40.2/src/transformers/models/dbrx/modeling_dbrx.py#L1228 def _dbrx_update_causal_mask_legacy( self, attention_mask: Optional[torch.Tensor], input_tensor: torch.Tensor, cache_position: torch.Tensor ) -> Optional[torch.Tensor]: @@ -1803,6 +1801,7 @@ def __exit__(self, exc_type, exc_value, traceback): block.ffn.experts.forward = block.ffn.experts._orig_forward +# Adapted from https://github.com/huggingface/transformers/blob/v4.41.0/src/transformers/models/persimmon/modeling_persimmon.py#L264 def _persimmon_self_attn_sdpa_forward( self, hidden_states: torch.Tensor, @@ -1811,6 +1810,7 @@ def _persimmon_self_attn_sdpa_forward( past_key_value: Optional["Cache"] = None, output_attentions: bool = False, use_cache: bool = False, + cache_position: Optional[torch.LongTensor] = None, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: from transformers.models.persimmon.modeling_persimmon import apply_rotary_pos_emb @@ -1865,14 +1865,23 @@ def _persimmon_self_attn_sdpa_forward( if past_key_value is not None: # Specific to RoPE models with partial rotation - cache_kwargs = {"sin": sin, "cos": cos, "partial_rotation_size": self.rotary_emb.dim} + cache_kwargs = { + "sin": sin, + "cos": cos, + "partial_rotation_size": self.rotary_emb.dim, + "cache_position": cache_position, + } key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs) + causal_mask = attention_mask + if attention_mask is not None: # no matter the length, we just slice it + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] + attn_output = F.scaled_dot_product_attention( query_states, key_states, value_states, - attention_mask, + causal_mask, scale=1 / 
math.sqrt(self.head_dim), dropout_p=self.attention_dropout.p, ) attn_output = attn_output.transpose(1, 2).contiguous() attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) attn_output = self.dense(attn_output) if not output_attentions: attn_weights = None return attn_output, attn_weights, past_key_value From e7c072f934e72414203c89e0047c30ad2108eb3e Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 15:02:10 +0200 Subject: [PATCH 07/17] format --- optimum/exporters/openvino/model_patcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 21451d492e..7812682b8b 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -1876,7 +1876,7 @@ def _persimmon_self_attn_sdpa_forward( causal_mask = attention_mask if attention_mask is not None: # no matter the length, we just slice it causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] - + attn_output = F.scaled_dot_product_attention( query_states, key_states, From 1b89adba844d338600c9fa271afcd2f09c428479 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 16:44:43 +0200 Subject: [PATCH 08/17] fix num quant op --- tests/openvino/test_quantization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 67970fbbcb..0d9329ecdc 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -76,7 +76,7 @@ class OVQuantizerTest(unittest.TestCase): SUPPORTED_ARCHITECTURES_TORCH_MODEL = ( (OVModelForSequenceClassification, "bert", 22, 35), - (OVModelForCausalLM, "gpt2", 41, 3), + (OVModelForCausalLM, "gpt2", 21, 3), ) SUPPORTED_ARCHITECTURES_OV_MODEL = ( (OVModelForSequenceClassification, "bert", 32, 35), From f3b704e4194d54db3b6badc4f326098d02bc6e28 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 16:48:08 +0200 Subject: [PATCH 09/17] remove incompatible transformers generation --- tests/openvino/test_modeling.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index c7a381a0e2..a4fbba6ca2 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -697,7 +697,6 @@ def test_compare_to_transformers(self, model_arch): ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG, **model_kwargs) self.assertIsInstance(ov_model.config, PretrainedConfig) self.assertTrue(ov_model.use_cache) - self.assertEqual(ov_model.stateful, ov_model.config.model_type not in not_stateful) tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS) tokens = tokenizer("This is a sample output", return_tensors="pt") tokens.pop("token_type_ids", None) @@ -749,6 +748,11 @@ def test_compare_to_transformers(self, model_arch): ) ov_outputs = ov_model.generate(**tokens, generation_config=gen_config) + + # TODO: update _update_model_kwargs_for_generation so that it's compatible with transformers >= v4.42.0 + if model_arch not in ["chatglm", "glm4"] and is_transformers_version(">=", "4.42.0"): + return + transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config) self.assertTrue(torch.allclose(ov_outputs, transformers_outputs)) From 9f6767a077f5500d61094b8afda4ab2b55d25576 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 17:03:32 +0200 Subject: [PATCH 10/17] update setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b063b08c69..e475e59dca 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7
@@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.43.0", - "optimum @ git+https://github.com/huggingface/optimum.git@fix-sentence-trfs", + "optimum~=1.21", "datasets>=1.4.0", "sentencepiece", "setuptools", From 31c93103cfd6a581912f503fd363b2c5aeecd4f3 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 2 Jul 2024 18:25:13 +0200 Subject: [PATCH 11/17] fix op --- tests/openvino/test_quantization.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 0d9329ecdc..f0f18830e3 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -90,8 +90,10 @@ def test_automodel_static_quantization(self, model_cls, model_name, expected_fak dataset_name, dataset_config_name, column_name = _TASK_TO_DATASET[task] file_name = "openvino_quantized_model.xml" - if model_name == "bert" and is_transformers_version("<", "4.41.0"): + if is_transformers_version("<", "4.41.0") and model_name == "bert": expected_fake_quantize = 32 + if is_transformers_version("<", "4.42.0") and model_name == "gpt2": + expected_fake_quantize = 41 def preprocess_function(examples, tokenizer): return tokenizer(examples[column_name], padding="max_length", max_length=128, truncation=True) From abc14284ecb69da1206826e332d8a27d5e02444d Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 3 Jul 2024 11:54:37 +0200 Subject: [PATCH 12/17] fix test --- tests/openvino/test_training.py | 111 ++++++++++++++++---------------- tests/openvino/utils_tests.py | 1 + 2 files changed, 57 insertions(+), 55 deletions(-) diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py index 375fc6e4a1..475299e18a 100644 --- a/tests/openvino/test_training.py +++ b/tests/openvino/test_training.py @@ -58,6 +58,7 @@ from optimum.intel.openvino.utils import OV_XML_FILE_NAME from optimum.intel.utils.import_utils import is_transformers_version +from utils_tests import MODEL_NAMES F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"} @@ -317,56 +318,56 @@ def tearDown(self): # TODO: Uncomment failes tests after NNCF 2.8.1 patch release OVTRAINER_TEXT_CLASSIFICATION_TEST_DESCRIPTORS = { "distillation": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[], compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "default_quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss"], ), "distillation,default_quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "customized_quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss"], ), "distillation,customized_quantization": 
OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG, expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, expected_binary_masks=60, compression_metrics=["compression_loss"], ), "distillation,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, expected_binary_masks=60, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, @@ -374,7 +375,7 @@ def tearDown(self): compression_metrics=["compression_loss"], ), "customized_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=[ CUSTOMIZED_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, @@ -385,8 +386,8 @@ def tearDown(self): compression_metrics=["compression_loss"], ), "distillation,default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, @@ -394,8 +395,8 @@ def tearDown(self): compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,customized_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[ CUSTOMIZED_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, @@ -406,20 +407,20 @@ def tearDown(self): compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, expected_binary_masks=60, compression_metrics=["compression_loss"], ), "distillation,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, expected_binary_masks=60, 
compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, @@ -427,7 +428,7 @@ def tearDown(self): compression_metrics=["compression_loss"], ), "customized_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], nncf_compression_config=[ CUSTOMIZED_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, @@ -438,8 +439,8 @@ def tearDown(self): compression_metrics=["compression_loss"], ), "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], expected_fake_quantize=22, expected_int8=32, @@ -447,8 +448,8 @@ def tearDown(self): compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,customized_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-bert", - teacher_model_id="hf-internal-testing/tiny-random-bert", + model_id=MODEL_NAMES["bert"], + teacher_model_id=MODEL_NAMES["bert"], nncf_compression_config=[ CUSTOMIZED_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, @@ -552,62 +553,62 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): } UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN = deepcopy(STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN) UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN["params"]["enable_structured_masking"] = False - OVTRAINER_IMAGE_CLASSIFICATION_TEST_DESCRIPTORS = { "default_quantization": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, compression_metrics=["compression_loss"], ), "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, expected_binary_masks=48, compression_metrics=["compression_loss"], ), "unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, expected_binary_masks=48, compression_metrics=["compression_loss"], ), "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, expected_binary_masks=48, compression_metrics=["compression_loss"], ), "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - 
model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, expected_binary_masks=48, compression_metrics=["compression_loss"], ), "distillation,default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", - teacher_model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], + teacher_model_id=MODEL_NAMES["swin"], nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="yujiepan/tiny-random-swin-patch4-window7-224", - teacher_model_id="yujiepan/tiny-random-swin-patch4-window7-224", + model_id=MODEL_NAMES["swin"], + teacher_model_id=MODEL_NAMES["swin"], nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=36, - expected_int8=28, + expected_fake_quantize=35, + expected_int8=27, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), } +# TODO : can be moved to MODEL_NAMES["swin-window"] after transformers v4.42.3 class OVTrainerImageClassificationTrainingTest(OVTrainerBaseTrainingTest): @@ -735,26 +736,26 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): OVTRAINER_AUDIO_CLASSIFICATION_TEST_DESCRIPTORS = { "quantization": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, compression_metrics=["compression_loss"], ), "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"] nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_binary_masks=48, compression_metrics=["compression_loss"], ), "unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_binary_masks=48, compression_metrics=["compression_loss"], ), "quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, @@ -762,7 +763,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): compression_metrics=["compression_loss"], ), "quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, @@ -770,8 +771,8 @@ def 
check_ovmodel_reshaping(self, ovmodel: OVModel): compression_metrics=["compression_loss"], ), "distillation,quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", - teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], + teacher_model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, @@ -779,8 +780,8 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "distillation,quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", - teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", + model_id=MODEL_NAMES["wav2vec2-hf"], + teacher_model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_fake_quantize=40, expected_int8=30, diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 590beefb36..1f9b051d7d 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -118,6 +118,7 @@ "sew_d": "asapp/sew-d-tiny-100k-ft-ls100h", "arctic": "katuni4ka/tiny-random-snowflake", "swin": "hf-internal-testing/tiny-random-SwinModel", + "swin-window": "yujiepan/tiny-random-swin-patch4-window7-224", "t5": "hf-internal-testing/tiny-random-t5", "trocr": "microsoft/trocr-small-handwritten", "unispeech": "hf-internal-testing/tiny-random-unispeech", From 5e9be54e56f1362d218f20c2dcdbc7fbacad5e07 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 3 Jul 2024 11:57:03 +0200 Subject: [PATCH 13/17] tmp for test --- .github/workflows/test_openvino.yml | 2 +- setup.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index 6f9675cde7..c73b5ca3a8 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8", "3.12"] - transformers-version: ["4.36.0", "4.42.*"] + transformers-version: ["4.36.0", "4.41.*","4.42.*"] os: [ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/setup.py b/setup.py index e475e59dca..7d958a8a0b 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,8 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.43.0", - "optimum~=1.21", + "optimum @ git+https://github.com/huggingface/optimum.git", + #"optimum>=1.21.2,<1.22.0", "datasets>=1.4.0", "sentencepiece", "setuptools", From 4ee9c995a2a81b9396fdd2194f5a2a558ad11aa5 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 3 Jul 2024 12:13:35 +0200 Subject: [PATCH 14/17] format --- setup.py | 2 +- tests/openvino/test_training.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 7d958a8a0b..f4d9997298 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ "torch>=1.11", "transformers>=4.36.0,<4.43.0", "optimum @ git+https://github.com/huggingface/optimum.git", - #"optimum>=1.21.2,<1.22.0", + # "optimum>=1.21.2,<1.22.0", "datasets>=1.4.0", "sentencepiece", "setuptools", diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py index 475299e18a..9e85274454 100644 --- a/tests/openvino/test_training.py +++ 
b/tests/openvino/test_training.py @@ -45,6 +45,7 @@ from transformers.testing_utils import slow from transformers.trainer_utils import EvalPrediction, TrainOutput from transformers.utils import WEIGHTS_NAME +from utils_tests import MODEL_NAMES from optimum.intel.openvino import OVTrainingArguments from optimum.intel.openvino.configuration import OVConfig @@ -58,7 +59,6 @@ from optimum.intel.openvino.utils import OV_XML_FILE_NAME from optimum.intel.utils.import_utils import is_transformers_version -from utils_tests import MODEL_NAMES F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"} @@ -743,7 +743,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): compression_metrics=["compression_loss"], ), "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["wav2vec2-hf"] + model_id=MODEL_NAMES["wav2vec2-hf"], nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], expected_binary_masks=48, compression_metrics=["compression_loss"], ), From d8fed913ea39cd82b3b38566bc56fc48acd6beb1 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 4 Jul 2024 10:46:27 +0200 Subject: [PATCH 15/17] update test --- .github/workflows/test_openvino.yml | 2 +- tests/openvino/test_modeling.py | 5 ----- tests/openvino/test_quantization.py | 9 ++------- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index c73b5ca3a8..1ab2d5296f 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8", "3.12"] - transformers-version: ["4.36.0", "4.41.*","4.42.*"] + transformers-version: ["4.36.0","4.42.*"] os: [ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index a4fbba6ca2..bdc1e9afee 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -748,11 +748,6 @@ def test_compare_to_transformers(self, model_arch): ) ov_outputs = ov_model.generate(**tokens, generation_config=gen_config) - - # TODO: update _update_model_kwargs_for_generation so that it's compatible with transformers >= v4.42.0 - if model_arch not in ["chatglm", "glm4"] and is_transformers_version(">=", "4.42.0"): - return - transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config) self.assertTrue(torch.allclose(ov_outputs, transformers_outputs)) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index f0f18830e3..1f71c9a9ed 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -75,8 +75,8 @@ class OVQuantizerTest(unittest.TestCase): SUPPORTED_ARCHITECTURES_TORCH_MODEL = ( - (OVModelForSequenceClassification, "bert", 22, 35), - (OVModelForCausalLM, "gpt2", 21, 3), + (OVModelForSequenceClassification, "bert", 32 if is_transformers_version("<", "4.41.0") else 22, 35), + (OVModelForCausalLM, "gpt2", 41 if is_transformers_version("<", "4.42.0") else 21, 3), ) SUPPORTED_ARCHITECTURES_OV_MODEL = ( (OVModelForSequenceClassification, "bert", 32, 35), @@ -90,11 +90,6 @@ def test_automodel_static_quantization(self, model_cls, model_name, expected_fak dataset_name, dataset_config_name, column_name = _TASK_TO_DATASET[task] file_name = "openvino_quantized_model.xml" - if is_transformers_version("<", "4.41.0") and model_name == "bert": - expected_fake_quantize = 32 - if is_transformers_version("<", "4.42.0") and model_name == "gpt2": - expected_fake_quantize = 41 - def preprocess_function(examples, tokenizer): return tokenizer(examples[column_name], padding="max_length", max_length=128, truncation=True)
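The last hunk above folds the version-dependent reference counts directly into the test parametrization via `is_transformers_version`. Below is a minimal sketch of how such a version gate can be implemented, assuming a helper equivalent to the one exposed by `optimum.intel.utils.import_utils`; the actual implementation may differ.

```python
# Minimal sketch of a transformers version gate; the real helper is
# optimum.intel.utils.import_utils.is_transformers_version, whose exact
# implementation may differ from this illustration.
import operator

import transformers
from packaging import version

_OPERATORS = {"<": operator.lt, "<=": operator.le, "==": operator.eq, ">=": operator.ge, ">": operator.gt}


def is_transformers_version(comparison: str, reference: str) -> bool:
    """Return whether the installed transformers version satisfies the comparison."""
    return _OPERATORS[comparison](version.parse(transformers.__version__), version.parse(reference))


# Mirrors the parametrization above: older releases produce more fake-quantize ops for gpt2.
expected_fake_quantize = 41 if is_transformers_version("<", "4.42.0") else 21
```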
From 65e1b040645840f3c008ac1daac0bca282b73900 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 4 Jul 2024 11:38:21 +0200 Subject: [PATCH 16/17] update setup --- .github/workflows/test_openvino.yml | 2 +- setup.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml index 1ab2d5296f..6f9675cde7 100644 --- a/.github/workflows/test_openvino.yml +++ b/.github/workflows/test_openvino.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8", "3.12"] - transformers-version: ["4.36.0","4.42.*"] + transformers-version: ["4.36.0", "4.42.*"] os: [ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/setup.py b/setup.py index f4d9997298..8abcbfaf08 100644 --- a/setup.py +++ b/setup.py @@ -29,8 +29,7 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.43.0", - "optimum @ git+https://github.com/huggingface/optimum.git", - # "optimum>=1.21.2,<1.22.0", + "optimum>=1.21.2,<1.22.0", "datasets>=1.4.0", "sentencepiece", "setuptools", From 52c3fc07ca5e114ab22698ed3f47de83181960c3 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 5 Jul 2024 11:13:13 +0200 Subject: [PATCH 17/17] update setup --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8abcbfaf08..a1c83259a6 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,8 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.43.0", - "optimum>=1.21.2,<1.22.0", + "optimum~=1.21", + # "optimum>=1.21.2,<1.22.0", "datasets>=1.4.0", "sentencepiece", "setuptools",
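A recurring change in this series (patches 01, 02, and 04) is declaring `_supports_cache_class = False` on model classes that do not subclass transformers' `PreTrainedModel`. The sketch below illustrates the assumed mechanism: generation utilities in transformers v4.42 read this attribute to decide whether a model can consume the new `Cache` classes, falling back to the legacy tuple format otherwise. The class and function names here are illustrative stand-ins, not the actual transformers source.

```python
# Hedged sketch of the assumed mechanism behind `_supports_cache_class`;
# simplified for illustration, not the actual transformers implementation.


class OptimizedModelStub:
    # Same default value transformers' PreTrainedModel declares. Classes that do
    # not subclass PreTrainedModel (as in the patches above) must define it
    # themselves, otherwise generation utilities fail when reading it.
    _supports_cache_class = False


def select_cache_format(model) -> str:
    # Simplified stand-in for the check performed while preparing generation
    # inputs: only models flagging support receive new-style Cache objects.
    if getattr(model, "_supports_cache_class", False):
        return "DynamicCache"
    return "legacy tuple past_key_values"


print(select_cache_format(OptimizedModelStub()))  # -> legacy tuple past_key_values
```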