diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml
index ff38fb41d..9f487dc86 100644
--- a/.github/workflows/test_openvino.yml
+++ b/.github/workflows/test_openvino.yml
@@ -21,7 +21,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ["3.8", "3.12"]
-        transformers-version: ["4.36.0", "4.41.*"]
+        transformers-version: ["4.37.0", "4.41.*"]
         os: [ubuntu-latest]
 
     runs-on: ${{ matrix.os }}
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 927c98ac3..6c5fe5be0 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -25,7 +25,7 @@
 from optimum.exporters.onnx.base import OnnxConfig
 from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED
 from optimum.exporters.openvino.convert import export_from_model
-from optimum.intel.utils.import_utils import is_openvino_tokenizers_available, is_transformers_version
+from optimum.intel.utils.import_utils import is_openvino_tokenizers_available
 from optimum.utils.save_utils import maybe_load_preprocessors
 
@@ -241,7 +241,7 @@ def main_export(
                 f"Asked to export a {model_type} model for the task {task}{autodetected_message}, but the Optimum OpenVINO exporter only supports the tasks {', '.join(model_tasks.keys())} for {model_type}. Please use a supported task. Please open an issue at https://github.com/huggingface/optimum/issues if you would like the task {task} to be supported in the ONNX export for {model_type}."
             )
 
-        if is_transformers_version(">=", "4.36") and model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED:
+        if model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED:
             loading_kwargs["attn_implementation"] = "eager"
         # there are some difference between remote and in library representation of past key values for some models,
         # for avoiding confusion we disable remote code for them
diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index 6ce7a658c..7ca429c32 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -55,11 +55,10 @@ def patch_model_with_bettertransformer(model):
     if hasattr(model, "use_bettertransformer") and model.use_bettertransformer is True:
         return model
 
-    if is_transformers_version("<", "4.36") or is_torch_version("<", "2.1.1"):
+    if is_torch_version("<", "2.1.1"):
         log.warn(
-            COLOR_RED
-            + "[WARNING] For good performance with stateful models, transformers>=4.36.2 and PyTorch>=2.1.1 are required. "
-            f"This Python environment has Transformers {_transformers_version} and PyTorch {_torch_version}. "
+            COLOR_RED + "[WARNING] For good performance with stateful models PyTorch>=2.1.1 is required. "
+            f"This Python environment has PyTorch {_torch_version}. "
             "Consider upgrading PyTorch and Transformers, for example by running "
             "`pip install --upgrade --upgrade-strategy eager optimum[openvino]`, and export the model again"
             + COLOR_RESET
@@ -131,10 +130,7 @@ def _mixtral_sparse_moe_block_forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         # the current expert. We need to make sure to multiply the output hidden
         # states by `routing_weights` on the corresponding tokens (top-1 and top-2)
         current_state = hidden_states[None, top_x].reshape(-1, hidden_dim)
-        if is_transformers_version("<", "4.37.0"):
-            current_hidden_states = expert_layer(current_state, routing_weights[top_x, idx, None])
-        else:
-            current_hidden_states = expert_layer(current_state) * routing_weights[top_x, idx, None]
+        current_hidden_states = expert_layer(current_state) * routing_weights[top_x, idx, None]
         final_hidden_states.index_add_(0, top_x, current_hidden_states.to(hidden_states.dtype))
 
     final_hidden_states = final_hidden_states.reshape(batch_size, sequence_length, hidden_dim)
diff --git a/optimum/intel/neural_compressor/trainer.py b/optimum/intel/neural_compressor/trainer.py
index b6089746e..32a3bed0a 100644
--- a/optimum/intel/neural_compressor/trainer.py
+++ b/optimum/intel/neural_compressor/trainer.py
@@ -267,10 +267,7 @@ def _inner_training_loop(
         else:
             debug_overflow = DebugUnderflowOverflow(self.model)  # noqa
 
-        is_fsdp_xla_enabled = (
-            self.is_fsdp_xla_enabled if is_transformers_version(">=", "4.36.0") else self.fsdp is not None
-        )
-        delay_optimizer_creation = is_sagemaker_mp_enabled() or is_fsdp_xla_enabled or self.is_fsdp_enabled
+        delay_optimizer_creation = is_sagemaker_mp_enabled() or self.is_fsdp_xla_enabled or self.is_fsdp_enabled
 
         if self.is_deepspeed_enabled:
             self.optimizer, self.lr_scheduler = deepspeed_init(self, num_training_steps=max_steps)
@@ -316,8 +313,6 @@ def _inner_training_loop(
         use_accelerator_prepare = True if model is self.model else False
 
         if delay_optimizer_creation:
-            if is_transformers_version("<", "4.36.0") and use_accelerator_prepare:
-                self.model = self.accelerator.prepare(self.model)
             self.create_optimizer_and_scheduler(num_training_steps=max_steps)
 
         # prepare using `accelerator` prepare
@@ -485,7 +480,7 @@ def _inner_training_loop(
             for step, inputs in enumerate(epoch_iterator):
                 total_batched_samples += 1
 
-                if is_transformers_version(">=", "4.36.0") and self.args.include_num_input_tokens_seen:
+                if self.args.include_num_input_tokens_seen:
                     main_input_name = getattr(self.model, "main_input_name", "input_ids")
                     if main_input_name not in inputs:
                         logger.warning(
diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py
index 0a1f5209a..30e87ad2a 100644
--- a/optimum/intel/openvino/trainer.py
+++ b/optimum/intel/openvino/trainer.py
@@ -362,10 +362,7 @@ def _inner_training_loop(
         else:
             debug_overflow = DebugUnderflowOverflow(self.model)  # noqa
 
-        is_fsdp_xla_enabled = (
-            self.is_fsdp_xla_enabled if is_transformers_version(">=", "4.36.0") else self.fsdp is not None
-        )
-        delay_optimizer_creation = is_sagemaker_mp_enabled() or is_fsdp_xla_enabled or self.is_fsdp_enabled
+        delay_optimizer_creation = is_sagemaker_mp_enabled() or self.is_fsdp_xla_enabled or self.is_fsdp_enabled
 
         # We need to reset the scheduler, as its parameters may be different on subsequent calls
         if self._created_lr_scheduler:
@@ -408,12 +405,9 @@ def _inner_training_loop(
             self.model.gradient_checkpointing_enable(gradient_checkpointing_kwargs=gradient_checkpointing_kwargs)
 
-        if is_transformers_version("<", "4.29.0"):
-            is_distributed = self.args.local_rank != -1
-        else:
-            from accelerate.utils import DistributedType
+        from accelerate.utils import DistributedType
 
-            is_distributed = self.args.distributed_state.distributed_type != DistributedType.NO
+        is_distributed = self.args.distributed_state.distributed_type != DistributedType.NO
 
         if self.compression_controller is not None and is_distributed:
             self.compression_controller.distributed()
@@ -426,8 +420,6 @@ def _inner_training_loop(
         use_accelerator_prepare = True if model is self.model else False
 
         if delay_optimizer_creation:
-            if is_transformers_version("<", "4.36.0") and use_accelerator_prepare:
-                self.model = self.accelerator.prepare(self.model)
             self.create_optimizer_and_scheduler(num_training_steps=max_steps)
 
         # prepare using `accelerator` prepare
@@ -597,7 +589,7 @@ def _inner_training_loop(
             for step, inputs in enumerate(epoch_iterator):
                 total_batched_samples += 1
 
-                if is_transformers_version(">=", "4.36.0") and self.args.include_num_input_tokens_seen:
+                if self.args.include_num_input_tokens_seen:
                     main_input_name = getattr(self.model, "main_input_name", "input_ids")
                     if main_input_name not in inputs:
                         logger.warning(
diff --git a/setup.py b/setup.py
index 23fce0f82..eca04c72e 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@
 
 INSTALL_REQUIRE = [
     "torch>=1.11",
-    "transformers>=4.36.0,<4.42.0",
+    "transformers>=4.37.0,<4.42.0",
     "optimum~=1.20",
     "datasets>=1.4.0",
     "sentencepiece",