From 168be0509fcf34be3ad0259735916ff6ccd913fb Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Tue, 9 Jul 2024 02:23:50 -0400 Subject: [PATCH 1/2] upgrade ipex model to transformers 4.42.3 --- .github/workflows/test_ipex.yml | 2 +- optimum/exporters/ipex/model_patcher.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_ipex.yml b/.github/workflows/test_ipex.yml index 96ef047aaf..2ed91535f7 100644 --- a/.github/workflows/test_ipex.yml +++ b/.github/workflows/test_ipex.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: python-version: [3.8, 3.9] - transformers-version: [4.39.0, 4.41.2] + transformers-version: [4.39.0, 4.42.3] os: [ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/optimum/exporters/ipex/model_patcher.py b/optimum/exporters/ipex/model_patcher.py index e5299fb5c2..0d43152889 100644 --- a/optimum/exporters/ipex/model_patcher.py +++ b/optimum/exporters/ipex/model_patcher.py @@ -34,7 +34,7 @@ # Please also update in the setup.py and .github/workflows/test_ipex.yml if you change the transformers version _TRANSFORMERS_MIN_VERSION = "4.39.0" -_TRANSFORMERS_MAX_VERSION = "4.41.2" +_TRANSFORMERS_MAX_VERSION = "4.42.3" _IPEX_EXPORTED_GENERATION_TASKS = ("text-generation",) diff --git a/setup.py b/setup.py index 5bb79b4169..47398130ad 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,7 @@ "neural-compressor": ["neural-compressor>=2.2.0", "onnxruntime<1.15.0", "accelerate"], "openvino": ["openvino>=2023.3", "nncf>=2.11.0", "openvino-tokenizers[transformers]"], "nncf": ["nncf>=2.11.0"], - "ipex": ["intel-extension-for-pytorch", "transformers>=4.39.0,<=4.41.2"], + "ipex": ["intel-extension-for-pytorch", "transformers>=4.39.0,<=4.42.3"], "diffusers": ["diffusers"], "quality": QUALITY_REQUIRE, "tests": TESTS_REQUIRE, From 6d489c1e81051707de60a78aab88e5ee60cbbdfa Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Tue, 9 Jul 2024 22:31:02 -0400 Subject: [PATCH 2/2] fix tests --- optimum/intel/ipex/modeling_base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py index 36862b7502..3d39e944b3 100644 --- a/optimum/intel/ipex/modeling_base.py +++ b/optimum/intel/ipex/modeling_base.py @@ -134,7 +134,6 @@ class IPEXModel(OptimizedModel): base_model_prefix = "ipex_model" main_input_name = "input_ids" output_name = "last_hidden_state" - _supports_cache_class = False def __init__( self, @@ -428,6 +427,8 @@ def forward( class IPEXModelForCausalLM(IPEXModel, GenerationMixin): auto_model_class = AutoModelForCausalLM export_feature = "text-generation" + _supports_cache_class = False + _is_stateful = False def __init__( self, @@ -476,8 +477,8 @@ def __init__( else: self._reorder_cache = self.model_cls._reorder_cache.__get__(self) - if is_transformers_version(">=", "4.38.0") and model_type in {"llama", "phi", "persimmon"}: - self.prepare_inputs_for_generation = _prepare_inputs_for_generation_for_llama + if is_transformers_version(">=", "4.38.0") and model_type in {"llama", "phi", "persimmon", "mistral"}: + self.prepare_inputs_for_generation = _ipex_prepare_inputs_for_generation else: self.prepare_inputs_for_generation = self.model_cls.prepare_inputs_for_generation.__get__(self) @@ -613,7 +614,7 @@ def generate(self, *args, **kwargs): return super().generate(*args, **kwargs) -def _prepare_inputs_for_generation_for_llama( +def _ipex_prepare_inputs_for_generation( input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs ): from transformers.cache_utils import Cache