diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 5d13da6b69..b6536512b1 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -30,7 +30,13 @@
 )
 from optimum.utils.normalized_config import NormalizedTextConfig
 
-from .model_patcher import ChatGLMModelPatcher, GemmaModelPatcher, MixtralModelPatcher, QwenModelPatcher
+from .model_patcher import (
+    BaichuanModelPatcher,
+    ChatGLMModelPatcher,
+    GemmaModelPatcher,
+    MixtralModelPatcher,
+    QwenModelPatcher,
+)
 
 
 def init_model_configs():
@@ -74,13 +80,10 @@ class BaichaunOpenVINOConfig(TextDecoderOnnxConfig):
         num_layers="num_hidden_layers", num_attention_heads="num_attention_heads", hidden_size="hidden_size"
     )
 
-
-@register_in_tasks_manager("jais", *["text-generation", "text-generation-with-past"], library_name="transformers")
-class JaisOpenVINOConfig(TextDecoderOnnxConfig):
-    DEFAULT_ONNX_OPSET = 13
-    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(
-        num_layers="n_layer", num_attention_heads="n_head", hidden_size="n_embd"
-    )
+    def patch_model_for_export(
+        self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
+    ) -> "ModelPatcher":
+        return BaichuanModelPatcher(self, model, model_kwargs=model_kwargs)
 
 
 @register_in_tasks_manager("qwen2", *["text-generation", "text-generation-with-past"], library_name="transformers")
diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index 2083bd4965..2e5153de71 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -477,3 +477,16 @@ def __exit__(self, exc_type, exc_value, traceback):
             block.attn.forward = block.attn._orig_forward
         self._model.config.bf16 = self.original_bf16
         self._model.config.fp16 = self.original_fp16
+
+
+class BaichuanModelPatcher(DecoderModelPatcher):
+    def __init__(
+        self,
+        config: "OnnxConfig",
+        model: Union["PreTrainedModel", "TFPreTrainedModel"],
+        model_kwargs: Dict[str, Any],
+    ):
+        super().__init__(config, model, model_kwargs)
+        # the model initializes its lm_head buffers during the first inference, so run a dummy forward pass before tracing
+        if self._model.lm_head.first_flag:
+            self._model(torch.ones((1, 10), dtype=torch.int64), torch.ones((1, 10), dtype=torch.int64))
diff --git a/setup.py b/setup.py
index cd57134542..0915ef427b 100644
--- a/setup.py
+++ b/setup.py
@@ -35,6 +35,8 @@
     "timm",
     "invisible-watermark>=0.2.0",
     "auto-gptq",
+    "transformers_stream_generator",
+    "einops",
 ]
 
 QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index c929f7e23a..9df6c73214 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -485,7 +485,6 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "gpt2",
         "gpt_neo",
         "gpt_neox",
-        "jais",
         "llama",
         # "llama_gptq",
         "marian",
@@ -585,6 +584,8 @@ def test_pipeline(self, model_arch):
     def test_multiple_inputs(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         set_seed(SEED)
+        if model_arch == "qwen":
+            self.skipTest("Qwen tokenizer does not support padding")
         model_kwargs = {}
         if model_arch in self.REMOTE_CODE_MODELS:
             model_kwargs = {
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index 0887402900..c0d4b3a9e0 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -49,7 +49,6 @@
     "gptj": "hf-internal-testing/tiny-random-GPTJModel",
     "hubert": "hf-internal-testing/tiny-random-HubertModel",
     "ibert": "hf-internal-testing/tiny-random-ibert",
-    "jais": "katuni4ka/tiny-random-jais",
     "levit": "hf-internal-testing/tiny-random-LevitModel",
     "longt5": "hf-internal-testing/tiny-random-longt5",
     "llama": "fxmarty/tiny-llama-fast-tokenizer",