diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 0a8b92e2a4..f78c58589b 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -43,6 +43,7 @@
 
 from .model_patcher import (
     AquilaModelPatcher,
+    ArcticModelPatcher,
     BaichuanModelPatcher,
     ChatGLMModelPatcher,
     CodeGenModelPatcher,
@@ -810,3 +811,13 @@ def patch_model_for_export(
         self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
     ) -> "ModelPatcher":
         return JaisModelPatcher(self, model, model_kwargs=model_kwargs)
+
+
+@register_in_tasks_manager("arctic", *["text-generation", "text-generation-with-past"], library_name="transformers")
+class ArcticOpenVINOConfig(MixtralOpenVINOConfig):
+    """Export config registered for "arctic" models; reuses the Mixtral config with ArcticModelPatcher."""
+
+    def patch_model_for_export(
+        self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
+    ) -> "ModelPatcher":
+        return ArcticModelPatcher(self, model, model_kwargs=model_kwargs)
diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index eaba6812df..9fd05297b9 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -153,6 +153,22 @@ def __exit__(self, exc_type, exc_value, traceback):
             layer.block_sparse_moe.forward = layer.block_sparse_moe._unpatched_forward
 
 
+class ArcticModelPatcher(MixtralModelPatcher):
+    """Mixtral-based patcher that additionally casts the model to float32 when entering the export context."""
+
+    def __enter__(self):
+        # NOTE(review): forward patching of `block_sparse_moe` is left to the parent `__enter__`; repeating it
+        # here would store the already patched method in `_unpatched_forward`, so `__exit__` could not restore
+        # the original forward - confirm that MixtralModelPatcher.__enter__ performs this patching
+        super().__enter__()
+        # the model initializes some weights for matrix multiplication in bfloat16, which leads to dtype mismatch
+        try:
+            self._model.to(torch.float32)
+        except Exception:
+            # best-effort cast: a failed conversion is deliberately ignored and the export continues
+            pass
+
+
 def _chatglm_transformer_forward(
     self,
     input_ids,
diff --git 
a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index 83dde2e603..8099705aed 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -565,6 +565,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "dbrx", "qwen2-moe", "jais", + "snowflake", ) GENERATION_LENGTH = 100 REMOTE_CODE_MODELS = ( @@ -582,6 +583,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "xverse", "internlm", "codegen2", + "snowflake", ) @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -623,7 +625,7 @@ def test_compare_to_transformers(self, model_arch): set_seed(SEED) transformers_model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs) - if model_arch == "qwen": + if model_arch in ["qwen", "snowflake"]: transformers_model.to(torch.float32) with torch.no_grad(): @@ -870,6 +872,9 @@ def test_beam_search(self, model_arch): model_id, export=True, use_cache=True, stateful=False, **model_kwargs ) transformers_model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs) + + if model_arch == "snowflake": + transformers_model.to(torch.float32) tokenizer.pad_token_id = tokenizer.eos_token_id tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True) tokens.pop("token_type_ids", None) diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 44dbccc2eb..f8ee4da6da 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -110,6 +110,7 @@ "latent-consistency": "echarlaix/tiny-random-latent-consistency", "sew": "hf-internal-testing/tiny-random-SEWModel", "sew_d": "asapp/sew-d-tiny-100k-ft-ls100h", + "snowflake": "katuni4ka/tiny-random-snowflake", "swin": "hf-internal-testing/tiny-random-SwinModel", "t5": "hf-internal-testing/tiny-random-t5", "trocr": "microsoft/trocr-small-handwritten",