Commit ca27774

snowflake export
eaidova committed May 28, 2024
1 parent 97167a7 commit ca27774
Showing 4 changed files with 32 additions and 1 deletion.
9 changes: 9 additions & 0 deletions optimum/exporters/openvino/model_configs.py
@@ -43,6 +43,7 @@

from .model_patcher import (
    AquilaModelPatcher,
    ArcticModelPatcher,
    BaichuanModelPatcher,
    ChatGLMModelPatcher,
    CodeGenModelPatcher,
@@ -810,3 +811,11 @@ def patch_model_for_export(
        self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
    ) -> "ModelPatcher":
        return JaisModelPatcher(self, model, model_kwargs=model_kwargs)


@register_in_tasks_manager("arctic", *["text-generation", "text-generation-with-past"], library_name="transformers")
class ArcticOpenVINOConfig(MixtralOpenVINOConfig):
def patch_model_for_export(
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
) -> "ModelPatcher":
return ArcticModelPatcher(self, model, model_kwargs=model_kwargs)
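
With the "arctic" architecture registered above, the export path can be exercised end to end. A minimal sketch, assuming the tiny test checkpoint added in utils_tests.py below (snowflake sits in REMOTE_CODE_MODELS, hence trust_remote_code=True):

from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

model_id = "katuni4ka/tiny-random-snowflake"
# export=True converts the PyTorch checkpoint to OpenVINO IR through ArcticOpenVINOConfig
ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokens = tokenizer("Today is a nice day", return_tensors="pt")
print(tokenizer.decode(ov_model.generate(**tokens, max_new_tokens=8)[0]))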
16 changes: 16 additions & 0 deletions optimum/exporters/openvino/model_patcher.py
@@ -153,6 +153,22 @@ def __exit__(self, exc_type, exc_value, traceback):
            layer.block_sparse_moe.forward = layer.block_sparse_moe._unpatched_forward


class ArcticModelPatcher(MixtralModelPatcher):
    def __enter__(self):
        super().__enter__()
        # the model initializes some weights for matrix multiplication in bfloat16, which leads to dtype inconsistency
        try:
            self._model.to(torch.float32)
        except Exception:
            # keep the original dtype if the cast is unsupported
            pass

        for layer in self._model.model.layers:
            layer.block_sparse_moe._unpatched_forward = layer.block_sparse_moe.forward
            layer.block_sparse_moe.forward = types.MethodType(
                _mixtral_sparse_moe_block_forward, layer.block_sparse_moe
            )
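
For context, patchers are applied as context managers around export tracing: __enter__ above rebinds each block_sparse_moe.forward to the shared _mixtral_sparse_moe_block_forward via types.MethodType, and the inherited MixtralModelPatcher.__exit__ restores the stashed original. A standalone sketch of the rebinding pattern, with hypothetical Block and _patched_forward names:

import types

class Block:
    def forward(self, x):
        return x + 1

def _patched_forward(self, x):
    # stands in for _mixtral_sparse_moe_block_forward
    return x * 2

block = Block()
block._unpatched_forward = block.forward  # stash the bound original
block.forward = types.MethodType(_patched_forward, block)  # rebind on the instance
assert block.forward(3) == 6  # patched path
block.forward = block._unpatched_forward  # restore, as __exit__ does
assert block.forward(3) == 4  # original behavior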


def _chatglm_transformer_forward(
    self,
    input_ids,
7 changes: 6 additions & 1 deletion tests/openvino/test_modeling.py
@@ -565,6 +565,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
        "dbrx",
        "qwen2-moe",
        "jais",
        "snowflake",
    )
    GENERATION_LENGTH = 100
    REMOTE_CODE_MODELS = (
@@ -582,6 +583,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
        "xverse",
        "internlm",
        "codegen2",
        "snowflake",
    )

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
@@ -623,7 +625,7 @@ def test_compare_to_transformers(self, model_arch):

        set_seed(SEED)
        transformers_model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
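        # these reference checkpoints keep some weights in reduced precision; cast to float32 so outputs are comparable with the exported model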
if model_arch == "qwen":
if model_arch in ["qwen", "snowflake"]:
transformers_model.to(torch.float32)

        with torch.no_grad():
@@ -870,6 +872,9 @@ def test_beam_search(self, model_arch):
            model_id, export=True, use_cache=True, stateful=False, **model_kwargs
        )
        transformers_model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)

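        # the Arctic checkpoint initializes some weights in bfloat16 (see ArcticModelPatcher); cast the reference to float32 before comparison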
if model_arch == "snowflake":
transformers_model.to(torch.float32)
tokenizer.pad_token_id = tokenizer.eos_token_id
tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True)
tokens.pop("token_type_ids", None)
1 change: 1 addition & 0 deletions tests/openvino/utils_tests.py
@@ -110,6 +110,7 @@
    "latent-consistency": "echarlaix/tiny-random-latent-consistency",
    "sew": "hf-internal-testing/tiny-random-SEWModel",
    "sew_d": "asapp/sew-d-tiny-100k-ft-ls100h",
    "snowflake": "katuni4ka/tiny-random-snowflake",
    "swin": "hf-internal-testing/tiny-random-SwinModel",
    "t5": "hf-internal-testing/tiny-random-t5",
    "trocr": "microsoft/trocr-small-handwritten",
