Skip to content

Commit

Permalink
Apply sdpa for mpt and internlm (#676)
Browse files Browse the repository at this point in the history
* apply sdpa for mpt and internlm

* fix baichuan-13b

* fix accuracy

* small refactoring

* add test for baichuan 13b

* add support output_attentions

* code style
  • Loading branch information
eaidova authored Apr 30, 2024
1 parent b017856 commit e1b6a59
Show file tree
Hide file tree
Showing 5 changed files with 359 additions and 3 deletions.
4 changes: 2 additions & 2 deletions optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ def ts_patched_forward(*args, **kwargs):

with patcher:
check_dummy_inputs_are_allowed(model, dummy_inputs)
sig = inspect.signature(model.forward) if hasattr(model, "forward") else inspect.signature(model.call)
inputs = config.ordered_inputs(model)
input_names = list(inputs.keys())
output_names = list(config.outputs.keys())
Expand Down Expand Up @@ -387,7 +388,6 @@ def ts_patched_forward(*args, **kwargs):
ov_config=ov_config,
)

sig = inspect.signature(model.forward) if hasattr(model, "forward") else inspect.signature(model.call)
ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs}
if not ordered_dummy_inputs:
ordered_dummy_inputs = dummy_inputs
Expand All @@ -403,7 +403,7 @@ def ts_patched_forward(*args, **kwargs):
inp_tensor.get_tensor().set_names({input_name})
inp_data = flatten_inputs[idx]
static_shape = PartialShape(inp_data.shape)
dims = inputs[input_name]
dims = inputs.get(input_name, [])
for dim in dims:
static_shape[dim] = -1
inp_tensor.get_node().set_partial_shape(static_shape)
Expand Down
18 changes: 18 additions & 0 deletions optimum/exporters/openvino/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
FalconOnnxConfig,
GemmaOnnxConfig,
LlamaOnnxConfig,
MPTOnnxConfig,
PhiOnnxConfig,
UNetOnnxConfig,
VaeDecoderOnnxConfig,
Expand All @@ -43,8 +44,10 @@
BaichuanModelPatcher,
ChatGLMModelPatcher,
GemmaModelPatcher,
InternLMPatcher,
LlamaModelPatcher,
MixtralModelPatcher,
MPTModelPatcher,
Phi3ModelPatcher,
QwenModelPatcher,
)
Expand Down Expand Up @@ -439,6 +442,11 @@ class InternLM2OpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig

def patch_model_for_export(
    self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
) -> "ModelPatcher":
    """Wrap *model* in an InternLM-specific patcher for OpenVINO export.

    The patcher applies the export-time forward modifications (e.g. the
    SDPA-based attention path) before tracing.
    """
    patcher = InternLMPatcher(self, model, model_kwargs=model_kwargs)
    return patcher


@register_in_tasks_manager("orion", *["text-generation", "text-generation-with-past"], library_name="transformers")
class OrionOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
Expand All @@ -455,6 +463,16 @@ class OlmoOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig


@register_in_tasks_manager(
    "mpt", *["text-generation", "text-generation-with-past", "text-classification"], library_name="transformers"
)
class MPTOpenVINOConfig(MPTOnnxConfig):
    """OpenVINO export config for MPT models.

    Inherits the ONNX-side input/output layout from ``MPTOnnxConfig`` and
    only overrides the model patcher so the OpenVINO-specific forward
    rewrites are applied during export.
    """

    def patch_model_for_export(
        self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
    ) -> "ModelPatcher":
        """Return an MPT-specific patcher wrapping *model* for tracing."""
        patcher = MPTModelPatcher(self, model, model_kwargs=model_kwargs)
        return patcher


@register_in_tasks_manager(
"phi3",
*[
Expand Down
Loading

0 comments on commit e1b6a59

Please sign in to comment.