diff --git a/docs/source/openvino/models.mdx b/docs/source/openvino/models.mdx index b9fb24840..a28621b28 100644 --- a/docs/source/openvino/models.mdx +++ b/docs/source/openvino/models.mdx @@ -58,6 +58,8 @@ Here is the list of the supported architectures : - GPT-NeoX-Japanese - Gemma - Gemma2 +- Granite +- GraniteMoE - Hubert - IBert - InternLM diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index ff4450877..4ff12edc4 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -3603,6 +3603,7 @@ def __exit__(self, exc_type, exc_value, traceback): block.attn.forward = block.attn._orig_forward +# copied from https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/granitemoe/modeling_granitemoe.py#L321 def _granite_moe_topk_gating_forward(self, hidden_states): # compute the top_k routing decision logits = self.layer(hidden_states).float() # [batch_size x seq_len, num_experts] @@ -3629,6 +3630,7 @@ def _granite_moe_topk_gating_forward(self, hidden_states): return index_sorted_experts, batch_index, batch_gates, expert_size, logits +# copied from https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/granitemoe/modeling_granitemoe.py#L281 def _granite_moe_parallel_experts_forward(self, inputs, expert_size): output_list = [] # difference with original diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 02b81bfdc..cf663f063 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -72,6 +72,8 @@ "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM", "gpt_neox_japanese": "hf-internal-testing/tiny-random-GPTNeoXJapaneseForCausalLM", "gptj": "hf-internal-testing/tiny-random-GPTJModel", + "granite": "katuni4ka/tiny-random-granite", + "granite-moe": "katuni4ka/tiny-random-granite-moe", "hubert": "hf-internal-testing/tiny-random-HubertModel", "ibert": "hf-internal-testing/tiny-random-ibert", "internlm": "katuni4ka/tiny-random-internlm",