Add Pix2Struct-related entries to the OpenVINO integration.

* openvino/modeling_seq2seq.py: in forward(), when input_ids is None the main
  input is taken from kwargs.get(self.main_input_name) instead.
* openvino/quantization.py: _set_task() raises ValueError for the
  "image-to-text" task, next to the existing "text2text-generation" check.
* openvino/utils.py: add the "pix2struct" -> "OVModelForPix2Struct" entry.
* utils/constant.py: add "visual-question-answering" -> "image-to-text" to the
  mapping that precedes _TASK_LEGACY.
* utils/dummy_openvino_objects.py: add the OVModelForPix2Struct dummy class,
  which requires the "openvino" backend.

diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py
index c6fdeaf400..87cd18d875 100644
--- a/optimum/intel/openvino/modeling_seq2seq.py
+++ b/optimum/intel/openvino/modeling_seq2seq.py
@@ -385,7 +385,7 @@ def forward(
         self._compile()
 
         # Model inputs
-        inputs = {self.main_input_name: input_ids}
+        inputs = {self.main_input_name: input_ids if input_ids is not None else kwargs.get(self.main_input_name)}
 
         # Add the attention_mask inputs when needed
         if "attention_mask" in self.input_names:
diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index da33eca733..bcc7c2908b 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -439,6 +439,9 @@ def _set_task(self):
         if self.task == "text2text-generation":
             raise ValueError("Seq2Seq models are currently not supported for post-training static quantization.")
 
+        if self.task == "image-to-text":
+            raise ValueError("Image2Text models are currently not supported for post-training static quantization.")
+
     def get_calibration_dataset(
         self,
         dataset_name: str,
diff --git a/optimum/intel/openvino/utils.py b/optimum/intel/openvino/utils.py
index 8d65eae759..c05ba9e374 100644
--- a/optimum/intel/openvino/utils.py
+++ b/optimum/intel/openvino/utils.py
@@ -89,6 +89,7 @@
     "audio-classification": "OVModelForAudioClassification",
     "stable-diffusion": "OVStableDiffusionPipeline",
     "stable-diffusion-xl": "OVStableDiffusionXLPipeline",
+    "pix2struct": "OVModelForPix2Struct",
 }
 
 
diff --git a/optimum/intel/utils/constant.py b/optimum/intel/utils/constant.py
index 413ccacca1..fe061a6b25 100644
--- a/optimum/intel/utils/constant.py
+++ b/optimum/intel/utils/constant.py
@@ -22,6 +22,7 @@
     "seq2seq-lm": "text2text-generation",
     "summarization": "text2text-generation",
     "translation": "text2text-generation",
+    "visual-question-answering": "image-to-text",
 }
 
 _TASK_LEGACY = {
diff --git a/optimum/intel/utils/dummy_openvino_objects.py b/optimum/intel/utils/dummy_openvino_objects.py
index b7c4939a72..a6d62652d5 100644
--- a/optimum/intel/utils/dummy_openvino_objects.py
+++ b/optimum/intel/utils/dummy_openvino_objects.py
@@ -103,6 +103,17 @@ def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["openvino"])
 
 
+class OVModelForPix2Struct(metaclass=DummyObject):
+    _backends = ["openvino"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["openvino"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["openvino"])
+
+
 class OVModelForQuestionAnswering(metaclass=DummyObject):
     _backends = ["openvino"]
 