Skip to content

Commit b502ee0

Browse files
committed
The OpenVINO GPU plugin does not support int64 natively, so i64 inputs are always converted to i32 at runtime. To avoid that runtime conversion, the IO tensor precision is now set to i32 up front.
1 parent f935233 commit b502ee0

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

optimum/intel/openvino/modeling_decoder.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ def __init__(
133133
self._first_iter_beam_search = False
134134
self._second_iter_beam_search = False
135135
self.update_pkv_precision()
136+
if "GPU" in device:
137+
self.update_int_precision()
136138
if self.is_dynamic:
137139
self.model = self._reshape(self.model, -1, -1)
138140
is_stateful_supported = ensure_stateful_is_available(warn=False)
@@ -210,6 +212,14 @@ def update_pkv_precision(self, force_fp32=False):
210212
self.model = self._reshape(self.model, -1, -1)
211213
self.request = None
212214

215+
def update_int_precision(self):
    """
    Declares the i64 token inputs of the model as i32 tensors.

    Every model input whose element type is `Type.i64` and whose name contains `input_ids`,
    `position_ids` or `attention_mask` gets its tensor element type set to `Type.i32` through a
    `PrePostProcessor`. The rebuilt model replaces `self.model`. When no input matches, the model is
    left untouched.
    """
    int_input_names = ("input_ids", "position_ids", "attention_mask")
    ppp = PrePostProcessor(self.model)
    converted = False
    for model_input in self.model.inputs:
        if model_input.get_element_type() != Type.i64:
            continue
        in_name = model_input.get_any_name()
        # Substring match (not equality), same as the original check on the input name.
        if any(candidate in in_name for candidate in int_input_names):
            ppp.input(in_name).tensor().set_element_type(Type.i32)
            converted = True

    if not converted:
        # Nothing to change: skip the rebuild and keep the current model.
        return

    self.model = ppp.build()
    # The model changed, so drop any previously created request, as update_pkv_precision does.
    self.request = None
222+
213223
def _save_pretrained(self, save_directory: Union[str, Path]):
214224
"""
215225
Saves the model to the OpenVINO IR format so that it can be re-loaded using the

0 commit comments

Comments
 (0)