Allow VAE inference in f16 precision (#859)

nikita-savelyevv · web-flow · commit 13f28361c398 · 2024-08-14T16:00:02.000+04:00
* Added custom ov_configs for VAE encoder and decoder

* Simplify usage
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
@@ -718,7 +718,7 @@ def __call__(self, latent_sample: np.ndarray):
         return list(outputs.values())
 
     def _compile(self):
-        if "GPU" in self._device:
+        if "GPU" in self._device and "INFERENCE_PRECISION_HINT" not in self.ov_config:
             self.ov_config.update({"INFERENCE_PRECISION_HINT": "f32"})
         super()._compile()
 
@@ -739,7 +739,7 @@ def __call__(self, sample: np.ndarray):
         return list(outputs.values())
 
     def _compile(self):
-        if "GPU" in self._device:
+        if "GPU" in self._device and "INFERENCE_PRECISION_HINT" not in self.ov_config:
             self.ov_config.update({"INFERENCE_PRECISION_HINT": "f32"})
         super()._compile()