
Commit 6e412d4

add zluda force sync
1 parent 38c5beb commit 6e412d4


2 files changed (+16, -0)


modules/shared.py (+3)
@@ -402,6 +402,9 @@ def temp_disable_extensions():
     "olive_vae_encoder_float32": OptionInfo(False, 'Olive force FP32 for VAE Encoder'),
     "olive_static_dims": OptionInfo(True, 'Olive use static dimensions'),
     "olive_cache_optimized": OptionInfo(True, 'Olive cache optimized models'),
+
+    "zluda_sep": OptionInfo("<h2>ZLUDA</h2>(experimental)", "", gr.HTML, {"visible": devices.backend == "cuda"}),
+    "zluda_force_sync": OptionInfo(False, 'ZLUDA force synchronization (experimental, restart required)', gr.Checkbox, {"visible": devices.backend == "cuda"}),
 }))

 options_templates.update(options_section(('advanced', "Inference Settings"), {
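
On the settings page these two entries render as Gradio components: zluda_sep becomes an HTML heading and zluda_force_sync a checkbox, both hidden unless the active backend is CUDA. A minimal standalone sketch of the equivalent UI (is_cuda here is a stand-in for devices.backend == "cuda"; in the real app the page is generated from the options dict, not written by hand):

import gradio as gr

is_cuda = True  # stand-in for devices.backend == "cuda"

with gr.Blocks() as demo:
    # section separator, mirroring the "zluda_sep" option
    gr.HTML("<h2>ZLUDA</h2>(experimental)", visible=is_cuda)
    # the new toggle, mirroring the "zluda_force_sync" option (off by default)
    gr.Checkbox(value=False, label="ZLUDA force synchronization (experimental, restart required)", visible=is_cuda)

demo.launch()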

modules/zluda.py (+13)
@@ -1,10 +1,23 @@
 import platform
 import torch
+from diffusers.models.attention_processor import AttnProcessor
 from modules import shared, devices


 def initialize_zluda():
     if platform.system() == "Windows" and devices.cuda_ok and torch.cuda.get_device_name(devices.get_optimal_device()).endswith("[ZLUDA]"):
+        shared.log.warning("Detected ZLUDA device. Currently, ZLUDA support is experimental and unstable.")
         torch.backends.cudnn.enabled = False
         if shared.opts.cross_attention_optimization == "Scaled-Dot-Product":
             shared.opts.cross_attention_optimization = "Batch matrix-matrix"
+        if shared.opts.zluda_force_sync:
+            patch_attention_processor(AttnProcessor)
+
+
+def patch_attention_processor(cls):
+    forward = cls.__call__
+    def patched(self, *args, **kwargs):
+        R = forward(self, *args, **kwargs)
+        torch.cuda.synchronize()
+        return R
+    cls.__call__ = patched
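
Taken on its own, patch_attention_processor is a plain monkey-patch: it saves the class-level __call__, wraps it so every attention invocation is followed by a blocking torch.cuda.synchronize(), and installs the wrapper back on the class, affecting every existing and future instance. A minimal self-contained sketch of the same technique (the Work class is a hypothetical stand-in for diffusers' AttnProcessor):

import torch

class Work:
    # hypothetical stand-in for AttnProcessor
    def __call__(self, x):
        return x * 2  # placeholder for the real attention math

def force_sync(cls):
    forward = cls.__call__  # keep a reference to the original
    def patched(self, *args, **kwargs):
        result = forward(self, *args, **kwargs)
        if torch.cuda.is_available():
            torch.cuda.synchronize()  # block until all queued GPU work finishes
        return result
    cls.__call__ = patched  # rebinds for all instances of the class

force_sync(Work)
print(Work()(torch.ones(2)))  # tensor([2., 2.]), now with a sync after each call

Because synchronize() stalls the host until the CUDA stream drains, this serializes GPU work and costs throughput, which is presumably why the option defaults to off and is labeled experimental.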
