
update master #3228

Merged · 5 commits · Jun 13, 2024
8 changes: 4 additions & 4 deletions CHANGELOG.md
@@ -4,7 +4,7 @@

### Highlights for 2024-06-13

-First, yes, it's here and supported: [**StabilityAI Stable Diffusion 3 Medium**](https://stability.ai/news/stable-diffusion-3-medium)
+First, yes, it is here and supported: [**StabilityAI Stable Diffusion 3 Medium**](https://stability.ai/news/stable-diffusion-3-medium)
for details on how to download and use, see [Wiki](https://github.com/vladmandic/automatic/wiki/SD3)
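
As a quick orientation (not part of this PR's code), loading SD3 Medium with stock diffusers looks roughly like the sketch below; the repo id matches the one used in `modules/model_sd3.py`, while the dtype and step count are illustrative choices:

```python
# Rough sketch of loading SD3 Medium with stock diffusers; dtype and
# sampling settings here are illustrative, not this repo's defaults.
import torch
from diffusers import StableDiffusion3Pipeline

pipe = StableDiffusion3Pipeline.from_pretrained(
    'stabilityai/stable-diffusion-3-medium-diffusers',  # gated repo: requires HF login
    torch_dtype=torch.float16,
)
pipe.to('cuda')
image = pipe('a photo of a cat holding a sign', num_inference_steps=28).images[0]
image.save('sd3.png')
```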

#### What else?
@@ -27,8 +27,8 @@ Plus tons of minor features such as optimized initial install experience, **T-Ga

#### New Functionality

-- [MuLan](https://github.com/mulanai/MuLan) Multi-langunage prompts
-  write your prompts forin ~110 auto-detected languages!
+- [MuLan](https://github.com/mulanai/MuLan) Multi-language prompts
+  write your prompts in ~110 auto-detected languages!
compatible with *SD15* and *SDXL*
enable in scripts -> MuLan and set encoder to `InternVL-14B-224px` encoder
*note*: right now this is more of a proof-of-concept before smaller and/or quantized models are released
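
For the gist of what the script wires up: the upstream MuLan project distributes a language adapter that swaps the pipeline's text encoder for a multilingual one. The sketch below is hypothetical, based on upstream's README; the `mulankit` package name, `transform` helper, and adapter id are assumptions from the MuLan repo, not this extension's actual code path:

```python
# Hypothetical sketch of MuLan's adapter approach, per the upstream README.
# mulankit, transform, and the adapter id below are assumptions; this repo
# integrates MuLan through its scripts UI instead.
import torch
from diffusers import StableDiffusionPipeline
from mulankit import transform  # assumed upstream helper

pipe = StableDiffusionPipeline.from_pretrained('runwayml/stable-diffusion-v1-5', torch_dtype=torch.float16)
pipe = transform(pipe, 'mulanai/mulan-lang-adapter::sd15_aesthetic.pth')  # assumed adapter id
pipe.to('cuda')
image = pipe('一只戴着帽子的猫').images[0]  # prompt language is auto-detected
```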
@@ -68,7 +68,7 @@ Plus tons of minor features such as optimized initial install experience, **T-Ga
- reintroduce prompt attention normalization, disabled by default, enable in settings -> execution
this can drastically help with unbalanced prompts
- further work on improving python 3.12 functionality and remove experimental flag
-  note: recommended version remains python 3.11 for all users except if you're using directml and then its python 3.10
+  note: recommended version remains python 3.11 for all users, except if you are using directml in which case its python 3.10
- improved **installer** for initial installs
initial install will do single-pass install of all required packages with correct versions
subsequent runs will check package versions as necessary
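
On the prompt attention normalization entry above: a common way to implement it (sketched here as the general technique, not this repo's exact code) is to rescale parsed `(text, weight)` pairs so their mean weight is 1.0, keeping emphasis relative instead of inflating the overall conditioning magnitude:

```python
# General sketch of prompt-attention normalization: rescale parsed
# (text, weight) pairs to a mean weight of 1.0. Illustrative only.
def normalize_attention(parsed):
    mean = sum(weight for _, weight in parsed) / len(parsed)
    return [(text, weight / mean) for text, weight in parsed]

# e.g. '(masterpiece:1.6) castle, (blurry:0.5)' parsed into pairs:
print(normalize_attention([('masterpiece', 1.6), ('castle', 1.2), ('blurry', 0.5)]))
# -> [('masterpiece', ~1.45), ('castle', ~1.09), ('blurry', ~0.45)]
```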
34 changes: 12 additions & 22 deletions modules/model_sd3.py
@@ -1,4 +1,6 @@
+import io
import os
+import contextlib
import warnings
import torch
import diffusers
@@ -16,12 +18,17 @@ def hf_login():
    import huggingface_hub as hf
    from modules import shared
    if shared.opts.huggingface_token is not None and len(shared.opts.huggingface_token) > 2 and not loggedin:
        shared.log.debug(f'HF login token found: {"x" * len(shared.opts.huggingface_token)}')
-        hf.login(shared.opts.huggingface_token)
+        stdout = io.StringIO()
+        with contextlib.redirect_stdout(stdout):
+            hf.login(shared.opts.huggingface_token)
+        text = stdout.getvalue() or ''
+        line = [l for l in text.split('\n') if 'Token' in l]
+        shared.log.info(f'HF login: {line[0] if len(line) > 0 else text}')
        loggedin = True


def load_sd3(te3=None, fn=None, cache_dir=None, config=None):
+    from modules import devices
    hf_login()
-    repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers'
+    model_id = 'stabilityai/stable-diffusion-3-medium-diffusers'
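
The `hf_login` change above mutes `huggingface_hub`'s console chatter and logs one summary line instead. The same pattern in isolation, as a self-contained sketch with a stand-in for `hf.login`:

```python
# Self-contained sketch of the stdout-capture pattern from hf_login:
# redirect a chatty call's print() output into a buffer, then log only
# the line of interest. noisy_login stands in for hf.login here.
import io
import contextlib
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger('example')

def noisy_login(token):
    print('Login successful')
    print('Token is valid (permission: read)')

stdout = io.StringIO()
with contextlib.redirect_stdout(stdout):
    noisy_login('hf_xxx')
text = stdout.getvalue() or ''
line = [l for l in text.split('\n') if 'Token' in l]
log.info(f'HF login: {line[0] if len(line) > 0 else text}')
```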
@@ -101,10 +108,12 @@ def load_sd3(te3=None, fn=None, cache_dir=None, config=None):
    )
    diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["stable-diffusion-3"] = diffusers.StableDiffusion3Pipeline
    diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["stable-diffusion-3"] = diffusers.StableDiffusion3Img2ImgPipeline
+    devices.torch_gc(force=True)
    return pipe


def load_te3(pipe, te3=None, cache_dir=None):
+    from modules import devices
    hf_login()
    repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers'
    if pipe is None or not hasattr(pipe, 'text_encoder_3'):
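
The two `AUTO_*_PIPELINES_MAPPING` registrations in `load_sd3` teach diffusers' auto-pipeline machinery about the `stable-diffusion-3` class, so an img2img pipeline can be derived from the already-loaded text2img pipeline without reloading weights. A sketch of the effect (assuming the model is downloaded and HF login is configured; newer diffusers releases may already register SD3 themselves):

```python
# Sketch: with the mappings registered as load_sd3 does, AutoPipelineForImage2Image
# can derive an img2img pipeline from a loaded text2img pipeline, sharing components.
import torch
import diffusers

diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING['stable-diffusion-3'] = diffusers.StableDiffusion3Pipeline
diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING['stable-diffusion-3'] = diffusers.StableDiffusion3Img2ImgPipeline

pipe = diffusers.StableDiffusion3Pipeline.from_pretrained(
    'stabilityai/stable-diffusion-3-medium-diffusers', torch_dtype=torch.float16)
img2img = diffusers.AutoPipelineForImage2Image.from_pipe(pipe)  # resolves via the mapping
```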
@@ -136,24 +145,7 @@ def load_te3(pipe, te3=None, cache_dir=None):
            subfolder='tokenizer_3',
            cache_dir=cache_dir,
        )
-
-
-def stats():
-    s = torch.cuda.mem_get_info()
-    system = { 'free': s[0], 'used': s[1] - s[0], 'total': s[1] }
-    s = dict(torch.cuda.memory_stats('cuda'))
-    allocated = { 'current': s['allocated_bytes.all.current'], 'peak': s['allocated_bytes.all.peak'] }
-    reserved = { 'current': s['reserved_bytes.all.current'], 'peak': s['reserved_bytes.all.peak'] }
-    active = { 'current': s['active_bytes.all.current'], 'peak': s['active_bytes.all.peak'] }
-    inactive = { 'current': s['inactive_split_bytes.all.current'], 'peak': s['inactive_split_bytes.all.peak'] }
-    cuda = {
-        'system': system,
-        'active': active,
-        'allocated': allocated,
-        'reserved': reserved,
-        'inactive': inactive,
-    }
-    return cuda
+    devices.torch_gc(force=True)


if __name__ == '__main__':
@@ -168,7 +160,6 @@ def stats():
    # pipeline.to('cuda')
    t1 = time.time()
    log.info(f'Loaded: time={t1-t0:.3f}')
-    log.info(f'Stats: {stats()}')

    # pipeline.scheduler = diffusers.schedulers.EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config)
    log.info(f'Scheduler, {pipeline.scheduler}')
@@ -182,5 +173,4 @@ def stats():
    ).images[0]
    t2 = time.time()
    log.info(f'Generated: time={t2-t1:.3f}')
-    log.info(f'Stats: {stats()}')
    image.save("/tmp/sd3.png")
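
Both loaders now end with `devices.torch_gc(force=True)` in place of the deleted `stats()` reporting. As a rough sketch of what a `torch_gc`-style helper typically does (an assumption about its internals, not the actual `modules.devices` implementation):

```python
# Rough sketch of a torch_gc-style cleanup helper: run Python's GC, then
# release cached CUDA blocks. Assumed behavior; the real modules.devices.torch_gc
# is more involved (thresholds, logging, per-backend handling).
import gc
import torch

def torch_gc(force=False):
    # force is kept for signature parity; a real helper may use it to
    # bypass memory-usage thresholds before collecting.
    gc.collect()  # drop unreachable Python objects holding tensor refs
    if torch.cuda.is_available():
        torch.cuda.empty_cache()   # return cached blocks to the driver
        torch.cuda.ipc_collect()   # release CUDA IPC handles
```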
13 changes: 7 additions & 6 deletions modules/sd_models.py
@@ -1367,6 +1367,8 @@ def set_diffuser_pipe(pipe, new_pipe_type):


def set_diffusers_attention(pipe):
+    import diffusers.models.attention_processor as p
+
    def set_attn(pipe, attention):
        if attention is None:
            return
@@ -1377,21 +1379,20 @@ def set_attn(pipe, attention):
        modules = [m for m in modules if isinstance(m, torch.nn.Module) and hasattr(m, "set_attn_processor")]
        for module in modules:
            if 'SD3Transformer2DModel' in module.__class__.__name__: # TODO SD3
-                continue
-            module.set_attn_processor(attention)
+                module.set_attn_processor(p.JointAttnProcessor2_0())
+            else:
+                module.set_attn_processor(attention)

    if shared.opts.cross_attention_optimization == "Disabled":
        pass # do nothing
    elif shared.opts.cross_attention_optimization == "Scaled-Dot-Product": # The default set by Diffusers
-        from diffusers.models.attention_processor import AttnProcessor2_0
-        set_attn(pipe, AttnProcessor2_0())
+        set_attn(pipe, p.AttnProcessor2_0())
    elif shared.opts.cross_attention_optimization == "xFormers" and hasattr(pipe, 'enable_xformers_memory_efficient_attention'):
        pipe.enable_xformers_memory_efficient_attention()
    elif shared.opts.cross_attention_optimization == "Split attention" and hasattr(pipe, "enable_attention_slicing"):
        pipe.enable_attention_slicing()
    elif shared.opts.cross_attention_optimization == "Batch matrix-matrix":
-        from diffusers.models.attention_processor import AttnProcessor
-        set_attn(pipe, AttnProcessor())
+        set_attn(pipe, p.AttnProcessor())
    elif shared.opts.cross_attention_optimization == "Dynamic Attention BMM":
        from modules.sd_hijack_dynamic_atten import DynamicAttnProcessorBMM
        set_attn(pipe, DynamicAttnProcessorBMM())
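
For context on the refactor: diffusers models expose `set_attn_processor`, and SD3's `SD3Transformer2DModel` uses joint attention, hence `JointAttnProcessor2_0` for it while UNet-based models take the regular processors. A minimal sketch against a standard SD 1.5 checkpoint:

```python
# Minimal sketch: swapping a UNet's attention implementation through
# diffusers' set_attn_processor, as set_attn does per-module above.
import torch
from diffusers import StableDiffusionPipeline
from diffusers.models.attention_processor import AttnProcessor, AttnProcessor2_0

pipe = StableDiffusionPipeline.from_pretrained(
    'runwayml/stable-diffusion-v1-5', torch_dtype=torch.float16)
pipe.unet.set_attn_processor(AttnProcessor2_0())  # scaled-dot-product attention
pipe.unet.set_attn_processor(AttnProcessor())     # classic batched-matmul path
```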
2 changes: 1 addition & 1 deletion wiki
Submodule wiki updated from 601862 to 0db358