Merge branch 'master' into image-cache

jn-jairo · Dec 2, 2023 · c92f3dc · c92f3dc
2 parents 006b24c + 2995a24
commit c92f3dc
Show file tree

Hide file tree

Showing 57 changed files with 5,583 additions and 919 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,9 @@
+{
+	"path-intellisense.mappings": {
+		"../": "${workspaceFolder}/web/extensions/core"
+	},
+	"[python]": {
+		"editor.defaultFormatter": "ms-python.autopep8"
+	},
+	"python.formatting.provider": "none"
+}
diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@ This ui will let you design and execute advanced stable diffusion pipelines usin
 
 ## Features
 - Nodes/graph/flowchart interface to experiment and create complex Stable Diffusion workflows without needing to code anything.
-- Fully supports SD1.x, SD2.x and SDXL
+- Fully supports SD1.x, SD2.x, [SDXL](https://comfyanonymous.github.io/ComfyUI_examples/sdxl/) and [Stable Video Diffusion](https://comfyanonymous.github.io/ComfyUI_examples/video/)
 - Asynchronous Queue system
 - Many optimizations: Only re-executes the parts of the workflow that change between executions.
 - Command line option: ```--lowvram``` to make it work on GPUs with less than 3GB vram (enabled automatically on GPUs with low vram)
@@ -30,6 +30,8 @@ This ui will let you design and execute advanced stable diffusion pipelines usin
 - [unCLIP Models](https://comfyanonymous.github.io/ComfyUI_examples/unclip/)
 - [GLIGEN](https://comfyanonymous.github.io/ComfyUI_examples/gligen/)
 - [Model Merging](https://comfyanonymous.github.io/ComfyUI_examples/model_merging/)
+- [LCM models and Loras](https://comfyanonymous.github.io/ComfyUI_examples/lcm/)
+- [SDXL Turbo](https://comfyanonymous.github.io/ComfyUI_examples/sdturbo/)
 - Latent previews with [TAESD](#how-to-show-high-quality-previews)
 - Starts up very fast.
 - Works fully offline: will never download anything.
@@ -43,6 +45,7 @@ Workflow examples can be found on the [Examples page](https://comfyanonymous.git
 |---------------------------|--------------------------------------------------------------------------------------------------------------------|
 | Ctrl + Enter              | Queue up current graph for generation                                                                              |
 | Ctrl + Shift + Enter      | Queue up current graph as first for generation                                                                     |
+| Ctrl + Z/Ctrl + Y         | Undo/Redo                                                                                                          |
 | Ctrl + S                  | Save workflow                                                                                                      |
 | Ctrl + O                  | Load workflow                                                                                                      |
 | Ctrl + A                  | Select all nodes                                                                                                   |
@@ -98,6 +101,7 @@ AMD users can install rocm and pytorch with pip if you don't have it already ins
 ```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.6```
 
 This is the command to install the nightly with ROCm 5.7 that might have some performance improvements:
+
 ```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm5.7```
 
 ### NVIDIA
@@ -190,7 +194,7 @@ To use a textual inversion concepts/embeddings in a text prompt put them in the
 
 Make sure you use the regular loaders/Load Checkpoint node to load checkpoints. It will auto pick the right settings depending on your GPU.
 
-You can set this command line setting to disable the upcasting to fp32 in some cross attention operations which will increase your speed. Note that this will very likely give you black images on SD2.x models. If you use xformers this option does not do anything.
+You can set this command line setting to disable the upcasting to fp32 in some cross attention operations which will increase your speed. Note that this will very likely give you black images on SD2.x models. If you use xformers or pytorch attention this option does not do anything.
 
 ```--dont-upcast-attention```
 

diff --git a/comfy/cldm/cldm.py b/comfy/cldm/cldm.py
@@ -54,6 +54,7 @@ def __init__(
         transformer_depth_output=None,
         device=None,
         operations=comfy.ops,
+        **kwargs,
     ):
         super().__init__()
         assert use_spatial_transformer == True, "use_spatial_transformer has to be true"

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
@@ -62,6 +62,13 @@ def __call__(self, parser, namespace, values, option_string=None):
 fpvae_group.add_argument("--fp32-vae", action="store_true", help="Run the VAE in full precision fp32.")
 fpvae_group.add_argument("--bf16-vae", action="store_true", help="Run the VAE in bf16.")
 
+fpte_group = parser.add_mutually_exclusive_group()
+fpte_group.add_argument("--fp8_e4m3fn-text-enc", action="store_true", help="Store text encoder weights in fp8 (e4m3fn variant).")
+fpte_group.add_argument("--fp8_e5m2-text-enc", action="store_true", help="Store text encoder weights in fp8 (e5m2 variant).")
+fpte_group.add_argument("--fp16-text-enc", action="store_true", help="Store text encoder weights in fp16.")
+fpte_group.add_argument("--fp32-text-enc", action="store_true", help="Store text encoder weights in fp32.")
+
+
 parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE", const=-1, help="Use torch-directml.")
 
 parser.add_argument("--disable-ipex-optimize", action="store_true", help="Disables ipex.optimize when loading models with Intel GPUs.")

diff --git a/comfy/controlnet.py b/comfy/controlnet.py
@@ -33,7 +33,7 @@ def __init__(self, device=None):
         self.cond_hint_original = None
         self.cond_hint = None
         self.strength = 1.0
-        self.timestep_percent_range = (1.0, 0.0)
+        self.timestep_percent_range = (0.0, 1.0)
         self.timestep_range = None
 
         if device is None:
@@ -42,7 +42,7 @@ def __init__(self, device=None):
         self.previous_controlnet = None
         self.global_average_pooling = False
 
-    def set_cond_hint(self, cond_hint, strength=1.0, timestep_percent_range=(1.0, 0.0)):
+    def set_cond_hint(self, cond_hint, strength=1.0, timestep_percent_range=(0.0, 1.0)):
         self.cond_hint_original = cond_hint
         self.strength = strength
         self.timestep_percent_range = timestep_percent_range

diff --git a/comfy/extra_samplers/uni_pc.py b/comfy/extra_samplers/uni_pc.py
@@ -858,7 +858,7 @@ def predict_eps_sigma(model, input, sigma_in, **kwargs):
     return  (input - model(input, sigma_in, **kwargs)) / sigma
 
 
-def sample_unipc(model, noise, image, sigmas, sampling_function, max_denoise, extra_args=None, callback=None, disable=False, noise_mask=None, variant='bh1'):
+def sample_unipc(model, noise, image, sigmas, max_denoise, extra_args=None, callback=None, disable=False, noise_mask=None, variant='bh1'):
         timesteps = sigmas.clone()
         if sigmas[-1] == 0:
             timesteps = sigmas[:]

diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py
@@ -750,3 +750,61 @@ def sample_lcm(model, x, sigmas, extra_args=None, callback=None, disable=None, n
         if sigmas[i + 1] > 0:
             x += sigmas[i + 1] * noise_sampler(sigmas[i], sigmas[i + 1])
     return x
+
+
+
+@torch.no_grad()
+def sample_heunpp2(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.):
+    """Sample with the "Heun++" scheme, using one to three model calls per step.
+
+    Each step picks its update rule by comparing upcoming entries of
+    ``sigmas`` (by value) against the final entry ``sigmas[-1]``:
+
+    * the next sigma equals the final sigma: a single Euler step;
+    * the sigma after next equals the final sigma: a blend of two slopes;
+    * otherwise: a blend of three slopes.
+
+    Args:
+        model: Callable invoked as ``model(x, sigma * s_in, **extra_args)``;
+            its result is used as the denoised estimate.
+        x: Input tensor (presumably the initial noisy latent -- confirm with
+            the caller). It is advanced step by step and returned.
+        sigmas: Indexable sequence of noise levels; ``len(sigmas) - 1``
+            steps are performed.
+        extra_args: Optional dict of keyword arguments forwarded to every
+            ``model`` call. ``None`` is treated as an empty dict.
+        callback: Optional callable that receives, once per step and before
+            ``x`` is updated, a dict with the keys ``'x'``, ``'i'``,
+            ``'sigma'``, ``'sigma_hat'`` and ``'denoised'``.
+        disable: Forwarded to ``trange`` as its ``disable`` argument.
+        s_churn: Sets the per-step noise increase ``gamma`` as
+            ``s_churn / (len(sigmas) - 1)``, capped at ``2 ** 0.5 - 1``.
+        s_tmin: Lower bound of the sigma range in which ``gamma`` is non-zero.
+        s_tmax: Upper bound of the sigma range in which ``gamma`` is non-zero.
+        s_noise: Multiplier applied to the freshly drawn noise.
+
+    Returns:
+        The tensor ``x`` after the last step.
+    """
+    # From MIT licensed: https://github.com/Carzit/sd-webui-samplers-scheduler/
+    extra_args = {} if extra_args is None else extra_args
+    # One entry per batch element, so a sigma can be expanded to a per-sample vector.
+    s_in = x.new_ones([x.shape[0]])
+    # Final noise level; the branch selection below compares against it by value.
+    s_end = sigmas[-1]
+    for i in trange(len(sigmas) - 1, disable=disable):
+        # gamma is only non-zero while sigmas[i] lies inside [s_tmin, s_tmax].
+        gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.
+        # Noise is drawn on every iteration, even when gamma is 0 and it goes unused.
+        eps = torch.randn_like(x) * s_noise
+        sigma_hat = sigmas[i] * (gamma + 1)
+        if gamma > 0:
+            # Add just enough noise to move x from level sigmas[i] up to sigma_hat.
+            x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5
+        denoised = model(x, sigma_hat * s_in, **extra_args)
+        # to_d is defined elsewhere in the module; presumably it converts the
+        # denoised estimate into the slope at sigma_hat -- confirm there.
+        d = to_d(x, sigma_hat, denoised)
+        if callback is not None:
+            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised})
+        dt = sigmas[i + 1] - sigma_hat
+        # On the last iteration sigmas[i + 1] is sigmas[-1], so the Euler branch
+        # is taken and sigmas[i + 2] below is never read out of range.
+        if sigmas[i + 1] == s_end:
+            # Euler method
+            x = x + d * dt
+        elif sigmas[i + 2] == s_end:
+
+            # Heun's method
+            x_2 = x + d * dt
+            denoised_2 = model(x_2, sigmas[i + 1] * s_in, **extra_args)
+            d_2 = to_d(x_2, sigmas[i + 1], denoised_2)
+
+            # Blend weights: the second slope is weighted by sigmas[i + 1]
+            # relative to twice the first sigma; the first slope gets the rest.
+            w = 2 * sigmas[0]
+            w2 = sigmas[i+1]/w
+            w1 = 1 - w2
+
+            d_prime = d * w1 + d_2 * w2
+
+
+            x = x + d_prime * dt
+
+        else:
+            # Heun++
+            x_2 = x + d * dt
+            denoised_2 = model(x_2, sigmas[i + 1] * s_in, **extra_args)
+            d_2 = to_d(x_2, sigmas[i + 1], denoised_2)
+            dt_2 = sigmas[i + 2] - sigmas[i + 1]
+
+            # Probe one further step ahead (to sigmas[i + 2]) for a third slope.
+            x_3 = x_2 + d_2 * dt_2
+            denoised_3 = model(x_3, sigmas[i + 2] * s_in, **extra_args)
+            d_3 = to_d(x_3, sigmas[i + 2], denoised_3)
+
+            # Blend weights: the second and third slopes are weighted by their
+            # sigmas relative to three times the first sigma; the first slope
+            # gets the remainder.
+            w = 3 * sigmas[0]
+            w2 = sigmas[i + 1] / w
+            w3 = sigmas[i + 2] / w
+            w1 = 1 - w2 - w3
+
+            d_prime = w1 * d + w2 * d_2 + w3 * d_3
+            # The blended slope is applied over dt only (a single step), not dt + dt_2.
+            x = x + d_prime * dt
+    return x