
Commit fe8f834

Merge branch 'master' into extension-device

2 parents f92ae30 + c837a17


57 files changed: +8155 additions, -848 deletions

.github/workflows/test-ui.yaml

Lines changed: 26 additions & 0 deletions

@@ -0,0 +1,26 @@
+name: Tests CI
+
+on: [push, pull_request]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v3
+        with:
+          node-version: 18
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Install requirements
+        run: |
+          python -m pip install --upgrade pip
+          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          pip install -r requirements.txt
+      - name: Run Tests
+        run: |
+          npm ci
+          npm run test:generate
+          npm test
+        working-directory: ./tests-ui
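Note: a minimal sketch of reproducing the CI test steps locally, assuming Node 18+ and the Python requirements are already installed (the workflow handles both via its setup steps):

# Sketch: run the same test sequence the workflow runs, from the repo root.
import subprocess

for cmd in (["npm", "ci"],
            ["npm", "run", "test:generate"],  # presumably regenerates tests-ui/data/object_info.json (see the .gitignore change below)
            ["npm", "test"]):
    subprocess.run(cmd, cwd="tests-ui", check=True)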

.gitignore

Lines changed: 1 addition & 0 deletions

@@ -14,3 +14,4 @@ venv/
 /web/extensions/*
 !/web/extensions/logging.js.example
 !/web/extensions/core/
+/tests-ui/data/object_info.json

README.md

Lines changed: 1 addition & 0 deletions

@@ -46,6 +46,7 @@ Workflow examples can be found on the [Examples page](https://comfyanonymous.git
 | Ctrl + S | Save workflow |
 | Ctrl + O | Load workflow |
 | Ctrl + A | Select all nodes |
+| Alt + C | Collapse/uncollapse selected nodes |
 | Ctrl + M | Mute/unmute selected nodes |
 | Ctrl + B | Bypass selected nodes (acts like the node was removed from the graph and the wires reconnected through) |
 | Delete/Backspace | Delete selected nodes |

comfy/cldm/cldm.py

Lines changed: 26 additions & 21 deletions

@@ -27,7 +27,6 @@ def __init__(
             model_channels,
             hint_channels,
             num_res_blocks,
-            attention_resolutions,
             dropout=0,
             channel_mult=(1, 2, 4, 8),
             conv_resample=True,
@@ -52,6 +51,7 @@ def __init__(
             use_linear_in_transformer=False,
             adm_in_channels=None,
             transformer_depth_middle=None,
+            transformer_depth_output=None,
             device=None,
             operations=comfy.ops,
     ):
@@ -79,29 +79,24 @@ def __init__(
         self.image_size = image_size
         self.in_channels = in_channels
         self.model_channels = model_channels
-        if isinstance(transformer_depth, int):
-            transformer_depth = len(channel_mult) * [transformer_depth]
-        if transformer_depth_middle is None:
-            transformer_depth_middle = transformer_depth[-1]
+
         if isinstance(num_res_blocks, int):
             self.num_res_blocks = len(channel_mult) * [num_res_blocks]
         else:
             if len(num_res_blocks) != len(channel_mult):
                 raise ValueError("provide num_res_blocks either as an int (globally constant) or "
                                  "as a list/tuple (per-level) with the same length as channel_mult")
             self.num_res_blocks = num_res_blocks
+
         if disable_self_attentions is not None:
             # should be a list of booleans, indicating whether to disable self-attention in TransformerBlocks or not
             assert len(disable_self_attentions) == len(channel_mult)
         if num_attention_blocks is not None:
             assert len(num_attention_blocks) == len(self.num_res_blocks)
             assert all(map(lambda i: self.num_res_blocks[i] >= num_attention_blocks[i], range(len(num_attention_blocks))))
-            print(f"Constructor of UNetModel received num_attention_blocks={num_attention_blocks}. "
-                  f"This option has LESS priority than attention_resolutions {attention_resolutions}, "
-                  f"i.e., in cases where num_attention_blocks[i] > 0 but 2**i not in attention_resolutions, "
-                  f"attention will still not be set.")

-        self.attention_resolutions = attention_resolutions
+        transformer_depth = transformer_depth[:]
+
         self.dropout = dropout
         self.channel_mult = channel_mult
         self.conv_resample = conv_resample
@@ -180,11 +175,14 @@ def __init__(
                         dims=dims,
                         use_checkpoint=use_checkpoint,
                         use_scale_shift_norm=use_scale_shift_norm,
-                        operations=operations
+                        dtype=self.dtype,
+                        device=device,
+                        operations=operations,
                     )
                 ]
                 ch = mult * model_channels
-                if ds in attention_resolutions:
+                num_transformers = transformer_depth.pop(0)
+                if num_transformers > 0:
                     if num_head_channels == -1:
                         dim_head = ch // num_heads
                     else:
@@ -201,9 +199,9 @@ def __init__(
                     if not exists(num_attention_blocks) or nr < num_attention_blocks[level]:
                         layers.append(
                             SpatialTransformer(
-                                ch, num_heads, dim_head, depth=transformer_depth[level], context_dim=context_dim,
+                                ch, num_heads, dim_head, depth=num_transformers, context_dim=context_dim,
                                 disable_self_attn=disabled_sa, use_linear=use_linear_in_transformer,
-                                use_checkpoint=use_checkpoint, operations=operations
+                                use_checkpoint=use_checkpoint, dtype=self.dtype, device=device, operations=operations
                             )
                         )
                 self.input_blocks.append(TimestepEmbedSequential(*layers))
@@ -223,11 +221,13 @@ def __init__(
                         use_checkpoint=use_checkpoint,
                         use_scale_shift_norm=use_scale_shift_norm,
                         down=True,
+                        dtype=self.dtype,
+                        device=device,
                         operations=operations
                     )
                     if resblock_updown
                     else Downsample(
-                        ch, conv_resample, dims=dims, out_channels=out_ch, operations=operations
+                        ch, conv_resample, dims=dims, out_channels=out_ch, dtype=self.dtype, device=device, operations=operations
                     )
                 )
             )
@@ -245,20 +245,23 @@ def __init__(
         if legacy:
             #num_heads = 1
             dim_head = ch // num_heads if use_spatial_transformer else num_head_channels
-        self.middle_block = TimestepEmbedSequential(
+        mid_block = [
             ResBlock(
                 ch,
                 time_embed_dim,
                 dropout,
                 dims=dims,
                 use_checkpoint=use_checkpoint,
                 use_scale_shift_norm=use_scale_shift_norm,
+                dtype=self.dtype,
+                device=device,
                 operations=operations
-            ),
-            SpatialTransformer(  # always uses a self-attn
+            )]
+        if transformer_depth_middle >= 0:
+            mid_block += [SpatialTransformer(  # always uses a self-attn
                 ch, num_heads, dim_head, depth=transformer_depth_middle, context_dim=context_dim,
                 disable_self_attn=disable_middle_self_attn, use_linear=use_linear_in_transformer,
-                use_checkpoint=use_checkpoint, operations=operations
+                use_checkpoint=use_checkpoint, dtype=self.dtype, device=device, operations=operations
             ),
             ResBlock(
                 ch,
@@ -267,9 +270,11 @@ def __init__(
                 dims=dims,
                 use_checkpoint=use_checkpoint,
                 use_scale_shift_norm=use_scale_shift_norm,
+                dtype=self.dtype,
+                device=device,
                 operations=operations
-            ),
-        )
+            )]
+        self.middle_block = TimestepEmbedSequential(*mid_block)
         self.middle_block_out = self.make_zero_conv(ch, operations=operations)
         self._feature_size += ch
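Note: the substance of this change is that attention placement in the ControlNet copy of the UNet is no longer keyed off attention_resolutions. The constructor now copies a per-block transformer_depth list and pops one entry per input block, with a popped value of 0 meaning no SpatialTransformer for that block, and transformer_depth_middle < 0 likewise skipping the middle-block transformer; the added dtype/device arguments let submodules be created directly at the target precision and device. A minimal sketch of the pop pattern (illustrative values, not the model code):

# Illustrative only: how a per-block depth list drives attention placement.
transformer_depth = [1, 1, 2, 2, 0, 0]  # hypothetical per-block depths
depths = transformer_depth[:]           # copy, as the constructor now does

blocks = []
while depths:
    num_transformers = depths.pop(0)
    if num_transformers > 0:
        blocks.append(f"ResBlock + SpatialTransformer(depth={num_transformers})")
    else:
        blocks.append("ResBlock only")
print(blocks)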

comfy/cli_args.py

Lines changed: 2 additions & 0 deletions

@@ -36,6 +36,8 @@ def __call__(self, parser, namespace, values, option_string=None):
 parser.add_argument("--listen", type=str, default="127.0.0.1", metavar="IP", nargs="?", const="0.0.0.0", help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)")
 parser.add_argument("--port", type=int, default=8188, help="Set the listen port.")
 parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.")
+parser.add_argument("--max-upload-size", type=float, default=100, help="Set the maximum upload size in MB.")
+
 parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.")
 parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.")
 parser.add_argument("--temp-directory", type=str, default=None, help="Set the ComfyUI temp directory (default is in the ComfyUI directory).")
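Note: the flag is denominated in megabytes, so whatever consumes it needs a bytes conversion. A sketch under the assumption that the web server passes it to aiohttp's client_max_size (the server-side wiring is outside this diff):

# Sketch: MB-denominated flag -> bytes for aiohttp (assumed consumer).
from aiohttp import web

max_upload_size_mb = 100.0  # stands in for args.max_upload_size
app = web.Application(client_max_size=round(max_upload_size_mb * 1024 * 1024))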

comfy/clip_vision.py

Lines changed: 19 additions & 17 deletions

@@ -1,12 +1,24 @@
-from transformers import CLIPVisionModelWithProjection, CLIPVisionConfig, CLIPImageProcessor, modeling_utils
-from .utils import load_torch_file, transformers_convert
+from transformers import CLIPVisionModelWithProjection, CLIPVisionConfig, modeling_utils
+from .utils import load_torch_file, transformers_convert, common_upscale
 import os
 import torch
 import contextlib

 import comfy.ops
 import comfy.model_patcher
 import comfy.model_management
+import comfy.utils
+
+def clip_preprocess(image, size=224):
+    mean = torch.tensor([ 0.48145466,0.4578275,0.40821073], device=image.device, dtype=image.dtype)
+    std = torch.tensor([0.26862954,0.26130258,0.27577711], device=image.device, dtype=image.dtype)
+    scale = (size / min(image.shape[1], image.shape[2]))
+    image = torch.nn.functional.interpolate(image.movedim(-1, 1), size=(round(scale * image.shape[1]), round(scale * image.shape[2])), mode="bicubic", antialias=True)
+    h = (image.shape[2] - size)//2
+    w = (image.shape[3] - size)//2
+    image = image[:,:,h:h+size,w:w+size]
+    image = torch.clip((255. * image), 0, 255).round() / 255.0
+    return (image - mean.view([3,1,1])) / std.view([3,1,1])

 class ClipVisionModel():
     def __init__(self, json_config):
@@ -23,25 +35,12 @@ def __init__(self, json_config):
         self.model.to(self.dtype)

         self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
-        self.processor = CLIPImageProcessor(crop_size=224,
-                                            do_center_crop=True,
-                                            do_convert_rgb=True,
-                                            do_normalize=True,
-                                            do_resize=True,
-                                            image_mean=[ 0.48145466,0.4578275,0.40821073],
-                                            image_std=[0.26862954,0.26130258,0.27577711],
-                                            resample=3, #bicubic
-                                            size=224)
-
     def load_sd(self, sd):
         return self.model.load_state_dict(sd, strict=False)

     def encode_image(self, image):
-        img = torch.clip((255. * image), 0, 255).round().int()
-        img = list(map(lambda a: a, img))
-        inputs = self.processor(images=img, return_tensors="pt")
         comfy.model_management.load_model_gpu(self.patcher)
-        pixel_values = inputs['pixel_values'].to(self.load_device)
+        pixel_values = clip_preprocess(image.to(self.load_device))

         if self.dtype != torch.float32:
             precision_scope = torch.autocast
@@ -92,8 +91,11 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
         json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_g.json")
     elif "vision_model.encoder.layers.30.layer_norm1.weight" in sd:
         json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_h.json")
-    else:
+    elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd:
         json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json")
+    else:
+        return None
+
     clip = ClipVisionModel(json_config)
     m, u = clip.load_sd(sd)
     if len(m) > 0:
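Note: the transformers CLIPImageProcessor is replaced by an in-repo clip_preprocess that operates on the whole batch as a tensor directly on the load device, and load_clipvision_from_sd now returns None for unrecognized state dicts instead of falling through to the ViT-L config. A usage sketch for the new preprocessing, assuming ComfyUI's IMAGE convention of a float [batch, height, width, channels] tensor in the 0..1 range:

# Sketch: the new preprocessing path on a dummy batch (ComfyUI IMAGE layout).
import torch

image = torch.rand(1, 512, 768, 3)      # [B, H, W, C], values in 0..1
pixel_values = clip_preprocess(image)   # rescaled, center-cropped, CLIP-normalized
print(pixel_values.shape)               # torch.Size([1, 3, 224, 224])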

comfy/conds.py

Lines changed: 64 additions & 0 deletions

@@ -0,0 +1,64 @@
+import enum
+import torch
+import math
+import comfy.utils
+
+
+def lcm(a, b): #TODO: eventually replace by math.lcm (added in python3.9)
+    return abs(a*b) // math.gcd(a, b)
+
+class CONDRegular:
+    def __init__(self, cond):
+        self.cond = cond
+
+    def _copy_with(self, cond):
+        return self.__class__(cond)
+
+    def process_cond(self, batch_size, device, **kwargs):
+        return self._copy_with(comfy.utils.repeat_to_batch_size(self.cond, batch_size).to(device))
+
+    def can_concat(self, other):
+        if self.cond.shape != other.cond.shape:
+            return False
+        return True
+
+    def concat(self, others):
+        conds = [self.cond]
+        for x in others:
+            conds.append(x.cond)
+        return torch.cat(conds)
+
+class CONDNoiseShape(CONDRegular):
+    def process_cond(self, batch_size, device, area, **kwargs):
+        data = self.cond[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]]
+        return self._copy_with(comfy.utils.repeat_to_batch_size(data, batch_size).to(device))
+
+
+class CONDCrossAttn(CONDRegular):
+    def can_concat(self, other):
+        s1 = self.cond.shape
+        s2 = other.cond.shape
+        if s1 != s2:
+            if s1[0] != s2[0] or s1[2] != s2[2]: #these 2 cases should not happen
+                return False
+
+            mult_min = lcm(s1[1], s2[1])
+            diff = mult_min // min(s1[1], s2[1])
+            if diff > 4: #arbitrary limit on the padding because it's probably going to impact performance negatively if it's too much
+                return False
+        return True
+
+    def concat(self, others):
+        conds = [self.cond]
+        crossattn_max_len = self.cond.shape[1]
+        for x in others:
+            c = x.cond
+            crossattn_max_len = lcm(crossattn_max_len, c.shape[1])
+            conds.append(c)
+
+        out = []
+        for c in conds:
+            if c.shape[1] < crossattn_max_len:
+                c = c.repeat(1, crossattn_max_len // c.shape[1], 1) #padding with repeat doesn't change result
+            out.append(c)
+        return torch.cat(out)
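Note: CONDCrossAttn.concat makes differently-sized cross-attention conditionings batchable by repeating each one along the token dimension up to the least common multiple of the sequence lengths; as the code comment says, uniformly repeating the context tokens leaves the softmax-weighted attention output unchanged. A worked example of the shapes (embedding sizes as in SD prompt conditioning):

# Worked example of the lcm-based padding in CONDCrossAttn.concat.
import torch

a = torch.randn(2, 77, 768)    # one prompt chunk
b = torch.randn(2, 154, 768)   # a longer, two-chunk prompt
max_len = 154                  # lcm(77, 154); diff = 154 // 77 = 2 <= 4, so can_concat allows it
a = a.repeat(1, max_len // a.shape[1], 1)  # repeat tokens; attention result is unchanged
batch = torch.cat([a, b])
print(batch.shape)             # torch.Size([4, 154, 768])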

comfy/controlnet.py

Lines changed: 2 additions & 2 deletions

@@ -156,7 +156,7 @@ def get_control(self, x_noisy, t, cond, batched_number):

         context = cond['c_crossattn']
-        y = cond.get('c_adm', None)
+        y = cond.get('y', None)
         if y is not None:
             y = y.to(self.control_model.dtype)
         control = self.control_model(x=x_noisy.to(self.control_model.dtype), hint=self.cond_hint, timesteps=t, context=context.to(self.control_model.dtype), y=y)
@@ -416,7 +416,7 @@ def get_control(self, x_noisy, t, cond, batched_number):
         if control_prev is not None:
             return control_prev
         else:
-            return {}
+            return None

         if self.cond_hint is None or x_noisy.shape[2] * 8 != self.cond_hint.shape[2] or x_noisy.shape[3] * 8 != self.cond_hint.shape[3]:
             if self.cond_hint is not None:
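Note: two small semantic fixes here: the class/ADM conditioning is now read from cond['y'] instead of the stale 'c_adm' key, and get_control signals "nothing to apply" with None rather than an empty dict, which callers can test directly. An illustrative cond dict under this convention (hypothetical shapes, roughly SDXL-sized):

# Illustrative only: the cond mapping get_control now expects.
import torch

cond = {
    "c_crossattn": torch.randn(1, 77, 768),  # prompt embeddings
    "y": torch.randn(1, 2816),               # pooled/ADM vector (hypothetical size)
}
y = cond.get("y", None)  # previously looked up under "c_adm"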

comfy/diffusers_load.py

Lines changed: 2 additions & 1 deletion

@@ -31,6 +31,7 @@ def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_dire

     vae = None
     if output_vae:
-        vae = comfy.sd.VAE(ckpt_path=vae_path)
+        sd = comfy.utils.load_torch_file(vae_path)
+        vae = comfy.sd.VAE(sd=sd)

     return (unet, clip, vae)
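Note: the VAE wrapper is now handed an already-loaded state dict instead of a checkpoint path, leaving the file reading to the caller. A sketch of the new call pattern (the path below is hypothetical):

# Sketch: load the tensors first, then construct the VAE from the state dict.
import comfy.utils
import comfy.sd

vae_path = "some_diffusers_model/vae/diffusion_pytorch_model.safetensors"  # hypothetical
sd = comfy.utils.load_torch_file(vae_path)
vae = comfy.sd.VAE(sd=sd)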
