From 53443f6fb0ac023f86174ff3a1435c06251c7479 Mon Sep 17 00:00:00 2001 From: db0 Date: Sat, 22 Jun 2024 15:29:55 +0200 Subject: [PATCH 1/9] feat: layer diffuse --- hordelib/horde.py | 17 ++++ hordelib/nodes/comfyui-layerdiffuse | 1 + .../pipelines/pipeline_stable_diffusion.json | 34 ++++++- pyproject.toml | 2 +- requirements.txt | 1 + tests/test_horde_inference_layerdiffusion.py | 92 +++++++++++++++++++ 6 files changed, 145 insertions(+), 2 deletions(-) create mode 160000 hordelib/nodes/comfyui-layerdiffuse create mode 100644 tests/test_horde_inference_layerdiffusion.py diff --git a/hordelib/horde.py b/hordelib/horde.py index f9884f6c..bd14f329 100644 --- a/hordelib/horde.py +++ b/hordelib/horde.py @@ -174,6 +174,7 @@ class HordeLib: "extra_source_images": {"datatype": list, "default": []}, # Stable Cascade Remix "extra_texts": {"datatype": list, "default": []}, # QR Codes (for now) "workflow": {"datatype": str, "default": "auto_detect"}, + "transparent": {"datatype": bool, "default": False}, } EXTRA_IMAGES_SCHEMA = { @@ -1029,6 +1030,22 @@ def _final_pipeline_adjustments(self, payload, pipeline_data) -> tuple[dict, lis pipeline_params["controlnet_qr_model_loader.control_net_name"] = ( "control_v1p_sd15_qrcode_monster_v2.safetensors" ) + if payload.get("transparent") is True: + if SharedModelManager.manager.compvis: + model_details = SharedModelManager.manager.compvis.get_model_reference_info(payload["model_name"]) + # SD2, Cascade and SD3 not supported + if model_details and model_details.get("baseline") in ["stable diffusion 1", "stable_diffusion_xl"]: + self.generator.reconnect_input(pipeline_data, "sampler.model", "layer_diffuse_apply") + self.generator.reconnect_input(pipeline_data, "layer_diffuse_apply.model", "model_loader") + self.generator.reconnect_input(pipeline_data, "output_image.images", "layer_diffuse_decode_rgba") + self.generator.reconnect_input(pipeline_data, "layer_diffuse_decode_rgba.images", "vae_decode") + if model_details.get("baseline") == "stable diffusion 1": + pipeline_params["layer_diffuse_apply.config"] = "SD15, Attention Injection, attn_sharing" + pipeline_params["layer_diffuse_decode_rgba.sd_version"] = "SD15" + else: + pipeline_params["layer_diffuse_apply.config"] = "SDXL, Conv Injection" + pipeline_params["layer_diffuse_decode_rgba.sd_version"] = "SDXL" + return pipeline_params, faults def _get_appropriate_pipeline(self, params): diff --git a/hordelib/nodes/comfyui-layerdiffuse b/hordelib/nodes/comfyui-layerdiffuse new file mode 160000 index 00000000..c5f1c0aa --- /dev/null +++ b/hordelib/nodes/comfyui-layerdiffuse @@ -0,0 +1 @@ +Subproject commit c5f1c0aa45592d2f48764472db3f7d2da622b6f1 diff --git a/hordelib/pipelines/pipeline_stable_diffusion.json b/hordelib/pipelines/pipeline_stable_diffusion.json index 983dd28d..f27780eb 100644 --- a/hordelib/pipelines/pipeline_stable_diffusion.json +++ b/hordelib/pipelines/pipeline_stable_diffusion.json @@ -1,7 +1,7 @@ { "3": { "inputs": { - "seed": 62706718437716, + "seed": 325847265780417, "steps": 20, "cfg": 8, "sampler_name": "euler", @@ -155,5 +155,37 @@ "_meta": { "title": "repeat_image_batch" } + }, + "16": { + "inputs": { + "config": "SD15, Attention Injection, attn_sharing", + "weight": 1, + "model": [ + "4", + 0 + ] + }, + "class_type": "LayeredDiffusionApply", + "_meta": { + "title": "layer_diffuse_apply" + } + }, + "17": { + "inputs": { + "sd_version": "SD15", + "sub_batch_size": 16, + "samples": [ + "3", + 0 + ], + "images": [ + "14", + 0 + ] + }, + "class_type": "LayeredDiffusionDecodeRGBA", + "_meta": { 
+ "title": "layer_diffuse_decode_rgba" + } } } diff --git a/pyproject.toml b/pyproject.toml index 14809ae8..8239c211 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ exclude = ''' [tool.ruff] # XXX this isn't part of CI yet line-length=119 -exclude=["ComfyUI", "comfy_controlnet_preprocessors", "facerestore", "comfy_qr", "build"] +exclude=["ComfyUI", "comfy_controlnet_preprocessors", "facerestore", "comfy_qr", "comfyui-layerdiffuse", "build"] ignore=[ # "F401", # imported but unused "E402", # Module level import not at top of file diff --git a/requirements.txt b/requirements.txt index bbc3bedf..1366bb3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ horde_sdk>=0.9.1 horde_model_reference>=0.5.2 pydantic +numpy==1.26.4 torch>=2.1.0 # xformers>=0.0.19 torchvision diff --git a/tests/test_horde_inference_layerdiffusion.py b/tests/test_horde_inference_layerdiffusion.py new file mode 100644 index 00000000..c4deec27 --- /dev/null +++ b/tests/test_horde_inference_layerdiffusion.py @@ -0,0 +1,92 @@ +# test_horde.py + +from PIL import Image + +from hordelib.horde import HordeLib + + +class TestHordeInferenceTransparent: + def test_layerdiffuse_sd15( + self, + hordelib_instance: HordeLib, + stable_diffusion_model_name_for_testing: str, + ): + data = { + "sampler_name": "k_euler", + "cfg_scale": 8, + "denoising_strength": 1.0, + "seed": 1312, + "height": 512, + "width": 512, + "karras": False, + "tiling": False, + "hires_fix": False, + "transparent": True, + "clip_skip": 1, + "control_type": None, + "image_is_control": False, + "return_control_map": False, + "prompt": "an ancient digital AI hydra monster###watermark, text", + "ddim_steps": 25, + "n_iter": 2, + "model": stable_diffusion_model_name_for_testing, + } + image_results = hordelib_instance.basic_inference(data) + + assert len(image_results) == 2 + + img_pairs_to_check = [] + + img_filename_base = "layer_diffusion_sd15_n_iter_{0}.png" + + for i, image_result in enumerate(image_results): + assert image_result.image is not None + assert isinstance(image_result.image, Image.Image) + + img_filename = img_filename_base.format(i) + + image_result.image.save(f"images/{img_filename}", quality=100) + img_pairs_to_check.append((f"images_expected/{img_filename}", image_result.image)) + + def test_layerdiffuse_sdxl( + self, + hordelib_instance: HordeLib, + sdxl_refined_model_name: str, + ): + data = { + "sampler_name": "k_euler", + "cfg_scale": 8, + "denoising_strength": 1.0, + "seed": 1312, + "height": 1024, + "width": 1024, + "karras": False, + "tiling": False, + "hires_fix": False, + "transparent": True, + "clip_skip": 1, + "control_type": None, + "image_is_control": False, + "return_control_map": False, + "prompt": "an ancient digital AI hydra monster###watermark, text", + "ddim_steps": 25, + "n_iter": 2, + "model": sdxl_refined_model_name, + } + + image_results = hordelib_instance.basic_inference(data) + + assert len(image_results) == 2 + + img_pairs_to_check = [] + + img_filename_base = "layer_diffusion_sdxl_n_iter_{0}.png" + + for i, image_result in enumerate(image_results): + assert image_result.image is not None + assert isinstance(image_result.image, Image.Image) + + img_filename = img_filename_base.format(i) + + image_result.image.save(f"images/{img_filename}", quality=100) + img_pairs_to_check.append((f"images_expected/{img_filename}", image_result.image)) From c07ee1d73051ba76515836d055dcaab71f780110 Mon Sep 17 00:00:00 2001 From: db0 Date: Sat, 22 Jun 2024 15:53:32 +0200 Subject: [PATCH 2/9] fix: commit 
layerdiffuse custom node correctly --- hordelib/nodes/comfyui-layerdiffuse | 1 - hordelib/nodes/comfyui_layerdiffuse/LICENSE | 201 ++++ hordelib/nodes/comfyui_layerdiffuse/README.md | 65 ++ .../nodes/comfyui_layerdiffuse/__init__.py | 3 + .../layer_diffusion_cond_example.json | 668 ++++++++++++ .../examples/layer_diffusion_cond_fg_all.json | 951 ++++++++++++++++++ .../layer_diffusion_cond_joint_bg.json | 723 +++++++++++++ .../layer_diffusion_cond_joint_fg.json | 480 +++++++++ .../examples/layer_diffusion_diff_bg.json | 750 ++++++++++++++ .../layer_diffusion_diff_bg_stop_at.json | 877 ++++++++++++++++ .../examples/layer_diffusion_diff_fg.json | 686 +++++++++++++ .../examples/layer_diffusion_fg_example.json | 733 ++++++++++++++ .../layer_diffusion_fg_example_rgba.json | 511 ++++++++++ .../examples/layer_diffusion_joint.json | 703 +++++++++++++ .../comfyui_layerdiffuse/layered_diffusion.py | 659 ++++++++++++ .../lib_layerdiffusion/__init__.py | 0 .../lib_layerdiffusion/attention_sharing.py | 327 ++++++ .../lib_layerdiffusion/enums.py | 23 + .../lib_layerdiffusion/models.py | 296 ++++++ .../lib_layerdiffusion/utils.py | 139 +++ .../nodes/comfyui_layerdiffuse/pyproject.toml | 15 + .../comfyui_layerdiffuse/requirements.txt | 2 + mypy.ini | 4 + pyproject.toml | 3 +- 24 files changed, 8818 insertions(+), 2 deletions(-) delete mode 160000 hordelib/nodes/comfyui-layerdiffuse create mode 100644 hordelib/nodes/comfyui_layerdiffuse/LICENSE create mode 100644 hordelib/nodes/comfyui_layerdiffuse/README.md create mode 100644 hordelib/nodes/comfyui_layerdiffuse/__init__.py create mode 100644 hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_example.json create mode 100644 hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_fg_all.json create mode 100644 hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_joint_bg.json create mode 100644 hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_joint_fg.json create mode 100644 hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_diff_bg.json create mode 100644 hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_diff_bg_stop_at.json create mode 100644 hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_diff_fg.json create mode 100644 hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_fg_example.json create mode 100644 hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_fg_example_rgba.json create mode 100644 hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_joint.json create mode 100644 hordelib/nodes/comfyui_layerdiffuse/layered_diffusion.py create mode 100644 hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/__init__.py create mode 100644 hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/attention_sharing.py create mode 100644 hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/enums.py create mode 100644 hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/models.py create mode 100644 hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/utils.py create mode 100644 hordelib/nodes/comfyui_layerdiffuse/pyproject.toml create mode 100644 hordelib/nodes/comfyui_layerdiffuse/requirements.txt diff --git a/hordelib/nodes/comfyui-layerdiffuse b/hordelib/nodes/comfyui-layerdiffuse deleted file mode 160000 index c5f1c0aa..00000000 --- a/hordelib/nodes/comfyui-layerdiffuse +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c5f1c0aa45592d2f48764472db3f7d2da622b6f1 diff --git a/hordelib/nodes/comfyui_layerdiffuse/LICENSE 
b/hordelib/nodes/comfyui_layerdiffuse/LICENSE new file mode 100644 index 00000000..b09cd785 --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/LICENSE @@ -0,0 +1,201 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/hordelib/nodes/comfyui_layerdiffuse/README.md b/hordelib/nodes/comfyui_layerdiffuse/README.md
new file mode 100644
index 00000000..2e496ff1
--- /dev/null
+++ b/hordelib/nodes/comfyui_layerdiffuse/README.md
@@ -0,0 +1,65 @@
+# ComfyUI-layerdiffuse
+ComfyUI implementation of https://github.com/layerdiffusion/LayerDiffuse.
+
+## Installation
+Download the repository and unpack it into the custom_nodes folder in the ComfyUI installation directory.
+
+Or clone via Git, starting from the ComfyUI installation directory:
+```bash
+cd custom_nodes
+git clone git@github.com:huchenlei/ComfyUI-layerdiffuse.git
+```
+
+Run `pip install -r requirements.txt` to install the Python dependencies. You might run into a version conflict on diffusers if other extensions depend on a different diffusers version; in that case, it is recommended to set up a separate Python venv.
+
+## Workflows
+### [Generate foreground](https://github.com/huchenlei/ComfyUI-layerdiffuse/blob/main/examples/layer_diffusion_fg_example_rgba.json)
+![rgba](https://github.com/huchenlei/ComfyUI-layerdiffuse/assets/20929282/5e6085e5-d997-4a0a-b589-257d65eb1eb2)
+
+### [Generate foreground (RGB + alpha)](https://github.com/huchenlei/ComfyUI-layerdiffuse/blob/main/examples/layer_diffusion_fg_example.json)
+If you want more control and need the RGB image and the alpha channel mask separately, you can use this workflow.
+![readme1](https://github.com/huchenlei/ComfyUI-layerdiffuse/assets/20929282/4825b81c-7089-4806-bce7-777229421707)
+
+### [Blending (FG/BG)](https://github.com/huchenlei/ComfyUI-layerdiffuse/blob/main/examples/layer_diffusion_cond_example.json)
+Blending given FG
+![fg_cond](https://github.com/huchenlei/ComfyUI-layerdiffuse/assets/20929282/7f7dee80-6e57-4570-b304-d1f7e5dc3aad)
+
+Blending given BG
+![bg_cond](https://github.com/huchenlei/ComfyUI-layerdiffuse/assets/20929282/e3a79218-6123-453b-a54b-2f338db1c12d)
+
+### [Extract FG from Blended + BG](https://github.com/huchenlei/ComfyUI-layerdiffuse/blob/main/examples/layer_diffusion_diff_fg.json)
+![diff_bg](https://github.com/huchenlei/ComfyUI-layerdiffuse/assets/20929282/45c7207d-72ff-4fb0-9c91-687040781837)
+
+### [Extract BG from Blended + FG](https://github.com/huchenlei/ComfyUI-layerdiffuse/blob/main/examples/layer_diffusion_diff_bg.json)
+[Forge impl's sanity check](https://github.com/layerdiffuse/sd-forge-layerdiffuse#sanity-check) sets `Stop at` to 0.5 to get a better-quality BG.
+This workflow might be inferior to other object-removal workflows.
+![diff_fg](https://github.com/huchenlei/ComfyUI-layerdiffuse/assets/20929282/05a10add-68b0-473a-acee-5853e4720322)
+
+### [Extract BG from Blended + FG (Stop at 0.5)](https://github.com/huchenlei/ComfyUI-layerdiffuse/blob/main/examples/layer_diffusion_diff_bg_stop_at.json)
+In the [SD Forge impl](https://github.com/layerdiffuse/sd-forge-layerdiffuse), there is a `stop at` param that determines when
+layer diffuse should stop in the denoising process. Under the hood, this param unapplies the LoRA and the c_concat cond once a certain step
+threshold is reached.
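+A rough, framework-free sketch of that idea (the callables here are hypothetical stand-ins, not this extension's API): run denoising steps with the
+layer diffusion patch active until the threshold, then continue with the plain model.
+```python
+from typing import Any, Callable
+
+# One denoising step: (latent, step_index) -> latent. `patched_step` stands in
+# for a step with the layer diffusion patch applied, `base_step` for one without.
+Step = Callable[[Any, int], Any]
+
+
+def denoise_with_stop_at(latent: Any, patched_step: Step, base_step: Step, steps: int = 20, stop_at: float = 0.5) -> Any:
+    threshold = int(steps * stop_at)
+    for i in range(steps):
+        # Before the threshold the layer diffusion changes are active; afterwards
+        # they are "unapplied" by stepping with the plain model instead.
+        step = patched_step if i < threshold else base_step
+        latent = step(latent, i)
+    return latent
+```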
+Implementing this directly in ComfyUI is hard/risky, as it requires manually loading a model that has every change except the layer diffusion
+change applied. A workaround in ComfyUI is to run another img2img pass on the layer diffuse result to simulate the effect of the `stop at` param.
+![diff_fg_stop_at](https://github.com/huchenlei/ComfyUI-layerdiffuse/assets/20929282/e383c9d3-2d47-40c2-b764-b0bd48243ee8)
+
+
+### [Generate FG from BG combined](https://github.com/huchenlei/ComfyUI-layerdiffuse/blob/main/examples/layer_diffusion_cond_fg_all.json)
+Combines the previous workflows to generate the blended image and the FG given a BG. We found some color variation in the extracted FG; we need to confirm
+with the layer diffusion authors whether this is expected.
+![fg_all](https://github.com/huchenlei/ComfyUI-layerdiffuse/assets/20929282/f4c18585-961a-473a-a616-aa3776bacd41)
+
+### [2024-3-9] [Generate FG + Blended given BG](https://github.com/huchenlei/ComfyUI-layerdiffuse/blob/main/examples/layer_diffusion_cond_joint_bg.json)
+Needs batch size = 2N. Currently only for SD15.
+![sd15_cond_joint_bg](https://github.com/huchenlei/ComfyUI-layerdiffuse/assets/20929282/9bbfe5c1-14a0-421d-bf06-85e301bf8065)
+
+### [2024-3-9] [Generate BG + Blended given FG](https://github.com/huchenlei/ComfyUI-layerdiffuse/blob/main/examples/layer_diffusion_cond_joint_fg.json)
+Needs batch size = 2N. Currently only for SD15.
+![sd15_cond_joint_fg](https://github.com/huchenlei/ComfyUI-layerdiffuse/assets/20929282/65af8b38-cf4c-4667-b76f-3013a0be0a48)
+
+### [2024-3-9] [Generate BG + FG + Blended together](https://github.com/huchenlei/ComfyUI-layerdiffuse/blob/main/examples/layer_diffusion_joint.json)
+Needs batch size = 3N. Currently only for SD15.
+![sd15_joint](https://github.com/huchenlei/ComfyUI-layerdiffuse/assets/20929282/e5545809-e3fb-4683-acf5-8728195cb2bc)
+
+## Note
+- Currently only SDXL/SD15 are supported. See https://github.com/layerdiffuse/sd-forge-layerdiffuse#model-notes for more details.
+- To decode the RGBA result, the generation dimensions must be multiples of 64; see the sketch below.
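+A hedged illustration (the helper name is hypothetical, not part of this repo): round the requested dimensions up to the nearest multiple of 64 before generation.
+```python
+def round_up_to_64(value: int) -> int:
+    """Round a generation dimension up to the nearest multiple of 64."""
+    return ((value + 63) // 64) * 64
+
+
+width, height = round_up_to_64(1000), round_up_to_64(600)  # -> 1024, 640
+```
+Adjust both width and height this way before sampling.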
Otherwise, you will get decode error: ![image](https://github.com/huchenlei/ComfyUI-layerdiffuse/assets/20929282/ff055f99-9297-4ff1-9a33-065aaadcf98e) diff --git a/hordelib/nodes/comfyui_layerdiffuse/__init__.py b/hordelib/nodes/comfyui_layerdiffuse/__init__.py new file mode 100644 index 00000000..15d99aff --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/__init__.py @@ -0,0 +1,3 @@ +from .layered_diffusion import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS + +__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"] diff --git a/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_example.json b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_example.json new file mode 100644 index 00000000..02e25597 --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_example.json @@ -0,0 +1,668 @@ +{ + "last_node_id": 35, + "last_link_id": 52, + "nodes": [ + { + "id": 4, + "type": "CheckpointLoaderSimple", + "pos": [ + 5, + 479 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 38 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 3, + 5 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 22, + 49 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "juggernautXL_v8Rundiffusion.safetensors" + ] + }, + { + "id": 29, + "type": "VAEEncode", + "pos": [ + 212, + -22 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "pixels", + "type": "IMAGE", + "link": 51 + }, + { + "name": "vae", + "type": "VAE", + "link": 49, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 47 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEEncode" + } + }, + { + "id": 30, + "type": "LoadImage", + "pos": [ + -363, + 209 + ], + "size": { + "0": 315, + "1": 314 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 50 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "309219693-e7e2d80e-ffbe-4724-812a-5139a88027e3.png", + "image" + ] + }, + { + "id": 20, + "type": "PreviewImage", + "pos": [ + 1556, + 138 + ], + "size": { + "0": 611.2340087890625, + "1": 633.9354858398438 + }, + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 29 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": { + "0": 422.84503173828125, + "1": 164.31304931640625 + }, + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 3 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 39 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "old man sitting, high quality\n\n" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": { + "0": 425.27801513671875, + "1": 180.6060791015625 + }, + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + 
{ + "name": "clip", + "type": "CLIP", + "link": 5 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 40 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "text, watermark" + ] + }, + { + "id": 34, + "type": "PreviewImage", + "pos": [ + 213, + -346 + ], + "size": { + "0": 210, + "1": 246 + }, + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 52 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 14, + "type": "VAEDecode", + "pos": [ + 1275, + 198 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 21 + }, + { + "name": "vae", + "type": "VAE", + "link": 22, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 29 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 913, + 181 + ], + "size": { + "0": 315, + "1": 262 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 41 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 46 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 45 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 2 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 21 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 100676796092754, + "randomize", + 20, + 8, + "euler", + "normal", + 1 + ] + }, + { + "id": 5, + "type": "EmptyLatentImage", + "pos": [ + 475, + 666 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 2, + "mode": 0, + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 2 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyLatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 33, + "type": "ImageResize+", + "pos": [ + -146, + -16 + ], + "size": { + "0": 315, + "1": 170 + }, + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 50 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 51, + 52 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "width", + "type": "INT", + "links": null, + "shape": 3 + }, + { + "name": "height", + "type": "INT", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "ImageResize+" + }, + "widgets_values": [ + 1024, + 1024, + "nearest", + false + ] + }, + { + "id": 28, + "type": "LayeredDiffusionCondApply", + "pos": [ + 465, + -26 + ], + "size": { + "0": 315, + "1": 142 + }, + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 38 + }, + { + "name": "cond", + "type": "CONDITIONING", + "link": 39 + }, + { + "name": "uncond", + "type": "CONDITIONING", + "link": 40 + }, + { + "name": "latent", + "type": "LATENT", + "link": 47 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 41 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 46 + ], + "shape": 3, + "slot_index": 1 + }, + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 45 + ], 
+ "shape": 3, + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionCondApply" + }, + "widgets_values": [ + "SDXL, Background", + 1 + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 2, + 5, + 0, + 3, + 3, + "LATENT" + ], + [ + 3, + 4, + 1, + 6, + 0, + "CLIP" + ], + [ + 5, + 4, + 1, + 7, + 0, + "CLIP" + ], + [ + 21, + 3, + 0, + 14, + 0, + "LATENT" + ], + [ + 22, + 4, + 2, + 14, + 1, + "VAE" + ], + [ + 29, + 14, + 0, + 20, + 0, + "IMAGE" + ], + [ + 38, + 4, + 0, + 28, + 0, + "MODEL" + ], + [ + 39, + 6, + 0, + 28, + 1, + "CONDITIONING" + ], + [ + 40, + 7, + 0, + 28, + 2, + "CONDITIONING" + ], + [ + 41, + 28, + 0, + 3, + 0, + "MODEL" + ], + [ + 45, + 28, + 2, + 3, + 2, + "CONDITIONING" + ], + [ + 46, + 28, + 1, + 3, + 1, + "CONDITIONING" + ], + [ + 47, + 29, + 0, + 28, + 3, + "LATENT" + ], + [ + 49, + 4, + 2, + 29, + 1, + "VAE" + ], + [ + 50, + 30, + 0, + 33, + 0, + "IMAGE" + ], + [ + 51, + 33, + 0, + 29, + 0, + "IMAGE" + ], + [ + 52, + 33, + 0, + 34, + 0, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} diff --git a/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_fg_all.json b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_fg_all.json new file mode 100644 index 00000000..e2134a9e --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_fg_all.json @@ -0,0 +1,951 @@ +{ + "last_node_id": 56, + "last_link_id": 104, + "nodes": [ + { + "id": 14, + "type": "VAEDecode", + "pos": [ + 1286, + 187 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 21 + }, + { + "name": "vae", + "type": "VAE", + "link": 22, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 65 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 40, + "type": "LayeredDiffusionDecodeRGBA", + "pos": [ + 1533, + 189 + ], + "size": { + "0": 243.60000610351562, + "1": 102 + }, + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 67 + }, + { + "name": "images", + "type": "IMAGE", + "link": 65 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 66 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionDecodeRGBA" + }, + "widgets_values": [ + "SDXL", + 16 + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 47, + "type": "VAEDecode", + "pos": [ + 1360, + 900 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 74 + }, + { + "name": "vae", + "type": "VAE", + "link": 104, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 96 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 49, + "type": "VAEEncode", + "pos": [ + 280, + 690 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "pixels", + "type": "IMAGE", + "link": 77 + }, + { + "name": "vae", + "type": "VAE", + "link": 102, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 89, + 97 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name 
for S&R": "VAEEncode" + } + }, + { + "id": 56, + "type": "PreviewImage", + "pos": [ + 1800, + 900 + ], + "size": { + "0": 611.2340087890625, + "1": 633.9354858398438 + }, + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 96 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": { + "0": 422.84503173828125, + "1": 164.31304931640625 + }, + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 3 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 56, + 99 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "an old man sitting, high quality\n\n" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": { + "0": 425.27801513671875, + "1": 180.6060791015625 + }, + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 5 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 57, + 100 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "text, watermark" + ] + }, + { + "id": 42, + "type": "KSampler", + "pos": [ + 990, + 850 + ], + "size": { + "0": 315, + "1": 262 + }, + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 93 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 94 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 95 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 71 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 74, + 98 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 216474886443753, + "randomize", + 20, + 8, + "euler", + "normal", + 1 + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 913, + 182 + ], + "size": { + "0": 315, + "1": 262 + }, + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 62 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 63 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 64 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 101, + "slot_index": 3 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 21, + 67 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 137168876920770, + "randomize", + 20, + 8, + "euler", + "normal", + 1 + ] + }, + { + "id": 4, + "type": "CheckpointLoaderSimple", + "pos": [ + -54, + 488 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 55, + 103 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 3, + 5 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 22, + 102, + 104 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "juggernautXL_v8Rundiffusion.safetensors" + ] + }, + { + "id": 50, + "type": "LoadImage", + "pos": [ + -59, + 686 + ], + "size": { + "0": 315, + "1": 314 + }, + "flags": {}, + 
"order": 1, + "mode": 0, + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 77 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "chair.png", + "image" + ] + }, + { + "id": 44, + "type": "EmptyLatentImage", + "pos": [ + 524, + 944 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 2, + "mode": 0, + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 71, + 101 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyLatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 55, + "type": "LayeredDiffusionCondApply", + "pos": [ + 530, + 680 + ], + "size": { + "0": 315, + "1": 142 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 103, + "slot_index": 0 + }, + { + "name": "cond", + "type": "CONDITIONING", + "link": 99 + }, + { + "name": "uncond", + "type": "CONDITIONING", + "link": 100 + }, + { + "name": "latent", + "type": "LATENT", + "link": 89, + "slot_index": 3 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 93 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 94 + ], + "shape": 3, + "slot_index": 1 + }, + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 95 + ], + "shape": 3, + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionCondApply" + }, + "widgets_values": [ + "SDXL, Background", + 1 + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 37, + "type": "LayeredDiffusionDiffApply", + "pos": [ + 457, + -37 + ], + "size": { + "0": 342.5999755859375, + "1": 162 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 55, + "slot_index": 0 + }, + { + "name": "cond", + "type": "CONDITIONING", + "link": 56, + "slot_index": 1 + }, + { + "name": "uncond", + "type": "CONDITIONING", + "link": 57, + "slot_index": 2 + }, + { + "name": "blended_latent", + "type": "LATENT", + "link": 98 + }, + { + "name": "latent", + "type": "LATENT", + "link": 97 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 62 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 63 + ], + "shape": 3, + "slot_index": 1 + }, + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 64 + ], + "shape": 3, + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionDiffApply" + }, + "widgets_values": [ + "SDXL, Background", + 1 + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 20, + "type": "PreviewImage", + "pos": [ + 1815, + 194 + ], + "size": { + "0": 611.2340087890625, + "1": 633.9354858398438 + }, + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 66 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + } + ], + "links": [ + [ + 3, + 4, + 1, + 6, + 0, + "CLIP" + ], + [ + 5, + 4, + 1, + 7, + 0, + "CLIP" + ], + [ + 21, + 3, + 0, + 14, + 0, + "LATENT" + ], + [ + 22, + 4, + 2, + 14, + 1, + "VAE" + ], + [ + 55, + 4, + 0, + 37, + 0, + "MODEL" + ], + [ + 56, + 6, + 0, + 37, + 1, + "CONDITIONING" + ], + [ + 57, + 7, + 0, + 37, + 2, + "CONDITIONING" + ], + [ + 62, + 37, + 0, + 3, + 0, + "MODEL" + ], + 
[ + 63, + 37, + 1, + 3, + 1, + "CONDITIONING" + ], + [ + 64, + 37, + 2, + 3, + 2, + "CONDITIONING" + ], + [ + 65, + 14, + 0, + 40, + 1, + "IMAGE" + ], + [ + 66, + 40, + 0, + 20, + 0, + "IMAGE" + ], + [ + 67, + 3, + 0, + 40, + 0, + "LATENT" + ], + [ + 71, + 44, + 0, + 42, + 3, + "LATENT" + ], + [ + 74, + 42, + 0, + 47, + 0, + "LATENT" + ], + [ + 77, + 50, + 0, + 49, + 0, + "IMAGE" + ], + [ + 89, + 49, + 0, + 55, + 3, + "LATENT" + ], + [ + 93, + 55, + 0, + 42, + 0, + "MODEL" + ], + [ + 94, + 55, + 1, + 42, + 1, + "CONDITIONING" + ], + [ + 95, + 55, + 2, + 42, + 2, + "CONDITIONING" + ], + [ + 96, + 47, + 0, + 56, + 0, + "IMAGE" + ], + [ + 97, + 49, + 0, + 37, + 4, + "LATENT" + ], + [ + 98, + 42, + 0, + 37, + 3, + "LATENT" + ], + [ + 99, + 6, + 0, + 55, + 1, + "CONDITIONING" + ], + [ + 100, + 7, + 0, + 55, + 2, + "CONDITIONING" + ], + [ + 101, + 44, + 0, + 3, + 3, + "LATENT" + ], + [ + 102, + 4, + 2, + 49, + 1, + "VAE" + ], + [ + 103, + 4, + 0, + 55, + 0, + "MODEL" + ], + [ + 104, + 4, + 2, + 47, + 1, + "VAE" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} diff --git a/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_joint_bg.json b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_joint_bg.json new file mode 100644 index 00000000..57a90d08 --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_joint_bg.json @@ -0,0 +1,723 @@ +{ + "last_node_id": 53, + "last_link_id": 88, + "nodes": [ + { + "id": 33, + "type": "ImageResize+", + "pos": [ + 50, + -10 + ], + "size": { + "0": 315, + "1": 170 + }, + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 50 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 52, + 54 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "width", + "type": "INT", + "links": null, + "shape": 3 + }, + { + "name": "height", + "type": "INT", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "ImageResize+" + }, + "widgets_values": [ + 512, + 512, + "nearest", + false + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": { + "0": 422.84503173828125, + "1": 164.31304931640625 + }, + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 3 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 64 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "old man sitting, high quality\n\n" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": { + "0": 425.27801513671875, + "1": 180.6060791015625 + }, + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 5 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 65 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "text, watermark" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 915, + 176 + ], + "size": { + "0": 315, + "1": 262 + }, + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 56 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 64 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 65 + }, + { + "name": "latent_image", + "type": "LATENT", + 
"link": 2 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 70, + 84 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 674865838825506, + "randomize", + 20, + 8, + "euler", + "normal", + 1 + ] + }, + { + "id": 50, + "type": "PreviewImage", + "pos": [ + 2040, + -120 + ], + "size": { + "0": 210, + "1": 246 + }, + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 85 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 5, + "type": "EmptyLatentImage", + "pos": [ + 475, + 666 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 2 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyLatentImage" + }, + "widgets_values": [ + 512, + 512, + 4 + ] + }, + { + "id": 44, + "type": "VAEDecode", + "pos": [ + 1260, + 180 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 70 + }, + { + "name": "vae", + "type": "VAE", + "link": 88, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 75, + 83 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 4, + "type": "CheckpointLoaderSimple", + "pos": [ + 5, + 479 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 55 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 3, + 5 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 88 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "realisticVisionV20_v20.safetensors" + ] + }, + { + "id": 46, + "type": "PreviewImage", + "pos": [ + 1460, + 410 + ], + "size": [ + 406.59525756835933, + 340.5699157714844 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 75 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 34, + "type": "PreviewImage", + "pos": [ + 471, + -337 + ], + "size": { + "0": 210, + "1": 246 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 52 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 37, + "type": "LayeredDiffusionCondJointApply", + "pos": [ + 429, + -13 + ], + "size": { + "0": 388, + "1": 138 + }, + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 55, + "slot_index": 0 + }, + { + "name": "image", + "type": "IMAGE", + "link": 54, + "slot_index": 1 + }, + { + "name": "cond", + "type": "CONDITIONING", + "link": null + }, + { + "name": "blended_cond", + "type": "CONDITIONING", + "link": null + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 56 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionCondJointApply" + }, + "widgets_values": [ + "SD15, Background, attn_sharing, Batch size (2N)" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 52, + "type": "LayeredDiffusionDecodeSplit", + "pos": [ + 1544, 
+ 177 + ], + "size": { + "0": 315, + "1": 146 + }, + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 84 + }, + { + "name": "images", + "type": "IMAGE", + "link": 83 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 85 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 86 + ], + "shape": 3, + "slot_index": 1 + }, + { + "name": "IMAGE", + "type": "IMAGE", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionDecodeSplit" + }, + "widgets_values": [ + 2, + "SDXL", + 16 + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 51, + "type": "PreviewImage", + "pos": [ + 2040, + 201 + ], + "size": { + "0": 210, + "1": 246 + }, + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 86 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 30, + "type": "LoadImage", + "pos": [ + -313, + -10 + ], + "size": { + "0": 315, + "1": 314 + }, + "flags": {}, + "order": 2, + "mode": 0, + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 50 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "309219693-e7e2d80e-ffbe-4724-812a-5139a88027e3.png", + "image" + ] + } + ], + "links": [ + [ + 2, + 5, + 0, + 3, + 3, + "LATENT" + ], + [ + 3, + 4, + 1, + 6, + 0, + "CLIP" + ], + [ + 5, + 4, + 1, + 7, + 0, + "CLIP" + ], + [ + 50, + 30, + 0, + 33, + 0, + "IMAGE" + ], + [ + 52, + 33, + 0, + 34, + 0, + "IMAGE" + ], + [ + 54, + 33, + 0, + 37, + 1, + "IMAGE" + ], + [ + 55, + 4, + 0, + 37, + 0, + "MODEL" + ], + [ + 56, + 37, + 0, + 3, + 0, + "MODEL" + ], + [ + 64, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 65, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 70, + 3, + 0, + 44, + 0, + "LATENT" + ], + [ + 75, + 44, + 0, + 46, + 0, + "IMAGE" + ], + [ + 83, + 44, + 0, + 52, + 1, + "IMAGE" + ], + [ + 84, + 3, + 0, + 52, + 0, + "LATENT" + ], + [ + 85, + 52, + 0, + 50, + 0, + "IMAGE" + ], + [ + 86, + 52, + 1, + 51, + 0, + "IMAGE" + ], + [ + 88, + 4, + 2, + 44, + 1, + "VAE" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} diff --git a/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_joint_fg.json b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_joint_fg.json new file mode 100644 index 00000000..d60eb677 --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_cond_joint_fg.json @@ -0,0 +1,480 @@ +{ + "last_node_id": 53, + "last_link_id": 90, + "nodes": [ + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": { + "0": 425.27801513671875, + "1": 180.6060791015625 + }, + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 5 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 65 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "text, watermark" + ] + }, + { + "id": 4, + "type": "CheckpointLoaderSimple", + "pos": [ + 5, + 479 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 55 + ], + "slot_index": 0 + }, + { + "name": 
"CLIP", + "type": "CLIP", + "links": [ + 3, + 5 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 88 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "realisticVisionV20_v20.safetensors" + ] + }, + { + "id": 46, + "type": "PreviewImage", + "pos": [ + 1525, + 183 + ], + "size": [ + 406.59525756835933, + 340.5699157714844 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 75 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 37, + "type": "LayeredDiffusionCondJointApply", + "pos": [ + 436, + -13 + ], + "size": { + "0": 388, + "1": 138 + }, + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 55, + "slot_index": 0 + }, + { + "name": "image", + "type": "IMAGE", + "link": 90, + "slot_index": 1 + }, + { + "name": "cond", + "type": "CONDITIONING", + "link": null + }, + { + "name": "blended_cond", + "type": "CONDITIONING", + "link": null + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 56 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionCondJointApply" + }, + "widgets_values": [ + "SD15, Foreground, attn_sharing, Batch size (2N)" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 5, + "type": "EmptyLatentImage", + "pos": [ + 465, + 671 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 2 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyLatentImage" + }, + "widgets_values": [ + 512, + 512, + 4 + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": { + "0": 422.84503173828125, + "1": 164.31304931640625 + }, + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 3 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 64 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "\n" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 903, + 180 + ], + "size": { + "0": 315, + "1": 262 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 56 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 64 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 65 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 2 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 70 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 748570836161213, + "randomize", + 20, + 8, + "euler", + "normal", + 1 + ] + }, + { + "id": 44, + "type": "VAEDecode", + "pos": [ + 1258, + 184 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 70 + }, + { + "name": "vae", + "type": "VAE", + "link": 88, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 75 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 30, + "type": "LoadImage", + "pos": [ + 6, + 5 + ], + 
"size": { + "0": 315, + "1": 314 + }, + "flags": {}, + "order": 2, + "mode": 0, + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 90 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "dog (2).png", + "image" + ] + } + ], + "links": [ + [ + 2, + 5, + 0, + 3, + 3, + "LATENT" + ], + [ + 3, + 4, + 1, + 6, + 0, + "CLIP" + ], + [ + 5, + 4, + 1, + 7, + 0, + "CLIP" + ], + [ + 55, + 4, + 0, + 37, + 0, + "MODEL" + ], + [ + 56, + 37, + 0, + 3, + 0, + "MODEL" + ], + [ + 64, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 65, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 70, + 3, + 0, + 44, + 0, + "LATENT" + ], + [ + 75, + 44, + 0, + 46, + 0, + "IMAGE" + ], + [ + 88, + 4, + 2, + 44, + 1, + "VAE" + ], + [ + 90, + 30, + 0, + 37, + 1, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} diff --git a/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_diff_bg.json b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_diff_bg.json new file mode 100644 index 00000000..32f66511 --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_diff_bg.json @@ -0,0 +1,750 @@ +{ + "last_node_id": 40, + "last_link_id": 67, + "nodes": [ + { + "id": 4, + "type": "CheckpointLoaderSimple", + "pos": [ + 5, + 479 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 55 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 3, + 5 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 22, + 49, + 58 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "juggernautXL_v8Rundiffusion.safetensors" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": { + "0": 425.27801513671875, + "1": 180.6060791015625 + }, + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 5 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 57 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "text, watermark" + ] + }, + { + "id": 5, + "type": "EmptyLatentImage", + "pos": [ + 475, + 666 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 2 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyLatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 39, + "type": "VAEEncode", + "pos": [ + 201, + -391 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "pixels", + "type": "IMAGE", + "link": 59, + "slot_index": 0 + }, + { + "name": "vae", + "type": "VAE", + "link": 58, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 60 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEEncode" + } + }, + { + "id": 29, + "type": "VAEEncode", + "pos": [ + 210, + -20 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "pixels", + "type": 
"IMAGE", + "link": 53 + }, + { + "name": "vae", + "type": "VAE", + "link": 49, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 61 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEEncode" + } + }, + { + "id": 38, + "type": "LoadImage", + "pos": [ + -137, + -388 + ], + "size": { + "0": 288.47406005859375, + "1": 317.46051025390625 + }, + "flags": {}, + "order": 2, + "mode": 0, + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 59 + ], + "shape": 3 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "old_man.png", + "image" + ] + }, + { + "id": 30, + "type": "LoadImage", + "pos": [ + -146, + -22 + ], + "size": { + "0": 315, + "1": 314 + }, + "flags": {}, + "order": 3, + "mode": 0, + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 53 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "chair.png", + "image" + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": { + "0": 422.84503173828125, + "1": 164.31304931640625 + }, + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 3 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 56 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "an old man sitting, high quality\n\n" + ] + }, + { + "id": 14, + "type": "VAEDecode", + "pos": [ + 1286, + 187 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 21 + }, + { + "name": "vae", + "type": "VAE", + "link": 22, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 65 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 913, + 182 + ], + "size": { + "0": 315, + "1": 262 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 62 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 63 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 64 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 2 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 21, + 67 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 462370085958750, + "fixed", + 20, + 8, + "euler", + "normal", + 1 + ] + }, + { + "id": 20, + "type": "PreviewImage", + "pos": [ + 1800, + 190 + ], + "size": { + "0": 611.2340087890625, + "1": 633.9354858398438 + }, + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 66 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 37, + "type": "LayeredDiffusionDiffApply", + "pos": [ + 457, + -37 + ], + "size": { + "0": 342.5999755859375, + "1": 162 + }, + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 55, + "slot_index": 0 + }, + { + "name": 
"cond", + "type": "CONDITIONING", + "link": 56, + "slot_index": 1 + }, + { + "name": "uncond", + "type": "CONDITIONING", + "link": 57, + "slot_index": 2 + }, + { + "name": "blended_latent", + "type": "LATENT", + "link": 60 + }, + { + "name": "latent", + "type": "LATENT", + "link": 61 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 62 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 63 + ], + "shape": 3, + "slot_index": 1 + }, + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 64 + ], + "shape": 3, + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionDiffApply" + }, + "widgets_values": [ + "SDXL, Background", + 1 + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 40, + "type": "LayeredDiffusionDecodeRGBA", + "pos": [ + 1533, + 189 + ], + "size": { + "0": 243.60000610351562, + "1": 102 + }, + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 67 + }, + { + "name": "images", + "type": "IMAGE", + "link": 65 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 66 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionDecodeRGBA" + }, + "widgets_values": [ + "SDXL", + 16 + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 2, + 5, + 0, + 3, + 3, + "LATENT" + ], + [ + 3, + 4, + 1, + 6, + 0, + "CLIP" + ], + [ + 5, + 4, + 1, + 7, + 0, + "CLIP" + ], + [ + 21, + 3, + 0, + 14, + 0, + "LATENT" + ], + [ + 22, + 4, + 2, + 14, + 1, + "VAE" + ], + [ + 49, + 4, + 2, + 29, + 1, + "VAE" + ], + [ + 53, + 30, + 0, + 29, + 0, + "IMAGE" + ], + [ + 55, + 4, + 0, + 37, + 0, + "MODEL" + ], + [ + 56, + 6, + 0, + 37, + 1, + "CONDITIONING" + ], + [ + 57, + 7, + 0, + 37, + 2, + "CONDITIONING" + ], + [ + 58, + 4, + 2, + 39, + 1, + "VAE" + ], + [ + 59, + 38, + 0, + 39, + 0, + "IMAGE" + ], + [ + 60, + 39, + 0, + 37, + 3, + "LATENT" + ], + [ + 61, + 29, + 0, + 37, + 4, + "LATENT" + ], + [ + 62, + 37, + 0, + 3, + 0, + "MODEL" + ], + [ + 63, + 37, + 1, + 3, + 1, + "CONDITIONING" + ], + [ + 64, + 37, + 2, + 3, + 2, + "CONDITIONING" + ], + [ + 65, + 14, + 0, + 40, + 1, + "IMAGE" + ], + [ + 66, + 40, + 0, + 20, + 0, + "IMAGE" + ], + [ + 67, + 3, + 0, + 40, + 0, + "LATENT" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} diff --git a/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_diff_bg_stop_at.json b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_diff_bg_stop_at.json new file mode 100644 index 00000000..18d9d9e3 --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_diff_bg_stop_at.json @@ -0,0 +1,877 @@ +{ + "last_node_id": 45, + "last_link_id": 86, + "nodes": [ + { + "id": 5, + "type": "EmptyLatentImage", + "pos": [ + 475, + 666 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 2 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyLatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 38, + "type": "LoadImage", + "pos": [ + -137, + -388 + ], + "size": { + "0": 288.47406005859375, + "1": 317.46051025390625 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 59 + ], + "shape": 3 + }, + { + "name": "MASK", + 
"type": "MASK", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "blended.png", + "image" + ] + }, + { + "id": 39, + "type": "VAEEncode", + "pos": [ + 201, + -391 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "pixels", + "type": "IMAGE", + "link": 59, + "slot_index": 0 + }, + { + "name": "vae", + "type": "VAE", + "link": 58, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 60 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEEncode" + } + }, + { + "id": 29, + "type": "VAEEncode", + "pos": [ + 210, + -20 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "pixels", + "type": "IMAGE", + "link": 53 + }, + { + "name": "vae", + "type": "VAE", + "link": 49, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 61 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEEncode" + } + }, + { + "id": 30, + "type": "LoadImage", + "pos": [ + -146, + -22 + ], + "size": { + "0": 315, + "1": 314 + }, + "flags": {}, + "order": 2, + "mode": 0, + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 53 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "dog.png", + "image" + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": { + "0": 422.84503173828125, + "1": 164.31304931640625 + }, + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 3 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 56, + 81 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a room, high quality\n\n" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": { + "0": 425.27801513671875, + "1": 180.6060791015625 + }, + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 5 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 57, + 82 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "text, watermark" + ] + }, + { + "id": 42, + "type": "PreviewImage", + "pos": [ + 1830, + -500 + ], + "size": { + "0": 611.2340087890625, + "1": 633.9354858398438 + }, + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 76 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 20, + "type": "PreviewImage", + "pos": [ + 1830, + 186 + ], + "size": { + "0": 611.2340087890625, + "1": 633.9354858398438 + }, + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 29 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 4, + "type": "CheckpointLoaderSimple", + "pos": [ + 5, + 479 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 3, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 55, + 80 + ], + "slot_index": 0 + 
}, + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 3, + 5 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 22, + 49, + 58, + 75 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "juggernautXL_v8Rundiffusion.safetensors" + ] + }, + { + "id": 41, + "type": "VAEDecode", + "pos": [ + 1600, + -500 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 77 + }, + { + "name": "vae", + "type": "VAE", + "link": 75, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 76 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 14, + "type": "VAEDecode", + "pos": [ + 1588, + 186 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 85 + }, + { + "name": "vae", + "type": "VAE", + "link": 22, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 29 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 913, + 181 + ], + "size": { + "0": 315, + "1": 262 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 62 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 63 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 64 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 2 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 77, + 86 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 462370085958750, + "fixed", + 20, + 8, + "euler", + "normal", + 1 + ] + }, + { + "id": 45, + "type": "KSamplerAdvanced", + "pos": [ + 1249, + 179 + ], + "size": { + "0": 315, + "1": 334 + }, + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 80 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 81 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 82 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 86 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 85 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSamplerAdvanced" + }, + "widgets_values": [ + "enable", + 0, + "fixed", + 20, + 8, + "euler", + "normal", + 10, + 10000, + "disable" + ] + }, + { + "id": 37, + "type": "LayeredDiffusionDiffApply", + "pos": [ + 456, + -44 + ], + "size": { + "0": 342.5999755859375, + "1": 186 + }, + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 55, + "slot_index": 0 + }, + { + "name": "cond", + "type": "CONDITIONING", + "link": 56, + "slot_index": 1 + }, + { + "name": "uncond", + "type": "CONDITIONING", + "link": 57, + "slot_index": 2 + }, + { + "name": "blended_latent", + "type": "LATENT", + "link": 60 + }, + { + "name": "latent", + "type": "LATENT", + "link": 61 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 62 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 63 
+ ], + "shape": 3, + "slot_index": 1 + }, + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 64 + ], + "shape": 3, + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionDiffApply" + }, + "widgets_values": [ + "SDXL, Foreground", + 1 + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 2, + 5, + 0, + 3, + 3, + "LATENT" + ], + [ + 3, + 4, + 1, + 6, + 0, + "CLIP" + ], + [ + 5, + 4, + 1, + 7, + 0, + "CLIP" + ], + [ + 22, + 4, + 2, + 14, + 1, + "VAE" + ], + [ + 29, + 14, + 0, + 20, + 0, + "IMAGE" + ], + [ + 49, + 4, + 2, + 29, + 1, + "VAE" + ], + [ + 53, + 30, + 0, + 29, + 0, + "IMAGE" + ], + [ + 55, + 4, + 0, + 37, + 0, + "MODEL" + ], + [ + 56, + 6, + 0, + 37, + 1, + "CONDITIONING" + ], + [ + 57, + 7, + 0, + 37, + 2, + "CONDITIONING" + ], + [ + 58, + 4, + 2, + 39, + 1, + "VAE" + ], + [ + 59, + 38, + 0, + 39, + 0, + "IMAGE" + ], + [ + 60, + 39, + 0, + 37, + 3, + "LATENT" + ], + [ + 61, + 29, + 0, + 37, + 4, + "LATENT" + ], + [ + 62, + 37, + 0, + 3, + 0, + "MODEL" + ], + [ + 63, + 37, + 1, + 3, + 1, + "CONDITIONING" + ], + [ + 64, + 37, + 2, + 3, + 2, + "CONDITIONING" + ], + [ + 75, + 4, + 2, + 41, + 1, + "VAE" + ], + [ + 76, + 41, + 0, + 42, + 0, + "IMAGE" + ], + [ + 77, + 3, + 0, + 41, + 0, + "LATENT" + ], + [ + 80, + 4, + 0, + 45, + 0, + "MODEL" + ], + [ + 81, + 6, + 0, + 45, + 1, + "CONDITIONING" + ], + [ + 82, + 7, + 0, + 45, + 2, + "CONDITIONING" + ], + [ + 85, + 45, + 0, + 14, + 0, + "LATENT" + ], + [ + 86, + 3, + 0, + 45, + 3, + "LATENT" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} diff --git a/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_diff_fg.json b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_diff_fg.json new file mode 100644 index 00000000..c5f45271 --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_diff_fg.json @@ -0,0 +1,686 @@ +{ + "last_node_id": 39, + "last_link_id": 64, + "nodes": [ + { + "id": 4, + "type": "CheckpointLoaderSimple", + "pos": [ + 5, + 479 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 55 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 3, + 5 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 22, + 49, + 58 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "juggernautXL_v8Rundiffusion.safetensors" + ] + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": { + "0": 425.27801513671875, + "1": 180.6060791015625 + }, + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 5 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 57 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "text, watermark" + ] + }, + { + "id": 5, + "type": "EmptyLatentImage", + "pos": [ + 475, + 666 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 2 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyLatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 14, + "type": "VAEDecode", + "pos": [ + 1286, + 187 + ], + "size": { + "0": 210, 
+ "1": 46 + }, + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 21 + }, + { + "name": "vae", + "type": "VAE", + "link": 22, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 29 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 20, + "type": "PreviewImage", + "pos": [ + 1558, + 189 + ], + "size": { + "0": 611.2340087890625, + "1": 633.9354858398438 + }, + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 29 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 913, + 181 + ], + "size": { + "0": 315, + "1": 262 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 62 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 63 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 64 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 2 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 21 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 462370085958750, + "fixed", + 20, + 8, + "euler", + "normal", + 1 + ] + }, + { + "id": 38, + "type": "LoadImage", + "pos": [ + -137, + -388 + ], + "size": { + "0": 288.47406005859375, + "1": 317.46051025390625 + }, + "flags": {}, + "order": 2, + "mode": 0, + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 59 + ], + "shape": 3 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "blended.png", + "image" + ] + }, + { + "id": 39, + "type": "VAEEncode", + "pos": [ + 201, + -391 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "pixels", + "type": "IMAGE", + "link": 59, + "slot_index": 0 + }, + { + "name": "vae", + "type": "VAE", + "link": 58, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 60 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEEncode" + } + }, + { + "id": 29, + "type": "VAEEncode", + "pos": [ + 210, + -20 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "pixels", + "type": "IMAGE", + "link": 53 + }, + { + "name": "vae", + "type": "VAE", + "link": 49, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 61 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEEncode" + } + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": { + "0": 422.84503173828125, + "1": 164.31304931640625 + }, + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 3 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 56 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a room, high quality\n\n" + ] + }, + { + "id": 30, + "type": "LoadImage", + "pos": [ + -146, + -22 + ], + "size": { + "0": 315, + "1": 314 + }, + "flags": {}, + "order": 3, + "mode": 0, + 
"outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 53 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "dog.png", + "image" + ] + }, + { + "id": 37, + "type": "LayeredDiffusionDiffApply", + "pos": [ + 456, + -44 + ], + "size": { + "0": 342.5999755859375, + "1": 162 + }, + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 55, + "slot_index": 0 + }, + { + "name": "cond", + "type": "CONDITIONING", + "link": 56, + "slot_index": 1 + }, + { + "name": "uncond", + "type": "CONDITIONING", + "link": 57, + "slot_index": 2 + }, + { + "name": "blended_latent", + "type": "LATENT", + "link": 60 + }, + { + "name": "latent", + "type": "LATENT", + "link": 61 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 62 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 63 + ], + "shape": 3, + "slot_index": 1 + }, + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 64 + ], + "shape": 3, + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionDiffApply" + }, + "widgets_values": [ + "SDXL, Foreground", + 1 + ], + "color": "#232", + "bgcolor": "#353" + } + ], + "links": [ + [ + 2, + 5, + 0, + 3, + 3, + "LATENT" + ], + [ + 3, + 4, + 1, + 6, + 0, + "CLIP" + ], + [ + 5, + 4, + 1, + 7, + 0, + "CLIP" + ], + [ + 21, + 3, + 0, + 14, + 0, + "LATENT" + ], + [ + 22, + 4, + 2, + 14, + 1, + "VAE" + ], + [ + 29, + 14, + 0, + 20, + 0, + "IMAGE" + ], + [ + 49, + 4, + 2, + 29, + 1, + "VAE" + ], + [ + 53, + 30, + 0, + 29, + 0, + "IMAGE" + ], + [ + 55, + 4, + 0, + 37, + 0, + "MODEL" + ], + [ + 56, + 6, + 0, + 37, + 1, + "CONDITIONING" + ], + [ + 57, + 7, + 0, + 37, + 2, + "CONDITIONING" + ], + [ + 58, + 4, + 2, + 39, + 1, + "VAE" + ], + [ + 59, + 38, + 0, + 39, + 0, + "IMAGE" + ], + [ + 60, + 39, + 0, + 37, + 3, + "LATENT" + ], + [ + 61, + 29, + 0, + 37, + 4, + "LATENT" + ], + [ + 62, + 37, + 0, + 3, + 0, + "MODEL" + ], + [ + 63, + 37, + 1, + 3, + 1, + "CONDITIONING" + ], + [ + 64, + 37, + 2, + 3, + 2, + "CONDITIONING" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} diff --git a/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_fg_example.json b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_fg_example.json new file mode 100644 index 00000000..84aea975 --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_fg_example.json @@ -0,0 +1,733 @@ +{ + "last_node_id": 29, + "last_link_id": 40, + "nodes": [ + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": { + "0": 425.27801513671875, + "1": 180.6060791015625 + }, + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 5 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 6 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "text, watermark" + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": { + "0": 422.84503173828125, + "1": 164.31304931640625 + }, + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 3 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": 
"CONDITIONING", + "links": [ + 4 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "beautiful scenery nature glass bottle landscape, , purple galaxy bottle," + ] + }, + { + "id": 4, + "type": "CheckpointLoaderSimple", + "pos": [ + 5, + 479 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 18 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 3, + 5 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 22 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "juggernautXL_v8Rundiffusion.safetensors" + ] + }, + { + "id": 14, + "type": "VAEDecode", + "pos": [ + 1275, + 198 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 21 + }, + { + "name": "vae", + "type": "VAE", + "link": 22, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 24, + 29 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 20, + "type": "PreviewImage", + "pos": [ + 1547, + 472 + ], + "size": { + "0": 289.6058349609375, + "1": 299.6588134765625 + }, + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 29 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 5, + "type": "EmptyLatentImage", + "pos": [ + 475, + 707 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 2 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyLatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 911, + 198 + ], + "size": { + "0": 315, + "1": 262 + }, + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 19 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 4 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 6 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 2 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 21, + 23 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 984560333937969, + "randomize", + 20, + 8, + "euler", + "normal", + 1 + ] + }, + { + "id": 25, + "type": "PreviewImage", + "pos": [ + 2244, + 194 + ], + "size": { + "0": 289.6058349609375, + "1": 299.6588134765625 + }, + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 33 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 24, + "type": "MaskToImage", + "pos": [ + 1921, + 192 + ], + "size": { + "0": 210, + "1": 26 + }, + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "mask", + "type": "MASK", + "link": 32 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 33 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "MaskToImage" + } + }, + { + "id": 23, + "type": "PreviewImage", + "pos": [ + 1965, + 479 + ], + "size": { 
+ "0": 289.6058349609375, + "1": 299.6588134765625 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 31 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 27, + "type": "PreviewImage", + "pos": [ + 2243, + -164 + ], + "size": { + "0": 289.6058349609375, + "1": 299.6588134765625 + }, + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 40 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 15, + "type": "LayeredDiffusionDecode", + "pos": [ + 1586, + 195 + ], + "size": { + "0": 210, + "1": 102 + }, + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 23 + }, + { + "name": "images", + "type": "IMAGE", + "link": 24 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 31, + 37 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": [ + 32, + 38 + ], + "shape": 3, + "slot_index": 1 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionDecode" + }, + "widgets_values": [ + "SDXL", + 16 + ] + }, + { + "id": 28, + "type": "JoinImageWithAlpha", + "pos": [ + 1928, + -59 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 37 + }, + { + "name": "alpha", + "type": "MASK", + "link": 39 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 40 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "JoinImageWithAlpha" + } + }, + { + "id": 29, + "type": "InvertMask", + "pos": [ + 1931, + 44 + ], + "size": { + "0": 210, + "1": 26 + }, + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "mask", + "type": "MASK", + "link": 38 + } + ], + "outputs": [ + { + "name": "MASK", + "type": "MASK", + "links": [ + 39 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "InvertMask" + } + }, + { + "id": 13, + "type": "LayeredDiffusionApply", + "pos": [ + 468, + -2 + ], + "size": { + "0": 327.8314208984375, + "1": 106.42147827148438 + }, + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 18 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 19 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionApply" + }, + "widgets_values": [ + "SDXL, Conv Injection", + 1 + ] + } + ], + "links": [ + [ + 2, + 5, + 0, + 3, + 3, + "LATENT" + ], + [ + 3, + 4, + 1, + 6, + 0, + "CLIP" + ], + [ + 4, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 5, + 4, + 1, + 7, + 0, + "CLIP" + ], + [ + 6, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 18, + 4, + 0, + 13, + 0, + "MODEL" + ], + [ + 19, + 13, + 0, + 3, + 0, + "MODEL" + ], + [ + 21, + 3, + 0, + 14, + 0, + "LATENT" + ], + [ + 22, + 4, + 2, + 14, + 1, + "VAE" + ], + [ + 23, + 3, + 0, + 15, + 0, + "LATENT" + ], + [ + 24, + 14, + 0, + 15, + 1, + "IMAGE" + ], + [ + 29, + 14, + 0, + 20, + 0, + "IMAGE" + ], + [ + 31, + 15, + 0, + 23, + 0, + "IMAGE" + ], + [ + 32, + 15, + 1, + 24, + 0, + "MASK" + ], + [ + 33, + 24, + 0, + 25, + 0, + "IMAGE" + ], + [ + 37, + 15, + 0, + 28, + 0, + "IMAGE" + ], + [ + 38, + 15, + 1, + 29, + 0, + "MASK" + ], + [ + 39, + 29, + 0, + 28, + 1, + "MASK" + ], + [ + 40, + 28, + 0, + 27, + 0, + "IMAGE" + ] + ], + 
"groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} diff --git a/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_fg_example_rgba.json b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_fg_example_rgba.json new file mode 100644 index 00000000..dd68f9da --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_fg_example_rgba.json @@ -0,0 +1,511 @@ +{ + "last_node_id": 36, + "last_link_id": 51, + "nodes": [ + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": { + "0": 425.27801513671875, + "1": 180.6060791015625 + }, + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 5 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 6 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "text, watermark" + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 415, + 186 + ], + "size": { + "0": 422.84503173828125, + "1": 164.31304931640625 + }, + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 3 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 4 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "beautiful scenery nature glass bottle landscape, , purple galaxy bottle," + ] + }, + { + "id": 4, + "type": "CheckpointLoaderSimple", + "pos": [ + 5, + 479 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 18 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 3, + 5 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 22 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "juggernautXL_v8Rundiffusion.safetensors" + ] + }, + { + "id": 14, + "type": "VAEDecode", + "pos": [ + 1275, + 198 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 21 + }, + { + "name": "vae", + "type": "VAE", + "link": 22, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 29, + 50 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 911, + 198 + ], + "size": { + "0": 315, + "1": 262 + }, + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 19 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 4 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 6 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 2 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 21, + 49 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 1029477926308287, + "randomize", + 20, + 8, + "euler", + "normal", + 1 + ] + }, + { + "id": 5, + "type": "EmptyLatentImage", + "pos": [ + 480, + 691 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 2 + ], + "slot_index": 0 
+ } + ], + "properties": { + "Node name for S&R": "EmptyLatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 36, + "type": "LayeredDiffusionDecodeRGBA", + "pos": [ + 1589, + 199 + ], + "size": { + "0": 243.60000610351562, + "1": 102 + }, + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 49 + }, + { + "name": "images", + "type": "IMAGE", + "link": 50 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 51 + ], + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionDecodeRGBA" + }, + "widgets_values": [ + "SDXL", + 16 + ] + }, + { + "id": 27, + "type": "PreviewImage", + "pos": [ + 1930, + 197 + ], + "size": { + "0": 289.6058349609375, + "1": 299.6588134765625 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 51, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 20, + "type": "PreviewImage", + "pos": [ + 1570, + 365 + ], + "size": { + "0": 289.6058349609375, + "1": 299.6588134765625 + }, + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 29 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 13, + "type": "LayeredDiffusionApply", + "pos": [ + 468, + -2 + ], + "size": { + "0": 327.8314208984375, + "1": 106.42147827148438 + }, + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 18 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 19 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionApply" + }, + "widgets_values": [ + "SDXL, Conv Injection", + 1 + ] + } + ], + "links": [ + [ + 2, + 5, + 0, + 3, + 3, + "LATENT" + ], + [ + 3, + 4, + 1, + 6, + 0, + "CLIP" + ], + [ + 4, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 5, + 4, + 1, + 7, + 0, + "CLIP" + ], + [ + 6, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 18, + 4, + 0, + 13, + 0, + "MODEL" + ], + [ + 19, + 13, + 0, + 3, + 0, + "MODEL" + ], + [ + 21, + 3, + 0, + 14, + 0, + "LATENT" + ], + [ + 22, + 4, + 2, + 14, + 1, + "VAE" + ], + [ + 29, + 14, + 0, + 20, + 0, + "IMAGE" + ], + [ + 49, + 3, + 0, + 36, + 0, + "LATENT" + ], + [ + 50, + 14, + 0, + 36, + 1, + "IMAGE" + ], + [ + 51, + 36, + 0, + 27, + 0, + "IMAGE" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 0.4 +} diff --git a/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_joint.json b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_joint.json new file mode 100644 index 00000000..832bb670 --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/examples/layer_diffusion_joint.json @@ -0,0 +1,703 @@ +{ + "last_node_id": 27, + "last_link_id": 42, + "nodes": [ + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 413, + 389 + ], + "size": { + "0": 425.27801513671875, + "1": 180.6060791015625 + }, + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 5 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 6 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "text, watermark" + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 891, + 192 + ], + "size": { + "0": 315, + "1": 262 + }, + "flags": {}, + "order": 
7, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 32 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 4 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 6 + }, + { + "name": "latent_image", + "type": "LATENT", + "link": 2 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 21, + 33 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 960762378448318, + "randomize", + 20, + 8, + "euler", + "normal", + 1 + ] + }, + { + "id": 14, + "type": "VAEDecode", + "pos": [ + 1275, + 198 + ], + "size": { + "0": 210, + "1": 46 + }, + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 21 + }, + { + "name": "vae", + "type": "VAE", + "link": 22, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 34 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 4, + "type": "CheckpointLoaderSimple", + "pos": [ + 5, + 479 + ], + "size": { + "0": 315, + "1": 98 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 31 + ], + "slot_index": 0 + }, + { + "name": "CLIP", + "type": "CLIP", + "links": [ + 3, + 5, + 41, + 42 + ], + "slot_index": 1 + }, + { + "name": "VAE", + "type": "VAE", + "links": [ + 22 + ], + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "realisticVisionV20_v20.safetensors" + ] + }, + { + "id": 27, + "type": "CLIPTextEncode", + "pos": [ + -20, + -20 + ], + "size": { + "0": 422.84503173828125, + "1": 164.31304931640625 + }, + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 42, + "slot_index": 0 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 40 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a cozy room" + ] + }, + { + "id": 21, + "type": "LayeredDiffusionJointApply", + "pos": [ + 469, + -9 + ], + "size": { + "0": 315, + "1": 118 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 31, + "slot_index": 0 + }, + { + "name": "fg_cond", + "type": "CONDITIONING", + "link": 39 + }, + { + "name": "bg_cond", + "type": "CONDITIONING", + "link": 40 + }, + { + "name": "blended_cond", + "type": "CONDITIONING", + "link": 38 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 32 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionJointApply" + }, + "widgets_values": [ + "SD15, attn_sharing, Batch size (3N)" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 22, + "type": "LayeredDiffusionDecodeSplit", + "pos": [ + 1534, + 193 + ], + "size": { + "0": 315, + "1": 146 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 33, + "slot_index": 0 + }, + { + "name": "images", + "type": "IMAGE", + "link": 34 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 35 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 36 + ], + "shape": 3, + "slot_index": 1 + }, + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 
37 + ], + "shape": 3, + "slot_index": 2 + } + ], + "properties": { + "Node name for S&R": "LayeredDiffusionDecodeSplit" + }, + "widgets_values": [ + 3, + "SD15", + 16 + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 5, + "type": "EmptyLatentImage", + "pos": [ + 466, + 645 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 2 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "EmptyLatentImage" + }, + "widgets_values": [ + 512, + 512, + 6 + ] + }, + { + "id": 26, + "type": "CLIPTextEncode", + "pos": [ + -20, + -235 + ], + "size": { + "0": 422.84503173828125, + "1": 164.31304931640625 + }, + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 41, + "slot_index": 0 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 39 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "a sitting dog" + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 413, + 172 + ], + "size": { + "0": 422.84503173828125, + "1": 164.31304931640625 + }, + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 3 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 4, + 38 + ], + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "A dog sitting in a cozy room" + ] + }, + { + "id": 23, + "type": "PreviewImage", + "pos": [ + 1931, + -98 + ], + "size": [ + 522.1710021972658, + 259.90739746093755 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 35 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 24, + "type": "PreviewImage", + "pos": [ + 1933, + 222 + ], + "size": [ + 517.9710037231448, + 258.9074523925782 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 36 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 25, + "type": "PreviewImage", + "pos": [ + 1933, + 536 + ], + "size": [ + 516.8710037231444, + 270.5074523925782 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 37 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + } + ], + "links": [ + [ + 2, + 5, + 0, + 3, + 3, + "LATENT" + ], + [ + 3, + 4, + 1, + 6, + 0, + "CLIP" + ], + [ + 4, + 6, + 0, + 3, + 1, + "CONDITIONING" + ], + [ + 5, + 4, + 1, + 7, + 0, + "CLIP" + ], + [ + 6, + 7, + 0, + 3, + 2, + "CONDITIONING" + ], + [ + 21, + 3, + 0, + 14, + 0, + "LATENT" + ], + [ + 22, + 4, + 2, + 14, + 1, + "VAE" + ], + [ + 31, + 4, + 0, + 21, + 0, + "MODEL" + ], + [ + 32, + 21, + 0, + 3, + 0, + "MODEL" + ], + [ + 33, + 3, + 0, + 22, + 0, + "LATENT" + ], + [ + 34, + 14, + 0, + 22, + 1, + "IMAGE" + ], + [ + 35, + 22, + 0, + 23, + 0, + "IMAGE" + ], + [ + 36, + 22, + 1, + 24, + 0, + "IMAGE" + ], + [ + 37, + 22, + 2, + 25, + 0, + "IMAGE" + ], + [ + 38, + 6, + 0, + 21, + 3, + "CONDITIONING" + ], + [ + 39, + 26, + 0, + 21, + 1, + "CONDITIONING" + ], + [ + 40, + 27, + 0, + 21, + 2, + "CONDITIONING" + ], + [ + 41, + 4, + 1, + 26, + 0, + "CLIP" + ], + [ + 42, + 4, + 1, + 27, + 0, + "CLIP" + ] + ], + "groups": [], + "config": {}, + "extra": {}, + "version": 
0.4 +} diff --git a/hordelib/nodes/comfyui_layerdiffuse/layered_diffusion.py b/hordelib/nodes/comfyui_layerdiffuse/layered_diffusion.py new file mode 100644 index 00000000..6cedf08f --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/layered_diffusion.py @@ -0,0 +1,659 @@ +import copy +import functools +import os +from dataclasses import dataclass +from enum import Enum + +import comfy.model_base +import comfy.model_management +import comfy.supported_models +import comfy.supported_models_base +import folder_paths +import torch +from comfy.conds import CONDRegular +from comfy.model_patcher import ModelPatcher +from comfy.utils import load_torch_file +from comfy_extras.nodes_compositing import JoinImageWithAlpha +from folder_paths import get_folder_paths + +from .lib_layerdiffusion.attention_sharing import AttentionSharingPatcher +from .lib_layerdiffusion.enums import StableDiffusionVersion +from .lib_layerdiffusion.models import TransparentVAEDecoder +from .lib_layerdiffusion.utils import ( + load_file_from_url, + to_lora_patch_dict, +) + +if "layer_model" in folder_paths.folder_names_and_paths: + layer_model_root = get_folder_paths("layer_model")[0] +else: + layer_model_root = os.path.join(folder_paths.models_dir, "layer_model") +load_layer_model_state_dict = load_torch_file + + +# ------------ Start patching ComfyUI ------------ +def calculate_weight_adjust_channel(func): + """Patches ComfyUI's LoRA weight application to accept multi-channel inputs.""" + + @functools.wraps(func) + def calculate_weight(self: ModelPatcher, patches, weight: torch.Tensor, key: str) -> torch.Tensor: + weight = func(self, patches, weight, key) + + for p in patches: + alpha = p[0] + v = p[1] + + # The recursion call should be handled in the main func call. + if isinstance(v, list): + continue + + if len(v) == 1: + patch_type = "diff" + elif len(v) == 2: + patch_type = v[0] + v = v[1] + + if patch_type == "diff": + w1 = v[0] + if all( + ( + alpha != 0.0, + w1.shape != weight.shape, + w1.ndim == weight.ndim == 4, + ), + ): + new_shape = [max(n, m) for n, m in zip(weight.shape, w1.shape, strict=False)] + print(f"Merged with {key} channel changed from {weight.shape} to {new_shape}") + new_diff = alpha * comfy.model_management.cast_to_device(w1, weight.device, weight.dtype) + new_weight = torch.zeros(size=new_shape).to(weight) + new_weight[ + : weight.shape[0], + : weight.shape[1], + : weight.shape[2], + : weight.shape[3], + ] = weight + new_weight[ + : new_diff.shape[0], + : new_diff.shape[1], + : new_diff.shape[2], + : new_diff.shape[3], + ] += new_diff + new_weight = new_weight.contiguous().clone() + weight = new_weight + return weight + + return calculate_weight + + +ModelPatcher.calculate_weight = calculate_weight_adjust_channel(ModelPatcher.calculate_weight) + +# ------------ End patching ComfyUI ------------ + + +class LayeredDiffusionDecode: + """ + Decode alpha channel value from pixel value. + [B, C=3, H, W] => [B, C=4, H, W] + Outputs RGB image + Alpha mask. 
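+    The mask is produced by a dedicated transparent-VAE decoder that reads both
+    the sampled latent and the plain RGB decode of it; channel 0 of its output
+    is the alpha mask and channels 1..3 are the RGB image (see decode() below).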
+ """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "samples": ("LATENT",), + "images": ("IMAGE",), + "sd_version": ( + [ + StableDiffusionVersion.SD1x.value, + StableDiffusionVersion.SDXL.value, + ], + { + "default": StableDiffusionVersion.SDXL.value, + }, + ), + "sub_batch_size": ( + "INT", + {"default": 16, "min": 1, "max": 4096, "step": 1}, + ), + }, + } + + RETURN_TYPES = ("IMAGE", "MASK") + FUNCTION = "decode" + CATEGORY = "layer_diffuse" + + def __init__(self) -> None: + self.vae_transparent_decoder = {} + + def decode(self, samples, images, sd_version: str, sub_batch_size: int): + """ + sub_batch_size: How many images to decode in a single pass. + See https://github.com/huchenlei/ComfyUI-layerdiffuse/pull/4 for more + context. + """ + sd_version = StableDiffusionVersion(sd_version) + if sd_version == StableDiffusionVersion.SD1x: + url = "https://huggingface.co/LayerDiffusion/layerdiffusion-v1/resolve/main/layer_sd15_vae_transparent_decoder.safetensors" + file_name = "layer_sd15_vae_transparent_decoder.safetensors" + elif sd_version == StableDiffusionVersion.SDXL: + url = "https://huggingface.co/LayerDiffusion/layerdiffusion-v1/resolve/main/vae_transparent_decoder.safetensors" + file_name = "vae_transparent_decoder.safetensors" + + if not self.vae_transparent_decoder.get(sd_version): + model_path = load_file_from_url(url=url, model_dir=layer_model_root, file_name=file_name) + self.vae_transparent_decoder[sd_version] = TransparentVAEDecoder( + load_torch_file(model_path), + device=comfy.model_management.get_torch_device(), + dtype=(torch.float16 if comfy.model_management.should_use_fp16() else torch.float32), + ) + pixel = images.movedim(-1, 1) # [B, H, W, C] => [B, C, H, W] + + # Decoder requires dimension to be 64-aligned. + B, C, H, W = pixel.shape + assert H % 64 == 0, f"Height({H}) is not multiple of 64." + assert W % 64 == 0, f"Height({W}) is not multiple of 64." + + decoded = [] + for start_idx in range(0, samples["samples"].shape[0], sub_batch_size): + decoded.append( + self.vae_transparent_decoder[sd_version].decode_pixel( + pixel[start_idx : start_idx + sub_batch_size], + samples["samples"][start_idx : start_idx + sub_batch_size], + ), + ) + pixel_with_alpha = torch.cat(decoded, dim=0) + + # [B, C, H, W] => [B, H, W, C] + pixel_with_alpha = pixel_with_alpha.movedim(1, -1) + image = pixel_with_alpha[..., 1:] + alpha = pixel_with_alpha[..., 0] + return (image, alpha) + + +class LayeredDiffusionDecodeRGBA(LayeredDiffusionDecode): + """ + Decode alpha channel value from pixel value. + [B, C=3, H, W] => [B, C=4, H, W] + Outputs RGBA image. + """ + + RETURN_TYPES = ("IMAGE",) + + def decode(self, samples, images, sd_version: str, sub_batch_size: int): + image, mask = super().decode(samples, images, sd_version, sub_batch_size) + alpha = 1.0 - mask + return JoinImageWithAlpha().join_image_with_alpha(image, alpha) + + +class LayeredDiffusionDecodeSplit(LayeredDiffusionDecodeRGBA): + """Decode RGBA every N images.""" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "samples": ("LATENT",), + "images": ("IMAGE",), + # Do RGBA decode every N output images. 
+ "frames": ( + "INT", + {"default": 2, "min": 2, "max": s.MAX_FRAMES, "step": 1}, + ), + "sd_version": ( + [ + StableDiffusionVersion.SD1x.value, + StableDiffusionVersion.SDXL.value, + ], + { + "default": StableDiffusionVersion.SDXL.value, + }, + ), + "sub_batch_size": ( + "INT", + {"default": 16, "min": 1, "max": 4096, "step": 1}, + ), + }, + } + + MAX_FRAMES = 3 + RETURN_TYPES = ("IMAGE",) * MAX_FRAMES + + def decode( + self, + samples, + images: torch.Tensor, + frames: int, + sd_version: str, + sub_batch_size: int, + ): + sliced_samples = copy.copy(samples) + sliced_samples["samples"] = sliced_samples["samples"][::frames] + return tuple( + + ( + super(LayeredDiffusionDecodeSplit, self).decode(sliced_samples, imgs, sd_version, sub_batch_size)[ + 0 + ] + if i == 0 + else imgs + ) + for i in range(frames) + for imgs in (images[i::frames],) + + ) + (None,) * (self.MAX_FRAMES - frames) + + +class LayerMethod(Enum): + ATTN = "Attention Injection" + CONV = "Conv Injection" + + +class LayerType(Enum): + FG = "Foreground" + BG = "Background" + + +@dataclass +class LayeredDiffusionBase: + model_file_name: str + model_url: str + sd_version: StableDiffusionVersion + attn_sharing: bool = False + injection_method: LayerMethod | None = None + cond_type: LayerType | None = None + # Number of output images per run. + frames: int = 1 + + @property + def config_string(self) -> str: + injection_method = self.injection_method.value if self.injection_method else "" + cond_type = self.cond_type.value if self.cond_type else "" + attn_sharing = "attn_sharing" if self.attn_sharing else "" + frames = f"Batch size ({self.frames}N)" if self.frames != 1 else "" + return ", ".join( + x + for x in ( + self.sd_version.value, + injection_method, + cond_type, + attn_sharing, + frames, + ) + if x + ) + + def apply_c_concat(self, cond, uncond, c_concat): + """Set foreground/background concat condition.""" + + def write_c_concat(cond): + new_cond = [] + for t in cond: + n = [t[0], t[1].copy()] + if "model_conds" not in n[1]: + n[1]["model_conds"] = {} + n[1]["model_conds"]["c_concat"] = CONDRegular(c_concat) + new_cond.append(n) + return new_cond + + return (write_c_concat(cond), write_c_concat(uncond)) + + def apply_layered_diffusion( + self, + model: ModelPatcher, + weight: float, + ): + """Patch model""" + model_path = load_file_from_url( + url=self.model_url, + model_dir=layer_model_root, + file_name=self.model_file_name, + ) + layer_lora_state_dict = load_layer_model_state_dict(model_path) + layer_lora_patch_dict = to_lora_patch_dict(layer_lora_state_dict) + work_model = model.clone() + work_model.add_patches(layer_lora_patch_dict, weight) + return (work_model,) + + def apply_layered_diffusion_attn_sharing( + self, + model: ModelPatcher, + control_img: torch.TensorType | None = None, + ): + """Patch model with attn sharing""" + model_path = load_file_from_url( + url=self.model_url, + model_dir=layer_model_root, + file_name=self.model_file_name, + ) + layer_lora_state_dict = load_layer_model_state_dict(model_path) + work_model = model.clone() + patcher = AttentionSharingPatcher(work_model, self.frames, use_control=control_img is not None) + patcher.load_state_dict(layer_lora_state_dict, strict=True) + if control_img is not None: + patcher.set_control(control_img) + return (work_model,) + + +def get_model_sd_version(model: ModelPatcher) -> StableDiffusionVersion: + """Get model's StableDiffusionVersion.""" + base: comfy.model_base.BaseModel = model.model + model_config: comfy.supported_models.supported_models_base.BASE 
= base.model_config + if isinstance(model_config, comfy.supported_models.SDXL): + return StableDiffusionVersion.SDXL + elif isinstance(model_config, (comfy.supported_models.SD15, comfy.supported_models.SD20)): + # SD15 and SD20 are compatible with each other. + return StableDiffusionVersion.SD1x + else: + raise Exception(f"Unsupported SD Version: {type(model_config)}.") + + +class LayeredDiffusionFG: + """Generate foreground with transparent background.""" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "model": ("MODEL",), + "config": ([c.config_string for c in s.MODELS],), + "weight": ( + "FLOAT", + {"default": 1.0, "min": -1, "max": 3, "step": 0.05}, + ), + }, + } + + RETURN_TYPES = ("MODEL",) + FUNCTION = "apply_layered_diffusion" + CATEGORY = "layer_diffuse" + MODELS = ( + LayeredDiffusionBase( + model_file_name="layer_xl_transparent_attn.safetensors", + model_url="https://huggingface.co/LayerDiffusion/layerdiffusion-v1/resolve/main/layer_xl_transparent_attn.safetensors", + sd_version=StableDiffusionVersion.SDXL, + injection_method=LayerMethod.ATTN, + ), + LayeredDiffusionBase( + model_file_name="layer_xl_transparent_conv.safetensors", + model_url="https://huggingface.co/LayerDiffusion/layerdiffusion-v1/resolve/main/layer_xl_transparent_conv.safetensors", + sd_version=StableDiffusionVersion.SDXL, + injection_method=LayerMethod.CONV, + ), + LayeredDiffusionBase( + model_file_name="layer_sd15_transparent_attn.safetensors", + model_url="https://huggingface.co/LayerDiffusion/layerdiffusion-v1/resolve/main/layer_sd15_transparent_attn.safetensors", + sd_version=StableDiffusionVersion.SD1x, + injection_method=LayerMethod.ATTN, + attn_sharing=True, + ), + ) + + def apply_layered_diffusion( + self, + model: ModelPatcher, + config: str, + weight: float, + ): + ld_model = [m for m in self.MODELS if m.config_string == config][0] + assert get_model_sd_version(model) == ld_model.sd_version + if ld_model.attn_sharing: + return ld_model.apply_layered_diffusion_attn_sharing(model) + else: + return ld_model.apply_layered_diffusion(model, weight) + + +class LayeredDiffusionJoint: + """Generate FG + BG + Blended in one inference batch. 
Batch size = 3N.""" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "model": ("MODEL",), + "config": ([c.config_string for c in s.MODELS],), + }, + "optional": { + "fg_cond": ("CONDITIONING",), + "bg_cond": ("CONDITIONING",), + "blended_cond": ("CONDITIONING",), + }, + } + + RETURN_TYPES = ("MODEL",) + FUNCTION = "apply_layered_diffusion" + CATEGORY = "layer_diffuse" + MODELS = ( + LayeredDiffusionBase( + model_file_name="layer_sd15_joint.safetensors", + model_url="https://huggingface.co/LayerDiffusion/layerdiffusion-v1/resolve/main/layer_sd15_joint.safetensors", + sd_version=StableDiffusionVersion.SD1x, + attn_sharing=True, + frames=3, + ), + ) + + def apply_layered_diffusion( + self, + model: ModelPatcher, + config: str, + fg_cond: list[list[torch.TensorType]] | None = None, + bg_cond: list[list[torch.TensorType]] | None = None, + blended_cond: list[list[torch.TensorType]] | None = None, + ): + ld_model = [m for m in self.MODELS if m.config_string == config][0] + assert get_model_sd_version(model) == ld_model.sd_version + assert ld_model.attn_sharing + work_model = ld_model.apply_layered_diffusion_attn_sharing(model)[0] + work_model.model_options.setdefault("transformer_options", {}) + work_model.model_options["transformer_options"]["cond_overwrite"] = [ + cond[0][0] if cond is not None else None + for cond in ( + fg_cond, + bg_cond, + blended_cond, + ) + ] + return (work_model,) + + +class LayeredDiffusionCond: + """Generate foreground + background given background / foreground. + - FG => Blended + - BG => Blended + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "model": ("MODEL",), + "cond": ("CONDITIONING",), + "uncond": ("CONDITIONING",), + "latent": ("LATENT",), + "config": ([c.config_string for c in s.MODELS],), + "weight": ( + "FLOAT", + {"default": 1.0, "min": -1, "max": 3, "step": 0.05}, + ), + }, + } + + RETURN_TYPES = ("MODEL", "CONDITIONING", "CONDITIONING") + FUNCTION = "apply_layered_diffusion" + CATEGORY = "layer_diffuse" + MODELS = ( + LayeredDiffusionBase( + model_file_name="layer_xl_fg2ble.safetensors", + model_url="https://huggingface.co/LayerDiffusion/layerdiffusion-v1/resolve/main/layer_xl_fg2ble.safetensors", + sd_version=StableDiffusionVersion.SDXL, + cond_type=LayerType.FG, + ), + LayeredDiffusionBase( + model_file_name="layer_xl_bg2ble.safetensors", + model_url="https://huggingface.co/LayerDiffusion/layerdiffusion-v1/resolve/main/layer_xl_bg2ble.safetensors", + sd_version=StableDiffusionVersion.SDXL, + cond_type=LayerType.BG, + ), + ) + + def apply_layered_diffusion( + self, + model: ModelPatcher, + cond, + uncond, + latent, + config: str, + weight: float, + ): + ld_model = [m for m in self.MODELS if m.config_string == config][0] + assert get_model_sd_version(model) == ld_model.sd_version + c_concat = model.model.latent_format.process_in(latent["samples"]) + return ld_model.apply_layered_diffusion(model, weight) + ld_model.apply_c_concat(cond, uncond, c_concat) + + +class LayeredDiffusionCondJoint: + """Generate fg/bg + blended given fg/bg. 
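+    Uses the SD1.5 attention-sharing models with two frames per run.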
+ - FG => Blended + BG + - BG => Blended + FG + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "model": ("MODEL",), + "image": ("IMAGE",), + "config": ([c.config_string for c in s.MODELS],), + }, + "optional": { + "cond": ("CONDITIONING",), + "blended_cond": ("CONDITIONING",), + }, + } + + RETURN_TYPES = ("MODEL",) + FUNCTION = "apply_layered_diffusion" + CATEGORY = "layer_diffuse" + MODELS = ( + LayeredDiffusionBase( + model_file_name="layer_sd15_fg2bg.safetensors", + model_url="https://huggingface.co/LayerDiffusion/layerdiffusion-v1/resolve/main/layer_sd15_fg2bg.safetensors", + sd_version=StableDiffusionVersion.SD1x, + attn_sharing=True, + frames=2, + cond_type=LayerType.FG, + ), + LayeredDiffusionBase( + model_file_name="layer_sd15_bg2fg.safetensors", + model_url="https://huggingface.co/LayerDiffusion/layerdiffusion-v1/resolve/main/layer_sd15_bg2fg.safetensors", + sd_version=StableDiffusionVersion.SD1x, + attn_sharing=True, + frames=2, + cond_type=LayerType.BG, + ), + ) + + def apply_layered_diffusion( + self, + model: ModelPatcher, + image, + config: str, + cond: list[list[torch.TensorType]] | None = None, + blended_cond: list[list[torch.TensorType]] | None = None, + ): + ld_model = [m for m in self.MODELS if m.config_string == config][0] + assert get_model_sd_version(model) == ld_model.sd_version + assert ld_model.attn_sharing + work_model = ld_model.apply_layered_diffusion_attn_sharing(model, control_img=image.movedim(-1, 1))[0] + work_model.model_options.setdefault("transformer_options", {}) + work_model.model_options["transformer_options"]["cond_overwrite"] = [ + cond[0][0] if cond is not None else None + for cond in ( + cond, + blended_cond, + ) + ] + return (work_model,) + + +class LayeredDiffusionDiff: + """Extract FG/BG from blended image. 
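+    The known layer's latent is concatenated with the blended latent to form
+    the conditioning.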
+ - Blended + FG => BG + - Blended + BG => FG + """ + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "model": ("MODEL",), + "cond": ("CONDITIONING",), + "uncond": ("CONDITIONING",), + "blended_latent": ("LATENT",), + "latent": ("LATENT",), + "config": ([c.config_string for c in s.MODELS],), + "weight": ( + "FLOAT", + {"default": 1.0, "min": -1, "max": 3, "step": 0.05}, + ), + }, + } + + RETURN_TYPES = ("MODEL", "CONDITIONING", "CONDITIONING") + FUNCTION = "apply_layered_diffusion" + CATEGORY = "layer_diffuse" + MODELS = ( + LayeredDiffusionBase( + model_file_name="layer_xl_fgble2bg.safetensors", + model_url="https://huggingface.co/LayerDiffusion/layerdiffusion-v1/resolve/main/layer_xl_fgble2bg.safetensors", + sd_version=StableDiffusionVersion.SDXL, + cond_type=LayerType.FG, + ), + LayeredDiffusionBase( + model_file_name="layer_xl_bgble2fg.safetensors", + model_url="https://huggingface.co/LayerDiffusion/layerdiffusion-v1/resolve/main/layer_xl_bgble2fg.safetensors", + sd_version=StableDiffusionVersion.SDXL, + cond_type=LayerType.BG, + ), + ) + + def apply_layered_diffusion( + self, + model: ModelPatcher, + cond, + uncond, + blended_latent, + latent, + config: str, + weight: float, + ): + ld_model = [m for m in self.MODELS if m.config_string == config][0] + assert get_model_sd_version(model) == ld_model.sd_version + c_concat = model.model.latent_format.process_in( + torch.cat([latent["samples"], blended_latent["samples"]], dim=1), + ) + return ld_model.apply_layered_diffusion(model, weight) + ld_model.apply_c_concat(cond, uncond, c_concat) + + +NODE_CLASS_MAPPINGS = { + "LayeredDiffusionApply": LayeredDiffusionFG, + "LayeredDiffusionJointApply": LayeredDiffusionJoint, + "LayeredDiffusionCondApply": LayeredDiffusionCond, + "LayeredDiffusionCondJointApply": LayeredDiffusionCondJoint, + "LayeredDiffusionDiffApply": LayeredDiffusionDiff, + "LayeredDiffusionDecode": LayeredDiffusionDecode, + "LayeredDiffusionDecodeRGBA": LayeredDiffusionDecodeRGBA, + "LayeredDiffusionDecodeSplit": LayeredDiffusionDecodeSplit, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "LayeredDiffusionApply": "Layer Diffuse Apply", + "LayeredDiffusionJointApply": "Layer Diffuse Joint Apply", + "LayeredDiffusionCondApply": "Layer Diffuse Cond Apply", + "LayeredDiffusionCondJointApply": "Layer Diffuse Cond Joint Apply", + "LayeredDiffusionDiffApply": "Layer Diffuse Diff Apply", + "LayeredDiffusionDecode": "Layer Diffuse Decode", + "LayeredDiffusionDecodeRGBA": "Layer Diffuse Decode (RGBA)", + "LayeredDiffusionDecodeSplit": "Layer Diffuse Decode (Split)", +} diff --git a/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/__init__.py b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/attention_sharing.py b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/attention_sharing.py new file mode 100644 index 00000000..a511f745 --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/attention_sharing.py @@ -0,0 +1,327 @@ +# Currently only sd15 + +import functools + +import einops +import torch +from comfy import model_management, utils +from comfy.ldm.modules.attention import optimized_attention + +module_mapping_sd15 = { + 0: "input_blocks.1.1.transformer_blocks.0.attn1", + 1: "input_blocks.1.1.transformer_blocks.0.attn2", + 2: "input_blocks.2.1.transformer_blocks.0.attn1", + 3: "input_blocks.2.1.transformer_blocks.0.attn2", + 4: 
"input_blocks.4.1.transformer_blocks.0.attn1", + 5: "input_blocks.4.1.transformer_blocks.0.attn2", + 6: "input_blocks.5.1.transformer_blocks.0.attn1", + 7: "input_blocks.5.1.transformer_blocks.0.attn2", + 8: "input_blocks.7.1.transformer_blocks.0.attn1", + 9: "input_blocks.7.1.transformer_blocks.0.attn2", + 10: "input_blocks.8.1.transformer_blocks.0.attn1", + 11: "input_blocks.8.1.transformer_blocks.0.attn2", + 12: "output_blocks.3.1.transformer_blocks.0.attn1", + 13: "output_blocks.3.1.transformer_blocks.0.attn2", + 14: "output_blocks.4.1.transformer_blocks.0.attn1", + 15: "output_blocks.4.1.transformer_blocks.0.attn2", + 16: "output_blocks.5.1.transformer_blocks.0.attn1", + 17: "output_blocks.5.1.transformer_blocks.0.attn2", + 18: "output_blocks.6.1.transformer_blocks.0.attn1", + 19: "output_blocks.6.1.transformer_blocks.0.attn2", + 20: "output_blocks.7.1.transformer_blocks.0.attn1", + 21: "output_blocks.7.1.transformer_blocks.0.attn2", + 22: "output_blocks.8.1.transformer_blocks.0.attn1", + 23: "output_blocks.8.1.transformer_blocks.0.attn2", + 24: "output_blocks.9.1.transformer_blocks.0.attn1", + 25: "output_blocks.9.1.transformer_blocks.0.attn2", + 26: "output_blocks.10.1.transformer_blocks.0.attn1", + 27: "output_blocks.10.1.transformer_blocks.0.attn2", + 28: "output_blocks.11.1.transformer_blocks.0.attn1", + 29: "output_blocks.11.1.transformer_blocks.0.attn2", + 30: "middle_block.1.transformer_blocks.0.attn1", + 31: "middle_block.1.transformer_blocks.0.attn2", +} + + +def compute_cond_mark(cond_or_uncond, sigmas): + cond_or_uncond_size = int(sigmas.shape[0]) + + cond_mark = [] + for cx in cond_or_uncond: + cond_mark += [cx] * cond_or_uncond_size + + cond_mark = torch.Tensor(cond_mark).to(sigmas) + return cond_mark + + +class LoRALinearLayer(torch.nn.Module): + def __init__(self, in_features: int, out_features: int, rank: int = 256, org=None): + super().__init__() + self.down = torch.nn.Linear(in_features, rank, bias=False) + self.up = torch.nn.Linear(rank, out_features, bias=False) + self.org = [org] + + def forward(self, h): + org_weight = self.org[0].weight.to(h) + org_bias = self.org[0].bias.to(h) if self.org[0].bias is not None else None + down_weight = self.down.weight + up_weight = self.up.weight + final_weight = org_weight + torch.mm(up_weight, down_weight) + return torch.nn.functional.linear(h, final_weight, org_bias) + + +class AttentionSharingUnit(torch.nn.Module): + # `transformer_options` passed to the most recent BasicTransformerBlock.forward + # call. 
+ transformer_options: dict = {} + + def __init__(self, module, frames=2, use_control=True, rank=256): + super().__init__() + + self.heads = module.heads + self.frames = frames + self.original_module = [module] + q_in_channels, q_out_channels = ( + module.to_q.in_features, + module.to_q.out_features, + ) + k_in_channels, k_out_channels = ( + module.to_k.in_features, + module.to_k.out_features, + ) + v_in_channels, v_out_channels = ( + module.to_v.in_features, + module.to_v.out_features, + ) + o_in_channels, o_out_channels = ( + module.to_out[0].in_features, + module.to_out[0].out_features, + ) + + hidden_size = k_out_channels + + self.to_q_lora = [ + LoRALinearLayer(q_in_channels, q_out_channels, rank, module.to_q) for _ in range(self.frames) + ] + self.to_k_lora = [ + LoRALinearLayer(k_in_channels, k_out_channels, rank, module.to_k) for _ in range(self.frames) + ] + self.to_v_lora = [ + LoRALinearLayer(v_in_channels, v_out_channels, rank, module.to_v) for _ in range(self.frames) + ] + self.to_out_lora = [ + LoRALinearLayer(o_in_channels, o_out_channels, rank, module.to_out[0]) for _ in range(self.frames) + ] + + self.to_q_lora = torch.nn.ModuleList(self.to_q_lora) + self.to_k_lora = torch.nn.ModuleList(self.to_k_lora) + self.to_v_lora = torch.nn.ModuleList(self.to_v_lora) + self.to_out_lora = torch.nn.ModuleList(self.to_out_lora) + + self.temporal_i = torch.nn.Linear(in_features=hidden_size, out_features=hidden_size) + self.temporal_n = torch.nn.LayerNorm(hidden_size, elementwise_affine=True, eps=1e-6) + self.temporal_q = torch.nn.Linear(in_features=hidden_size, out_features=hidden_size) + self.temporal_k = torch.nn.Linear(in_features=hidden_size, out_features=hidden_size) + self.temporal_v = torch.nn.Linear(in_features=hidden_size, out_features=hidden_size) + self.temporal_o = torch.nn.Linear(in_features=hidden_size, out_features=hidden_size) + + self.control_convs = None + + if use_control: + self.control_convs = [ + torch.nn.Sequential( + torch.nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=1), + torch.nn.SiLU(), + torch.nn.Conv2d(256, hidden_size, kernel_size=1), + ) + for _ in range(self.frames) + ] + self.control_convs = torch.nn.ModuleList(self.control_convs) + + self.control_signals = None + + def forward(self, h, context=None, value=None): + transformer_options = self.transformer_options + + modified_hidden_states = einops.rearrange(h, "(b f) d c -> f b d c", f=self.frames) + + if self.control_convs is not None: + context_dim = int(modified_hidden_states.shape[2]) + control_outs = [] + for f in range(self.frames): + control_signal = self.control_signals[context_dim].to(modified_hidden_states) + control = self.control_convs[f](control_signal) + control = einops.rearrange(control, "b c h w -> b (h w) c") + control_outs.append(control) + control_outs = torch.stack(control_outs, dim=0) + modified_hidden_states = modified_hidden_states + control_outs.to(modified_hidden_states) + + if context is None: + framed_context = modified_hidden_states + else: + framed_context = einops.rearrange(context, "(b f) d c -> f b d c", f=self.frames) + + framed_cond_mark = einops.rearrange( + compute_cond_mark( + transformer_options["cond_or_uncond"], + transformer_options["sigmas"], + ), + "(b f) -> f b", + f=self.frames, + ).to(modified_hidden_states) + + attn_outs = [] + for f in range(self.frames): + fcf = framed_context[f] + + if context is not None: + cond_overwrite = transformer_options.get("cond_overwrite", []) + if len(cond_overwrite) > f: + cond_overwrite = cond_overwrite[f] + else: + 
cond_overwrite = None + if cond_overwrite is not None: + cond_mark = framed_cond_mark[f][:, None, None] + fcf = cond_overwrite.to(fcf) * (1.0 - cond_mark) + fcf * cond_mark + + q = self.to_q_lora[f](modified_hidden_states[f]) + k = self.to_k_lora[f](fcf) + v = self.to_v_lora[f](fcf) + o = optimized_attention(q, k, v, self.heads) + o = self.to_out_lora[f](o) + o = self.original_module[0].to_out[1](o) + attn_outs.append(o) + + attn_outs = torch.stack(attn_outs, dim=0) + modified_hidden_states = modified_hidden_states + attn_outs.to(modified_hidden_states) + modified_hidden_states = einops.rearrange(modified_hidden_states, "f b d c -> (b f) d c", f=self.frames) + + x = modified_hidden_states + x = self.temporal_n(x) + x = self.temporal_i(x) + d = x.shape[1] + + x = einops.rearrange(x, "(b f) d c -> (b d) f c", f=self.frames) + + q = self.temporal_q(x) + k = self.temporal_k(x) + v = self.temporal_v(x) + + x = optimized_attention(q, k, v, self.heads) + x = self.temporal_o(x) + x = einops.rearrange(x, "(b d) f c -> (b f) d c", d=d) + + modified_hidden_states = modified_hidden_states + x + + return modified_hidden_states - h + + @classmethod + def hijack_transformer_block(cls): + def register_get_transformer_options(func): + @functools.wraps(func) + def forward(self, x, context=None, transformer_options={}): + cls.transformer_options = transformer_options + return func(self, x, context, transformer_options) + + return forward + + from comfy.ldm.modules.attention import BasicTransformerBlock + + BasicTransformerBlock.forward = register_get_transformer_options(BasicTransformerBlock.forward) + + +AttentionSharingUnit.hijack_transformer_block() + + +class AdditionalAttentionCondsEncoder(torch.nn.Module): + def __init__(self): + super().__init__() + + self.blocks_0 = torch.nn.Sequential( + torch.nn.Conv2d(3, 32, kernel_size=3, padding=1, stride=1), + torch.nn.SiLU(), + torch.nn.Conv2d(32, 32, kernel_size=3, padding=1, stride=1), + torch.nn.SiLU(), + torch.nn.Conv2d(32, 64, kernel_size=3, padding=1, stride=2), + torch.nn.SiLU(), + torch.nn.Conv2d(64, 64, kernel_size=3, padding=1, stride=1), + torch.nn.SiLU(), + torch.nn.Conv2d(64, 128, kernel_size=3, padding=1, stride=2), + torch.nn.SiLU(), + torch.nn.Conv2d(128, 128, kernel_size=3, padding=1, stride=1), + torch.nn.SiLU(), + torch.nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2), + torch.nn.SiLU(), + torch.nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=1), + torch.nn.SiLU(), + ) # 64*64*256 + + self.blocks_1 = torch.nn.Sequential( + torch.nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=2), + torch.nn.SiLU(), + torch.nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=1), + torch.nn.SiLU(), + ) # 32*32*256 + + self.blocks_2 = torch.nn.Sequential( + torch.nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=2), + torch.nn.SiLU(), + torch.nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=1), + torch.nn.SiLU(), + ) # 16*16*256 + + self.blocks_3 = torch.nn.Sequential( + torch.nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=2), + torch.nn.SiLU(), + torch.nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=1), + torch.nn.SiLU(), + ) # 8*8*256 + + self.blks = [self.blocks_0, self.blocks_1, self.blocks_2, self.blocks_3] + + def __call__(self, h): + results = {} + for b in self.blks: + h = b(h) + results[int(h.shape[2]) * int(h.shape[3])] = h + return results + + +class HookerLayers(torch.nn.Module): + def __init__(self, layer_list): + super().__init__() + self.layers = torch.nn.ModuleList(layer_list) + + +class 
AttentionSharingPatcher(torch.nn.Module): + def __init__(self, unet, frames=2, use_control=True, rank=256): + super().__init__() + model_management.unload_model_clones(unet) + + units = [] + for i in range(32): + real_key = module_mapping_sd15[i] + attn_module = utils.get_attr(unet.model.diffusion_model, real_key) + u = AttentionSharingUnit(attn_module, frames=frames, use_control=use_control, rank=rank) + units.append(u) + unet.add_object_patch("diffusion_model." + real_key, u) + + self.hookers = HookerLayers(units) + + if use_control: + self.kwargs_encoder = AdditionalAttentionCondsEncoder() + else: + self.kwargs_encoder = None + + self.dtype = torch.float32 + if model_management.should_use_fp16(model_management.get_torch_device()): + self.dtype = torch.float16 + self.hookers.half() + return + + def set_control(self, img): + img = img.cpu().float() * 2.0 - 1.0 + signals = self.kwargs_encoder(img) + for m in self.hookers.layers: + m.control_signals = signals + return diff --git a/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/enums.py b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/enums.py new file mode 100644 index 00000000..c4623e3c --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/enums.py @@ -0,0 +1,23 @@ +from enum import Enum + + +class ResizeMode(Enum): + RESIZE = "Just Resize" + CROP_AND_RESIZE = "Crop and Resize" + RESIZE_AND_FILL = "Resize and Fill" + + def int_value(self): + if self == ResizeMode.RESIZE: + return 0 + elif self == ResizeMode.CROP_AND_RESIZE: + return 1 + elif self == ResizeMode.RESIZE_AND_FILL: + return 2 + return 0 + + +class StableDiffusionVersion(Enum): + """The version family of stable diffusion model.""" + + SD1x = "SD15" + SDXL = "SDXL" diff --git a/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/models.py b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/models.py new file mode 100644 index 00000000..84e5d877 --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/models.py @@ -0,0 +1,296 @@ +import importlib.metadata + +import cv2 +import numpy as np +import torch +import torch.nn as nn +from diffusers.configuration_utils import ConfigMixin, register_to_config +from diffusers.models.modeling_utils import ModelMixin +from packaging.version import parse +from tqdm import tqdm + +diffusers_version = importlib.metadata.version("diffusers") + + +def check_diffusers_version(min_version="0.25.0"): + assert parse(diffusers_version) >= parse( + min_version, + ), f"diffusers>={min_version} requirement not satisfied. Please install correct diffusers version." + + +check_diffusers_version() + +if parse(diffusers_version) >= parse("0.29.0"): + from diffusers.models.unets.unet_2d_blocks import UNetMidBlock2D, get_down_block, get_up_block +else: + from diffusers.models.unet_2d_blocks import UNetMidBlock2D, get_down_block, get_up_block + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. 
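+    A zero-initialised layer contributes nothing at first, so the freshly
+    added branch starts out as a no-op.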
+ """ + for p in module.parameters(): + p.detach().zero_() + return module + + +class LatentTransparencyOffsetEncoder(torch.nn.Module): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.blocks = torch.nn.Sequential( + torch.nn.Conv2d(4, 32, kernel_size=3, padding=1, stride=1), + nn.SiLU(), + torch.nn.Conv2d(32, 32, kernel_size=3, padding=1, stride=1), + nn.SiLU(), + torch.nn.Conv2d(32, 64, kernel_size=3, padding=1, stride=2), + nn.SiLU(), + torch.nn.Conv2d(64, 64, kernel_size=3, padding=1, stride=1), + nn.SiLU(), + torch.nn.Conv2d(64, 128, kernel_size=3, padding=1, stride=2), + nn.SiLU(), + torch.nn.Conv2d(128, 128, kernel_size=3, padding=1, stride=1), + nn.SiLU(), + torch.nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2), + nn.SiLU(), + torch.nn.Conv2d(256, 256, kernel_size=3, padding=1, stride=1), + nn.SiLU(), + zero_module(torch.nn.Conv2d(256, 4, kernel_size=3, padding=1, stride=1)), + ) + + def __call__(self, x): + return self.blocks(x) + + +# 1024 * 1024 * 3 -> 16 * 16 * 512 -> 1024 * 1024 * 3 +class UNet1024(ModelMixin, ConfigMixin): + @register_to_config + def __init__( + self, + in_channels: int = 3, + out_channels: int = 3, + down_block_types: tuple[str] = ( + "DownBlock2D", + "DownBlock2D", + "DownBlock2D", + "DownBlock2D", + "AttnDownBlock2D", + "AttnDownBlock2D", + "AttnDownBlock2D", + ), + up_block_types: tuple[str] = ( + "AttnUpBlock2D", + "AttnUpBlock2D", + "AttnUpBlock2D", + "UpBlock2D", + "UpBlock2D", + "UpBlock2D", + "UpBlock2D", + ), + block_out_channels: tuple[int] = (32, 32, 64, 128, 256, 512, 512), + layers_per_block: int = 2, + mid_block_scale_factor: float = 1, + downsample_padding: int = 1, + downsample_type: str = "conv", + upsample_type: str = "conv", + dropout: float = 0.0, + act_fn: str = "silu", + attention_head_dim: int | None = 8, + norm_num_groups: int = 4, + norm_eps: float = 1e-5, + ): + super().__init__() + + # input + self.conv_in = nn.Conv2d(in_channels, block_out_channels[0], kernel_size=3, padding=(1, 1)) + self.latent_conv_in = zero_module(nn.Conv2d(4, block_out_channels[2], kernel_size=1)) + + self.down_blocks = nn.ModuleList([]) + self.mid_block = None + self.up_blocks = nn.ModuleList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + temb_channels=None, + add_downsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + resnet_groups=norm_num_groups, + attention_head_dim=(attention_head_dim if attention_head_dim is not None else output_channel), + downsample_padding=downsample_padding, + resnet_time_scale_shift="default", + downsample_type=downsample_type, + dropout=dropout, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + temb_channels=None, + dropout=dropout, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + output_scale_factor=mid_block_scale_factor, + resnet_time_scale_shift="default", + attention_head_dim=(attention_head_dim if attention_head_dim is not None else block_out_channels[-1]), + resnet_groups=norm_num_groups, + attn_groups=None, + add_attention=True, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = 
reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=layers_per_block + 1, + in_channels=input_channel, + out_channels=output_channel, + prev_output_channel=prev_output_channel, + temb_channels=None, + add_upsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + resnet_groups=norm_num_groups, + attention_head_dim=(attention_head_dim if attention_head_dim is not None else output_channel), + resnet_time_scale_shift="default", + upsample_type=upsample_type, + dropout=dropout, + ) + self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[0], num_groups=norm_num_groups, eps=norm_eps) + self.conv_act = nn.SiLU() + self.conv_out = nn.Conv2d(block_out_channels[0], out_channels, kernel_size=3, padding=1) + + def forward(self, x, latent): + sample_latent = self.latent_conv_in(latent) + sample = self.conv_in(x) + emb = None + + down_block_res_samples = (sample,) + for i, downsample_block in enumerate(self.down_blocks): + if i == 3: + sample = sample + sample_latent + + sample, res_samples = downsample_block(hidden_states=sample, temb=emb) + down_block_res_samples += res_samples + + sample = self.mid_block(sample, emb) + + for upsample_block in self.up_blocks: + res_samples = down_block_res_samples[-len(upsample_block.resnets) :] + down_block_res_samples = down_block_res_samples[: -len(upsample_block.resnets)] + sample = upsample_block(sample, res_samples, emb) + + sample = self.conv_norm_out(sample) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + return sample + + +def checkerboard(shape): + return np.indices(shape).sum(axis=0) % 2 + + +def fill_checkerboard_bg(y: torch.Tensor) -> torch.Tensor: + alpha = y[..., :1] + fg = y[..., 1:] + B, H, W, C = fg.shape + cb = checkerboard(shape=(H // 64, W // 64)) + cb = cv2.resize(cb, (W, H), interpolation=cv2.INTER_NEAREST) + cb = (0.5 + (cb - 0.5) * 0.1)[None, ..., None] + cb = torch.from_numpy(cb).to(fg) + vis = fg * alpha + cb * (1 - alpha) + return vis + + +class TransparentVAEDecoder: + def __init__(self, sd, device, dtype): + self.load_device = device + self.dtype = dtype + + model = UNet1024(in_channels=3, out_channels=4) + model.load_state_dict(sd, strict=True) + model.to(self.load_device, dtype=self.dtype) + model.eval() + self.model = model + + @torch.no_grad() + def estimate_single_pass(self, pixel, latent): + y = self.model(pixel, latent) + return y + + @torch.no_grad() + def estimate_augmented(self, pixel, latent): + args = [ + [False, 0], + [False, 1], + [False, 2], + [False, 3], + [True, 0], + [True, 1], + [True, 2], + [True, 3], + ] + + result = [] + + for flip, rok in tqdm(args): + feed_pixel = pixel.clone() + feed_latent = latent.clone() + + if flip: + feed_pixel = torch.flip(feed_pixel, dims=(3,)) + feed_latent = torch.flip(feed_latent, dims=(3,)) + + feed_pixel = torch.rot90(feed_pixel, k=rok, dims=(2, 3)) + feed_latent = torch.rot90(feed_latent, k=rok, dims=(2, 3)) + + eps = self.estimate_single_pass(feed_pixel, feed_latent).clip(0, 1) + eps = torch.rot90(eps, k=-rok, dims=(2, 3)) + + if flip: + eps = torch.flip(eps, dims=(3,)) + + result += [eps] + + result = torch.stack(result, dim=0) + median 
= torch.median(result, dim=0).values + return median + + @torch.no_grad() + def decode_pixel(self, pixel: torch.TensorType, latent: torch.TensorType) -> torch.TensorType: + # pixel.shape = [B, C=3, H, W] + assert pixel.shape[1] == 3 + pixel_device = pixel.device + pixel_dtype = pixel.dtype + + pixel = pixel.to(device=self.load_device, dtype=self.dtype) + latent = latent.to(device=self.load_device, dtype=self.dtype) + # y.shape = [B, C=4, H, W] + y = self.estimate_augmented(pixel, latent) + y = y.clip(0, 1) + assert y.shape[1] == 4 + # Restore image to original device of input image. + return y.to(pixel_device, dtype=pixel_dtype) diff --git a/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/utils.py b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/utils.py new file mode 100644 index 00000000..4387d4c7 --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/utils.py @@ -0,0 +1,139 @@ +import os +from urllib.parse import urlparse + +import cv2 +import numpy as np +import torch + +from .enums import ResizeMode + + +def rgba2rgbfp32(x): + rgb = x[..., :3].astype(np.float32) / 255.0 + a = x[..., 3:4].astype(np.float32) / 255.0 + return 0.5 + (rgb - 0.5) * a + + +def to255unit8(x): + return (x * 255.0).clip(0, 255).astype(np.uint8) + + +def safe_numpy(x): + # A very safe method to make sure that Apple/Mac works + y = x + + # below is very boring but do not change these. If you change these Apple or Mac may fail. + y = y.copy() + y = np.ascontiguousarray(y) + y = y.copy() + return y + + +def high_quality_resize(x, size): + if x.shape[0] != size[1] or x.shape[1] != size[0]: + if (size[0] * size[1]) < (x.shape[0] * x.shape[1]): + interpolation = cv2.INTER_AREA + else: + interpolation = cv2.INTER_LANCZOS4 + + y = cv2.resize(x, size, interpolation=interpolation) + else: + y = x + return y + + +def crop_and_resize_image(detected_map, resize_mode, h, w): + if resize_mode == ResizeMode.RESIZE: + detected_map = high_quality_resize(detected_map, (w, h)) + detected_map = safe_numpy(detected_map) + return detected_map + + old_h, old_w, _ = detected_map.shape + old_w = float(old_w) + old_h = float(old_h) + k0 = float(h) / old_h + k1 = float(w) / old_w + + def safeint(x): + return int(np.round(x)) + + if resize_mode == ResizeMode.RESIZE_AND_FILL: + k = min(k0, k1) + borders = np.concatenate( + [detected_map[0, :, :], detected_map[-1, :, :], detected_map[:, 0, :], detected_map[:, -1, :]], axis=0, + ) + high_quality_border_color = np.median(borders, axis=0).astype(detected_map.dtype) + high_quality_background = np.tile(high_quality_border_color[None, None], [h, w, 1]) + detected_map = high_quality_resize(detected_map, (safeint(old_w * k), safeint(old_h * k))) + new_h, new_w, _ = detected_map.shape + pad_h = max(0, (h - new_h) // 2) + pad_w = max(0, (w - new_w) // 2) + high_quality_background[pad_h : pad_h + new_h, pad_w : pad_w + new_w] = detected_map + detected_map = high_quality_background + detected_map = safe_numpy(detected_map) + return detected_map + else: + k = max(k0, k1) + detected_map = high_quality_resize(detected_map, (safeint(old_w * k), safeint(old_h * k))) + new_h, new_w, _ = detected_map.shape + pad_h = max(0, (new_h - h) // 2) + pad_w = max(0, (new_w - w) // 2) + detected_map = detected_map[pad_h : pad_h + h, pad_w : pad_w + w] + detected_map = safe_numpy(detected_map) + return detected_map + + +def pytorch_to_numpy(x): + return [np.clip(255.0 * y.cpu().numpy(), 0, 255).astype(np.uint8) for y in x] + + +def numpy_to_pytorch(x): + y = x.astype(np.float32) / 255.0 
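+    # prepend a batch dimension, then hand a contiguous copy over to torch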
+ y = y[None] + y = np.ascontiguousarray(y.copy()) + y = torch.from_numpy(y).float() + return y + + +def load_file_from_url( + url: str, + *, + model_dir: str, + progress: bool = True, + file_name: str | None = None, +) -> str: + """Download a file from `url` into `model_dir`, using the file present if possible. + + Returns the path to the downloaded file. + """ + os.makedirs(model_dir, exist_ok=True) + if not file_name: + parts = urlparse(url) + file_name = os.path.basename(parts.path) + cached_file = os.path.abspath(os.path.join(model_dir, file_name)) + if not os.path.exists(cached_file): + print(f'Downloading: "{url}" to {cached_file}\n') + from torch.hub import download_url_to_file + + download_url_to_file(url, cached_file, progress=progress) + return cached_file + + +def to_lora_patch_dict(state_dict: dict) -> dict: + """Convert raw lora state_dict to patch_dict that can be applied on + modelpatcher.""" + patch_dict = {} + for k, w in state_dict.items(): + model_key, patch_type, weight_index = k.split("::") + if model_key not in patch_dict: + patch_dict[model_key] = {} + if patch_type not in patch_dict[model_key]: + patch_dict[model_key][patch_type] = [None] * 16 + patch_dict[model_key][patch_type][int(weight_index)] = w + + patch_flat = {} + for model_key, v in patch_dict.items(): + for patch_type, weight_list in v.items(): + patch_flat[model_key] = (patch_type, weight_list) + + return patch_flat diff --git a/hordelib/nodes/comfyui_layerdiffuse/pyproject.toml b/hordelib/nodes/comfyui_layerdiffuse/pyproject.toml new file mode 100644 index 00000000..231fb6ec --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/pyproject.toml @@ -0,0 +1,15 @@ +[project] +name = "comfyui-layerdiffuse" +description = "Transparent Image Layer Diffusion using Latent Transparency" +version = "1.0.1" +license = "LICENSE" +dependencies = ["diffusers>=0.25.0", "opencv-python"] + +[project.urls] +Repository = "https://github.com/huchenlei/ComfyUI-layerdiffuse" + +# Used by Comfy Registry https://comfyregistry.org +[tool.comfy] +PublisherId = "huchenlei" +DisplayName = "ComfyUI-layerdiffuse" +Icon = "" diff --git a/hordelib/nodes/comfyui_layerdiffuse/requirements.txt b/hordelib/nodes/comfyui_layerdiffuse/requirements.txt new file mode 100644 index 00000000..47497d7d --- /dev/null +++ b/hordelib/nodes/comfyui_layerdiffuse/requirements.txt @@ -0,0 +1,2 @@ +diffusers>=0.25.0 +opencv-python diff --git a/mypy.ini b/mypy.ini index a0a5b2e5..10a57f6c 100644 --- a/mypy.ini +++ b/mypy.ini @@ -6,6 +6,10 @@ exclude = (build|dist|ComfyUI|comfy_controlnet_preprocessors|facerestore|comfy_h ignore_errors = True ignore_missing_imports = True +[mypy-hordelib.nodes.comfyui_layerdiffuse.*] +ignore_errors = True +ignore_missing_imports = True + [mypy-xformers.*] ignore_missing_imports = True diff --git a/pyproject.toml b/pyproject.toml index 8239c211..4837a77c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,7 @@ exclude = ''' | \.tox | comfy_controlnet_preprocessors | comfy_qr + | comfyui_layerdiffuse | facerestore | build )/ @@ -77,7 +78,7 @@ exclude = ''' [tool.ruff] # XXX this isn't part of CI yet line-length=119 -exclude=["ComfyUI", "comfy_controlnet_preprocessors", "facerestore", "comfy_qr", "comfyui-layerdiffuse", "build"] +exclude=["ComfyUI", "comfy_controlnet_preprocessors", "facerestore", "comfy_qr", "comfyui_layerdiffuse", "build"] ignore=[ # "F401", # imported but unused "E402", # Module level import not at top of file From 756f1c7536491524c5696edf9ac5b799e27f7f23 Mon Sep 17 00:00:00 2001 From: db0 Date: 
Sun, 23 Jun 2024 16:44:01 +0200 Subject: [PATCH 3/9] wip: new mm --- hordelib/consts.py | 4 ++++ hordelib/model_manager/hyper.py | 8 ++++++++ hordelib/model_manager/miscellaneous.py | 15 +++++++++++++++ .../lib_layerdiffusion/utils.py | 8 +++++++- hordelib/nodes/node_model_loader.py | 3 +++ 5 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 hordelib/model_manager/miscellaneous.py diff --git a/hordelib/consts.py b/hordelib/consts.py index ed26b847..3c335967 100644 --- a/hordelib/consts.py +++ b/hordelib/consts.py @@ -42,6 +42,7 @@ class MODEL_CATEGORY_NAMES(StrEnum): ti = auto() blip = auto() clip = auto() + miscellaneous = auto() # Default model managers to load @@ -55,6 +56,7 @@ class MODEL_CATEGORY_NAMES(StrEnum): # MODEL_CATEGORY_NAMES.safety_checker: True, MODEL_CATEGORY_NAMES.lora: True, MODEL_CATEGORY_NAMES.ti: True, + MODEL_CATEGORY_NAMES.miscellaneous: True, } """The default model managers to load.""" # XXX Clarify @@ -68,6 +70,7 @@ class MODEL_CATEGORY_NAMES(StrEnum): MODEL_CATEGORY_NAMES.safety_checker: MODEL_CATEGORY_NAMES.safety_checker, MODEL_CATEGORY_NAMES.lora: MODEL_CATEGORY_NAMES.lora, MODEL_CATEGORY_NAMES.ti: MODEL_CATEGORY_NAMES.ti, + MODEL_CATEGORY_NAMES.miscellaneous: MODEL_CATEGORY_NAMES.miscellaneous, } """The name of the json file (without the extension) of the corresponding model database.""" @@ -81,5 +84,6 @@ class MODEL_CATEGORY_NAMES(StrEnum): MODEL_CATEGORY_NAMES.safety_checker: MODEL_CATEGORY_NAMES.safety_checker, MODEL_CATEGORY_NAMES.lora: MODEL_CATEGORY_NAMES.lora, MODEL_CATEGORY_NAMES.ti: MODEL_CATEGORY_NAMES.ti, + MODEL_CATEGORY_NAMES.miscellaneous: MODEL_CATEGORY_NAMES.miscellaneous, } """The folder name on disk where the models are stored in AIWORKER_CACHE_HOME.""" diff --git a/hordelib/model_manager/hyper.py b/hordelib/model_manager/hyper.py index 9be2ad7a..41504202 100644 --- a/hordelib/model_manager/hyper.py +++ b/hordelib/model_manager/hyper.py @@ -18,6 +18,7 @@ from hordelib.model_manager.esrgan import EsrganModelManager from hordelib.model_manager.gfpgan import GfpganModelManager from hordelib.model_manager.lora import LoraModelManager +from hordelib.model_manager.miscellaneous import MiscellaneousModelManager from hordelib.model_manager.safety_checker import SafetyCheckerModelManager from hordelib.model_manager.ti import TextualInversionModelManager @@ -31,6 +32,7 @@ MODEL_CATEGORY_NAMES.safety_checker: SafetyCheckerModelManager, MODEL_CATEGORY_NAMES.lora: LoraModelManager, MODEL_CATEGORY_NAMES.ti: TextualInversionModelManager, + MODEL_CATEGORY_NAMES.miscellaneous: MiscellaneousModelManager, } """A lookup table for the `BaseModelManager` types.""" @@ -91,6 +93,12 @@ def ti(self) -> TextualInversionModelManager | None: found_mm = self.get_model_manager_instance(TextualInversionModelManager) return found_mm if isinstance(found_mm, TextualInversionModelManager) else None + @property + def miscellaneous(self) -> MiscellaneousModelManager | None: + """The miscellaneous model manager instance. 
Returns `None` if not loaded."""
+        found_mm = self.get_model_manager_instance(MiscellaneousModelManager)
+        return found_mm if isinstance(found_mm, MiscellaneousModelManager) else None
+
     @property
     def models(self) -> dict:
         """All model manager's internal dictionaries of models, loaded from model database JSON files."""
diff --git a/hordelib/model_manager/miscellaneous.py b/hordelib/model_manager/miscellaneous.py
new file mode 100644
index 00000000..605579e1
--- /dev/null
+++ b/hordelib/model_manager/miscellaneous.py
@@ -0,0 +1,15 @@
+from hordelib.consts import MODEL_CATEGORY_NAMES
+from hordelib.model_manager.base import BaseModelManager
+
+
+class MiscellaneousModelManager(BaseModelManager):  # FIXME # TODO?
+    def __init__(
+        self,
+        download_reference=False,
+        **kwargs,
+    ):
+        super().__init__(
+            model_category_name=MODEL_CATEGORY_NAMES.miscellaneous,
+            download_reference=download_reference,
+            **kwargs,
+        )
diff --git a/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/utils.py b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/utils.py
index 4387d4c7..538b4569 100644
--- a/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/utils.py
+++ b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/utils.py
@@ -6,6 +6,7 @@
 import torch
 
 from .enums import ResizeMode
+from hordelib.shared_model_manager import SharedModelManager
 
 
 def rgba2rgbfp32(x):
@@ -94,7 +95,10 @@ def numpy_to_pytorch(x):
     y = torch.from_numpy(y).float()
     return y
 
-
+# IMPORTANT: Edited by hordelib devs to use the model loader!
+# To keep things more maintainable in case we need to update the code,
+# we keep the same incoming parameters but ignore them in favour of our own
+# model manager paths.
 def load_file_from_url(
     url: str,
     *,
@@ -106,6 +110,8 @@ def load_file_from_url(
 
     Returns the path to the downloaded file.
     """
+    from loguru import logger  # Debug
+    logger.error(SharedModelManager.manager.miscellaneous.is_file_available(file_name))
     os.makedirs(model_dir, exist_ok=True)
     if not file_name:
         parts = urlparse(url)
diff --git a/hordelib/nodes/node_model_loader.py b/hordelib/nodes/node_model_loader.py
index 87b89950..4f545d37 100644
--- a/hordelib/nodes/node_model_loader.py
+++ b/hordelib/nodes/node_model_loader.py
@@ -12,6 +12,9 @@
 from hordelib.shared_model_manager import SharedModelManager
 
 
+# Don't let the name fool you: this class is trying to load all the files that will be necessary
+# for a given comfyUI workflow. That includes loras, etc.
+# TODO: Rename to HordeWorkflowModelsLoader ;)
 class HordeCheckpointLoader:
     @classmethod
     def INPUT_TYPES(s):
From 90f7fb2f258d9e0e9b91f5d969150f753294e85d Mon Sep 17 00:00:00 2001
From: tazlin 
Date: Sun, 23 Jun 2024 11:06:40 -0400
Subject: [PATCH 4/9] fix: compat. 
version of `horde_model_reference` --- hordelib/model_manager/base.py | 1 + requirements.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hordelib/model_manager/base.py b/hordelib/model_manager/base.py index afb7856f..eca5042c 100644 --- a/hordelib/model_manager/base.py +++ b/hordelib/model_manager/base.py @@ -30,6 +30,7 @@ MODEL_CATEGORY_NAMES.esrgan: MODEL_REFERENCE_CATEGORY.esrgan, MODEL_CATEGORY_NAMES.gfpgan: MODEL_REFERENCE_CATEGORY.gfpgan, MODEL_CATEGORY_NAMES.safety_checker: MODEL_REFERENCE_CATEGORY.safety_checker, + MODEL_CATEGORY_NAMES.miscellaneous: MODEL_REFERENCE_CATEGORY.miscellaneous, } diff --git a/requirements.txt b/requirements.txt index 1366bb3d..9e86509f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # Add this in for tox, comment out for build --extra-index-url https://download.pytorch.org/whl/cu121 horde_sdk>=0.9.1 -horde_model_reference>=0.5.2 +horde_model_reference>=0.7.0 pydantic numpy==1.26.4 torch>=2.1.0 From 28eeb5fef09a4785a1129f449f5e0ff7f4811ab1 Mon Sep 17 00:00:00 2001 From: tazlin Date: Sun, 23 Jun 2024 11:20:56 -0400 Subject: [PATCH 5/9] fix: use compat. version of `horde_sdk` --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9e86509f..25c28934 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ # Add this in for tox, comment out for build --extra-index-url https://download.pytorch.org/whl/cu121 -horde_sdk>=0.9.1 +horde_sdk>=0.12.0 horde_model_reference>=0.7.0 pydantic numpy==1.26.4 From 1ba356dcd45b7eeb1d1809c1d70d4f87a51a6c60 Mon Sep 17 00:00:00 2001 From: tazlin Date: Sun, 23 Jun 2024 11:27:05 -0400 Subject: [PATCH 6/9] fix: latest horde_sdk in `pre-commit` mypy deps --- .pre-commit-config.yaml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 84ab31b5..c4ef3fc2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,4 +19,20 @@ repos: hooks: - id: mypy exclude: ^examples/.*$ # FIXME - additional_dependencies: [pydantic, strenum, types-colorama, types-docutils, types-Pillow, types-psutil, types-Pygments, types-pywin32, types-PyYAML, types-regex, types-requests, types-setuptools, types-tabulate, types-tqdm, types-urllib3, horde_sdk==0.9.1] + additional_dependencies: [ + pydantic, + strenum, + types-colorama, + types-docutils, + types-Pillow, types-psutil, + types-Pygments, + types-pywin32, + types-PyYAML, + types-regex, + types-requests, + types-setuptools, + types-tabulate, + types-tqdm, + types-urllib3, + horde_sdk==0.12.0, + ] From 2fd77cf84a96b126b82eb7b1b0408f55b6f43c38 Mon Sep 17 00:00:00 2001 From: db0 Date: Thu, 27 Jun 2024 21:02:40 +0200 Subject: [PATCH 7/9] load layerdiffuse models from model manager --- .pre-commit-config.yaml | 26 +++++++++---------- .../lib_layerdiffusion/utils.py | 14 +--------- tests/conftest.py | 2 ++ 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c4ef3fc2..fe2d3b21 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,19 +20,19 @@ repos: - id: mypy exclude: ^examples/.*$ # FIXME additional_dependencies: [ - pydantic, - strenum, - types-colorama, - types-docutils, - types-Pillow, types-psutil, - types-Pygments, - types-pywin32, - types-PyYAML, - types-regex, - types-requests, - types-setuptools, - types-tabulate, + pydantic, + strenum, + types-colorama, + types-docutils, + types-Pillow, 
types-psutil, + types-Pygments, + types-pywin32, + types-PyYAML, + types-regex, + types-requests, + types-setuptools, + types-tabulate, types-tqdm, - types-urllib3, + types-urllib3, horde_sdk==0.12.0, ] diff --git a/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/utils.py b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/utils.py index 538b4569..f75a48ba 100644 --- a/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/utils.py +++ b/hordelib/nodes/comfyui_layerdiffuse/lib_layerdiffusion/utils.py @@ -110,19 +110,7 @@ def load_file_from_url( Returns the path to the downloaded file. """ - from loguru import logger # Debug - logger.error(SharedModelManager.manager.miscellaneous.is_file_available(file_name)) - os.makedirs(model_dir, exist_ok=True) - if not file_name: - parts = urlparse(url) - file_name = os.path.basename(parts.path) - cached_file = os.path.abspath(os.path.join(model_dir, file_name)) - if not os.path.exists(cached_file): - print(f'Downloading: "{url}" to {cached_file}\n') - from torch.hub import download_url_to_file - - download_url_to_file(url, cached_file, progress=progress) - return cached_file + return str(SharedModelManager.manager.miscellaneous.model_folder_path / file_name) def to_lora_patch_dict(state_dict: dict) -> dict: diff --git a/tests/conftest.py b/tests/conftest.py index d8e4ca9e..2b9bbab0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -116,6 +116,8 @@ def shared_model_manager( assert SharedModelManager.manager is not None assert SharedModelManager.manager.codeformer is not None assert SharedModelManager.manager.codeformer.download_all_models() + assert SharedModelManager.manager.miscellaneous is not None + assert SharedModelManager.manager.miscellaneous.download_all_models() assert SharedModelManager.manager.compvis is not None assert SharedModelManager.manager.download_model("Deliberate") From cc95df17167e56f4431f240d82b89bbabd88dcca Mon Sep 17 00:00:00 2001 From: db0 Date: Sun, 30 Jun 2024 14:33:28 +0200 Subject: [PATCH 8/9] ld working --- hordelib/horde.py | 2 ++ hordelib/nodes/node_model_loader.py | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/hordelib/horde.py b/hordelib/horde.py index bd14f329..76ef9d90 100644 --- a/hordelib/horde.py +++ b/hordelib/horde.py @@ -1031,6 +1031,8 @@ def _final_pipeline_adjustments(self, payload, pipeline_data) -> tuple[dict, lis "control_v1p_sd15_qrcode_monster_v2.safetensors" ) if payload.get("transparent") is True: + # A transparent gen is basically a fancy lora + pipeline_params["model_loader.will_load_loras"] = True if SharedModelManager.manager.compvis: model_details = SharedModelManager.manager.compvis.get_model_reference_info(payload["model_name"]) # SD2, Cascade and SD3 not supported diff --git a/hordelib/nodes/node_model_loader.py b/hordelib/nodes/node_model_loader.py index 4f545d37..6f37b22b 100644 --- a/hordelib/nodes/node_model_loader.py +++ b/hordelib/nodes/node_model_loader.py @@ -104,6 +104,13 @@ def load_checkpoint( # Clear references so comfy can free memory as needed SharedModelManager.manager._models_in_ram = {} + # TODO: Currently we don't preload the layer_diffuse tensors which can potentially be big + # (3G for SDXL). So they will be loaded during runtime, and their memory usage will be + # handled by comfy as with any lora. + # Potential improvement here is to preload these models at this point + # And then just pass their reference to layered_diffusion.py, but that would require + # Quite a bit of refactoring. 
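+    # Note the weights themselves now come from the miscellaneous model
+    # manager's folder (see lib_layerdiffusion/utils.py), so runtime pays only
+    # the load cost, not a download.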
+
     if ckpt_name is not None and Path(ckpt_name).is_absolute():
         ckpt_path = ckpt_name
From dafbb2fbe71daaffbd2479625dfc796b7a50f0e7 Mon Sep 17 00:00:00 2001
From: db0 
Date: Sun, 30 Jun 2024 16:31:05 +0200
Subject: [PATCH 9/9] fix: ensure diffusers version for layered diffusion node

---
 README.md        | 16 ++++++++++++++++
 requirements.txt |  2 +-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 72a78e80..90766ebc 100644
--- a/README.md
+++ b/README.md
@@ -252,6 +252,22 @@ python run_controlnet.py
 The `images/` directory should have our test images.
 ```
+
+### Creating a local build of hordelib
+
+This is useful when testing new nodes via the horde-reGen-worker etc.
+
+```bash
+python build_helper.py
+python -m build --sdist --wheel --outdir dist/ .
+python build_helper.py --fix
+```
+
+On the venv where you want to install the new version:
+
+```bash
+python -m pip install /path/to/hordelib/dist/horde_engine-*.whl
+```
+
 ### Updating the embedded version of ComfyUI
 
 - Change the value in `consts.py` to the desired ComfyUI version.
diff --git a/requirements.txt b/requirements.txt
index 25c28934..6874f189 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,7 +23,7 @@ pillow
 loguru
 GitPython
 clip-anytorch
-diffusers
+diffusers>=0.25.0
 omegaconf
 psutil
 typing-extensions
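
As a closing illustration, a minimal end-to-end sketch of what this series enables. It is illustrative only and not part of the patches: `HordeLib`, `SharedModelManager`, the `transparent` payload key and the "Deliberate" model all appear in the diffs above, while `hordelib.initialise()`, `SharedModelManager.load_model_managers()`, `basic_inference_single_image()` and the remaining payload keys are assumptions modelled on hordelib's usual conventions rather than anything shown here.

```python
# Hedged usage sketch of a transparent generation through hordelib once this
# series is applied; entry-point names below are assumptions, see note above.
import hordelib

hordelib.initialise()  # assumed bootstrap call, required before the imports below

from hordelib.horde import HordeLib
from hordelib.shared_model_manager import SharedModelManager

SharedModelManager.load_model_managers()  # assumed helper; brings up compvis + miscellaneous
hordelib_instance = HordeLib()

payload = {
    "sampler_name": "k_euler",
    "cfg_scale": 7.5,
    "seed": "1234",
    "width": 512,
    "height": 512,
    "steps": 20,
    "prompt": "a delicate glass chess piece, studio lighting",
    "model": "Deliberate",  # SD 1.5 baseline -> "SD15, Attention Injection, attn_sharing"
    "transparent": True,  # the new payload key added by this series
}

# Assumed API: returns the generated image (the test suite treats it as PIL).
# With transparent=True the pipeline routes through LayeredDiffusionApply and
# LayeredDiffusionDecodeRGBA, so the result carries a real alpha channel.
result = hordelib_instance.basic_inference_single_image(payload)
```

With a baseline other than SD 1.5 or SDXL (SD2, Cascade, SD3), the `transparent` flag is simply ignored and the pipeline runs unmodified.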