From c31b19cb577a3dcab1ecf4ab814b993ffb5b47f2 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Thu, 13 Jun 2024 19:10:29 -0400 Subject: [PATCH 01/81] sd3 simplify safetensors handler --- modules/model_sd3.py | 141 ++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 97 deletions(-) diff --git a/modules/model_sd3.py b/modules/model_sd3.py index 0b7373572..614c37f6a 100644 --- a/modules/model_sd3.py +++ b/modules/model_sd3.py @@ -27,7 +27,7 @@ def hf_login(): loggedin = True -def load_sd3(te3=None, fn=None, cache_dir=None, config=None): +def load_sd3(fn=None, cache_dir=None, config=None): from modules import devices hf_login() repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers' @@ -37,78 +37,53 @@ def load_sd3(te3=None, fn=None, cache_dir=None, config=None): if fn is not None and fn.endswith('.safetensors') and os.path.exists(fn): model_id = fn loader = diffusers.StableDiffusion3Pipeline.from_single_file - kwargs = { - 'text_encoder': transformers.CLIPTextModelWithProjection.from_pretrained( - repo_id, - subfolder='text_encoder', - cache_dir=cache_dir, - torch_dtype=dtype, - ), - 'text_encoder_2': transformers.CLIPTextModelWithProjection.from_pretrained( - repo_id, - subfolder='text_encoder_2', - cache_dir=cache_dir, - torch_dtype=dtype, - ), - 'tokenizer': transformers.CLIPTokenizer.from_pretrained( - repo_id, - subfolder='tokenizer', - cache_dir=cache_dir, - ), - 'tokenizer_2': transformers.CLIPTokenizer.from_pretrained( - repo_id, - subfolder='tokenizer_2', - cache_dir=cache_dir, - ), - } + diffusers_minor = int(diffusers.__version__.split('.')[1]) + fn_size = os.path.getsize(fn) + if diffusers_minor < 30 or fn_size < 5e9: # te1/te2 do not get loaded correctly in diffusers 0.29.0 or model is without te1/te2 + kwargs = { + 'text_encoder': transformers.CLIPTextModelWithProjection.from_pretrained( + repo_id, + subfolder='text_encoder', + cache_dir=cache_dir, + torch_dtype=dtype, + ), + 'text_encoder_2': transformers.CLIPTextModelWithProjection.from_pretrained( + repo_id, + subfolder='text_encoder_2', + cache_dir=cache_dir, + torch_dtype=dtype, + ), + 'tokenizer': transformers.CLIPTokenizer.from_pretrained( + repo_id, + subfolder='tokenizer', + cache_dir=cache_dir, + ), + 'tokenizer_2': transformers.CLIPTokenizer.from_pretrained( + repo_id, + subfolder='tokenizer_2', + cache_dir=cache_dir, + ), + 'text_encoder_3': None, + } + elif fn_size < 1e10: # if model is below 10gb it does not have te4 + kwargs = { + 'text_encoder_3': None, + } + else: + kwargs = {} else: model_id = repo_id loader = diffusers.StableDiffusion3Pipeline.from_pretrained - if te3 == 'fp16': - text_encoder_3 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - torch_dtype=dtype, - cache_dir=cache_dir, - ) - pipe = loader( - model_id, - torch_dtype=dtype, - text_encoder_3=text_encoder_3, - cache_dir=cache_dir, - config=config, - **kwargs, - ) - elif te3 == 'fp8': - quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True) - text_encoder_3 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - quantization_config=quantization_config, - cache_dir=cache_dir, - config=config, - ) - pipe = loader( - model_id, - text_encoder_3=text_encoder_3, - device_map='balanced', - torch_dtype=dtype, - cache_dir=cache_dir, - config=config, - **kwargs, - ) - else: - pipe = loader( - model_id, - torch_dtype=dtype, - text_encoder_3=None, - cache_dir=cache_dir, - config=config, - **kwargs, - ) + pipe = loader( + model_id, + 
torch_dtype=dtype, + cache_dir=cache_dir, + config=config, + **kwargs, + ) diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["stable-diffusion-3"] = diffusers.StableDiffusion3Pipeline diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["stable-diffusion-3"] = diffusers.StableDiffusion3Img2ImgPipeline - devices.torch_gc(force=True) + devices.torch_gc() return pipe @@ -145,32 +120,4 @@ def load_te3(pipe, te3=None, cache_dir=None): subfolder='tokenizer_3', cache_dir=cache_dir, ) - devices.torch_gc(force=True) - - -if __name__ == '__main__': - model_fn = '/mnt/models/stable-diffusion/sd3/sd3_medium_incl_clips.safetensors' - import time - import logging - logging.basicConfig(level=logging.INFO) - log = logging.getLogger('sd') - t0 = time.time() - pipeline = load_sd3(te3='fp16', fn='') - - # pipeline.to('cuda') - t1 = time.time() - log.info(f'Loaded: time={t1-t0:.3f}') - - # pipeline.scheduler = diffusers.schedulers.EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config) - log.info(f'Scheduler, {pipeline.scheduler}') - image = pipeline( - prompt='a photo of a cute robot holding a sign above his head that says sdnext, high detailed', - negative_prompt='', - num_inference_steps=50, - height=1024, - width=1024, - guidance_scale=7.0, - ).images[0] - t2 = time.time() - log.info(f'Generated: time={t2-t1:.3f}') - image.save("/tmp/sd3.png") + devices.torch_gc() From 3f1c236ce4ac98945dc387602dbb0bb25e771fe0 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Thu, 13 Jun 2024 20:13:18 -0400 Subject: [PATCH 02/81] force apply vae config on model load --- CHANGELOG.md | 4 ++++ modules/sd_models.py | 11 +++++++---- modules/sd_vae.py | 23 +++++++++++++++++++++-- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e47aeda32..372bd7317 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Change Log for SD.Next +## Update for 2024-06-14 + +- force apply vae config on model load + ## Update for 2024-06-13 ### Highlights for 2024-06-13 diff --git a/modules/sd_models.py b/modules/sd_models.py index 2fb4a0292..535820686 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -545,7 +545,7 @@ def change_backend(): refresh_vae_list() -def detect_pipeline(f: str, op: str = 'model', warning=True): +def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): guess = shared.opts.diffusers_pipeline warn = shared.log.warning if warning else lambda *args, **kwargs: None size = 0 @@ -642,7 +642,8 @@ def detect_pipeline(f: str, op: str = 'model', warning=True): guess = 'Stable Diffusion XL Instruct' # get actual pipeline pipeline = shared_items.get_pipelines().get(guess, None) - shared.log.info(f'Autodetect: {op}="{guess}" class={pipeline.__name__} file="{f}" size={size}MB') + if not quiet: + shared.log.info(f'Autodetect: {op}="{guess}" class={pipeline.__name__} file="{f}" size={size}MB') except Exception as e: shared.log.error(f'Error detecting diffusers pipeline: model={f} {e}') return None, None @@ -650,7 +651,8 @@ def detect_pipeline(f: str, op: str = 'model', warning=True): try: size = round(os.path.getsize(f) / 1024 / 1024) pipeline = shared_items.get_pipelines().get(guess, None) - shared.log.info(f'Diffusers: {op}="{guess}" class={pipeline.__name__} file="{f}" size={size}MB') + if not quiet: + shared.log.info(f'Diffusers: {op}="{guess}" class={pipeline.__name__} file="{f}" size={size}MB') except Exception as e: shared.log.error(f'Error loading diffusers pipeline: model={f} {e}') @@ -1157,7 
+1159,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No timer.record("embeddings") set_diffuser_options(sd_model, vae, op) - + if op == 'model': + sd_vae.apply_vae_config(shared.sd_model.sd_checkpoint_info.filename, vae_file, sd_model) if op == 'refiner' and shared.opts.diffusers_move_refiner: shared.log.debug('Moving refiner model to CPU') move_model(sd_model, devices.cpu) diff --git a/modules/sd_vae.py b/modules/sd_vae.py index 2e27393e2..94a6c6b49 100644 --- a/modules/sd_vae.py +++ b/modules/sd_vae.py @@ -155,8 +155,6 @@ def load_vae(model, vae_file=None, vae_source="unknown-source"): except Exception as e: shared.log.error(f"Loading VAE failed: model={vae_file} source={vae_source} {e}") restore_base_vae(model) - # If vae used is not in dict, update it - # It will be removed on refresh though vae_opt = get_filename(vae_file) if vae_opt not in vae_dict: vae_dict[vae_opt] = vae_file @@ -165,6 +163,26 @@ def load_vae(model, vae_file=None, vae_source="unknown-source"): loaded_vae_file = vae_file +def apply_vae_config(model_file, vae_file, sd_model): + def get_vae_config(): + config_file = os.path.join(paths.sd_configs_path, os.path.splitext(os.path.basename(model_file))[0] + '_vae.json') + if config_file is not None and os.path.exists(config_file): + return shared.readfile(config_file) + config_file = os.path.join(paths.sd_configs_path, os.path.splitext(os.path.basename(vae_file))[0] + '.json') if vae_file else None + if config_file is not None and os.path.exists(config_file): + return shared.readfile(config_file) + config_file = os.path.join(paths.sd_configs_path, shared.sd_model_type, 'vae', 'config.json') + if config_file is not None and os.path.exists(config_file): + return shared.readfile(config_file) + return {} + + if hasattr(sd_model, 'vae') and hasattr(sd_model.vae, 'config'): + config = get_vae_config() + for k, v in config.items(): + if k in sd_model.vae.config and not k.startswith('_'): + sd_model.vae.config[k] = v + + def load_vae_diffusers(model_file, vae_file=None, vae_source="unknown-source"): if vae_file is None: return None @@ -262,6 +280,7 @@ def reload_vae_weights(sd_model=None, vae_file=unspecified): vae = load_vae_diffusers(shared.sd_model.sd_checkpoint_info.filename, vae_file, vae_source) if vae is not None: sd_models.set_diffuser_options(sd_model, vae=vae, op='vae') + apply_vae_config(shared.sd_model.sd_checkpoint_info.filename, vae_file, sd_model) if not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram: sd_models.move_model(sd_model, devices.device) From 945da97d32bdd9348ab5e9e67f9c5a7b6785a945 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Fri, 14 Jun 2024 10:27:49 -0400 Subject: [PATCH 03/81] restructure api examples --- CHANGELOG.md | 3 + cli/{simple-control.py => api-control.py} | 2 +- cli/api-faceid.py | 116 +++++ cli/{simple-img2img.py => api-img2img.py} | 2 +- cli/{simple-info.py => api-info.py} | 2 +- cli/{simple-mask.py => api-mask.py} | 2 +- ...simple-preprocess.py => api-preprocess.py} | 2 +- cli/{idle.py => api-progress.py} | 0 cli/{simple-txt2img.js => api-txt2img.js} | 0 cli/{simple-txt2img.py => api-txt2img.py} | 7 +- cli/{simple-upscale.py => api-upscale.py} | 2 +- cli/{simple-vqa.py => api-vqa.py} | 2 +- cli/latents.py | 170 ------- cli/model-jit.py | 176 ------- cli/torch-compile.py | 99 ---- cli/train.py | 443 ------------------ cli/zluda-python.py | 2 +- modules/control/run.py | 2 + modules/processing_helpers.py | 3 + modules/scripts.py | 22 +- scripts/face-details.py | 6 +- 21 files changed, 162 
insertions(+), 901 deletions(-) rename cli/{simple-control.py => api-control.py} (98%) create mode 100755 cli/api-faceid.py rename cli/{simple-img2img.py => api-img2img.py} (98%) rename cli/{simple-info.py => api-info.py} (96%) rename cli/{simple-mask.py => api-mask.py} (97%) rename cli/{simple-preprocess.py => api-preprocess.py} (97%) rename cli/{idle.py => api-progress.py} (100%) rename cli/{simple-txt2img.js => api-txt2img.js} (100%) rename cli/{simple-txt2img.py => api-txt2img.py} (93%) rename cli/{simple-upscale.py => api-upscale.py} (97%) rename cli/{simple-vqa.py => api-vqa.py} (96%) delete mode 100755 cli/latents.py delete mode 100755 cli/model-jit.py delete mode 100755 cli/torch-compile.py delete mode 100755 cli/train.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 372bd7317..93f4724e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,9 @@ ## Update for 2024-06-14 - force apply vae config on model load +- restructure api examples: `cli/api-*` +- fix control second pass resize +- fix api face-hires ## Update for 2024-06-13 diff --git a/cli/simple-control.py b/cli/api-control.py similarity index 98% rename from cli/simple-control.py rename to cli/api-control.py index a0246fdcc..a735bce4f 100755 --- a/cli/simple-control.py +++ b/cli/api-control.py @@ -132,7 +132,7 @@ def get_image(encoded, output): if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-img2img') + parser = argparse.ArgumentParser(description = 'api-img2img') parser.add_argument('--init', required=False, default=None, help='init image') parser.add_argument('--input', required=False, default=None, help='input image') parser.add_argument('--mask', required=False, help='mask image') diff --git a/cli/api-faceid.py b/cli/api-faceid.py new file mode 100755 index 000000000..dd9645cea --- /dev/null +++ b/cli/api-faceid.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +import os +import io +import time +import base64 +import logging +import argparse +import requests +import urllib3 +from PIL import Image + +sd_url = os.environ.get('SDAPI_URL', "http://127.0.0.1:7860") +sd_username = os.environ.get('SDAPI_USR', None) +sd_password = os.environ.get('SDAPI_PWD', None) + +logging.basicConfig(level = logging.INFO, format = '%(asctime)s %(levelname)s: %(message)s') +log = logging.getLogger(__name__) +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + +options = { + "save_images": False, + "send_images": True, +} + + +def auth(): + if sd_username is not None and sd_password is not None: + return requests.auth.HTTPBasicAuth(sd_username, sd_password) + return None + + +def post(endpoint: str, dct: dict = None): + req = requests.post(f'{sd_url}{endpoint}', json = dct, timeout=300, verify=False, auth=auth()) + if req.status_code != 200: + return { 'error': req.status_code, 'reason': req.reason, 'url': req.url } + else: + return req.json() + + +def encode(f): + image = Image.open(f) + if image.mode == 'RGBA': + image = image.convert('RGB') + with io.BytesIO() as stream: + image.save(stream, 'JPEG') + image.close() + values = stream.getvalue() + encoded = base64.b64encode(values).decode() + return encoded + + +def generate(args): # pylint: disable=redefined-outer-name + t0 = time.time() + if args.model is not None: + post('/sdapi/v1/options', { 'sd_model_checkpoint': args.model }) + post('/sdapi/v1/reload-checkpoint') # needed if running in api-only to trigger new model load + options['prompt'] = args.prompt + options['negative_prompt'] = args.negative + options['steps'] = int(args.steps) + 
options['seed'] = int(args.seed) + options['sampler_name'] = args.sampler + options['width'] = args.width + options['height'] = args.height + options['face'] = { + 'mode': 'FaceID', + 'ip_model': 'FaceID Base', + 'source_images': [encode(args.face)], + } + data = post('/sdapi/v1/txt2img', options) + t1 = time.time() + if 'images' in data: + for i in range(len(data['images'])): + b64 = data['images'][i].split(',',1)[0] + info = data['info'] + image = Image.open(io.BytesIO(base64.b64decode(b64))) + log.info(f'received image: size={image.size} time={t1-t0:.2f} info="{info}"') + if args.output: + image.save(args.output) + log.info(f'image saved: size={image.size} filename={args.output}') + + else: + log.warning(f'no images received: {data}') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = 'api-faceid') + parser.add_argument('--width', required=False, default=512, help='image width') + parser.add_argument('--height', required=False, default=512, help='image height') + parser.add_argument('--face', required=False, help='face image') + parser.add_argument('--prompt', required=False, default='', help='prompt text') + parser.add_argument('--negative', required=False, default='', help='negative prompt text') + parser.add_argument('--steps', required=False, default=20, help='number of steps') + parser.add_argument('--seed', required=False, default=-1, help='initial seed') + parser.add_argument('--sampler', required=False, default='Euler a', help='sampler name') + parser.add_argument('--output', required=False, default=None, help='output image file') + parser.add_argument('--model', required=False, help='model name') + args = parser.parse_args() + log.info(f'img2img: {args}') + generate(args) + +""" +request.face.mode, +request.face.source_images, +request.face.ip_model, +request.face.ip_override_sampler, +request.face.ip_cache_model, +request.face.ip_strength, +request.face.ip_structure, +request.face.id_strength, +request.face.id_conditioning, +request.face.id_cache, +request.face.pm_trigger, +request.face.pm_strength, +request.face.pm_start, +request.face.fs_cache +""" diff --git a/cli/simple-img2img.py b/cli/api-img2img.py similarity index 98% rename from cli/simple-img2img.py rename to cli/api-img2img.py index 7cbfa14b7..3a2961e5b 100755 --- a/cli/simple-img2img.py +++ b/cli/api-img2img.py @@ -83,7 +83,7 @@ def generate(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-img2img') + parser = argparse.ArgumentParser(description = 'api-img2img') parser.add_argument('--init', required=True, help='init image') parser.add_argument('--mask', required=False, help='mask image') parser.add_argument('--prompt', required=False, default='', help='prompt text') diff --git a/cli/simple-info.py b/cli/api-info.py similarity index 96% rename from cli/simple-info.py rename to cli/api-info.py index 4d1fd6d75..83e4dfe2e 100755 --- a/cli/simple-info.py +++ b/cli/api-info.py @@ -50,7 +50,7 @@ def info(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-info') + parser = argparse.ArgumentParser(description = 'api-info') parser.add_argument('--input', required=True, help='input image') args = parser.parse_args() log.info(f'info: {args}') diff --git a/cli/simple-mask.py b/cli/api-mask.py similarity index 97% rename from cli/simple-mask.py rename to cli/api-mask.py index 2ea12234e..0a1372138 100755 --- a/cli/simple-mask.py +++ 
b/cli/api-mask.py @@ -73,7 +73,7 @@ def info(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-info') + parser = argparse.ArgumentParser(description = 'api-mask') parser.add_argument('--input', required=True, help='input image') parser.add_argument('--mask', required=False, help='input mask') parser.add_argument('--type', required=False, help='output mask type') diff --git a/cli/simple-preprocess.py b/cli/api-preprocess.py similarity index 97% rename from cli/simple-preprocess.py rename to cli/api-preprocess.py index 2b96750bf..084f6a0b4 100755 --- a/cli/simple-preprocess.py +++ b/cli/api-preprocess.py @@ -67,7 +67,7 @@ def info(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-info') + parser = argparse.ArgumentParser(description = 'api-preprocess') parser.add_argument('--input', required=True, help='input image') parser.add_argument('--model', required=True, help='preprocessing model') parser.add_argument('--output', required=False, help='output image') diff --git a/cli/idle.py b/cli/api-progress.py similarity index 100% rename from cli/idle.py rename to cli/api-progress.py diff --git a/cli/simple-txt2img.js b/cli/api-txt2img.js similarity index 100% rename from cli/simple-txt2img.js rename to cli/api-txt2img.js diff --git a/cli/simple-txt2img.py b/cli/api-txt2img.py similarity index 93% rename from cli/simple-txt2img.py rename to cli/api-txt2img.py index d3287ee46..a00515fe5 100755 --- a/cli/simple-txt2img.py +++ b/cli/api-txt2img.py @@ -48,7 +48,10 @@ def generate(args): # pylint: disable=redefined-outer-name options['sampler_name'] = args.sampler options['width'] = int(args.width) options['height'] = int(args.height) - options['restore_faces'] = args.faces + if args.faces: + options['restore_faces'] = args.faces + options['denoising_strength'] = 0.5 + options['hr_sampler_name'] = args.sampler data = post('/sdapi/v1/txt2img', options) t1 = time.time() if 'images' in data: @@ -65,7 +68,7 @@ def generate(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-txt2img') + parser = argparse.ArgumentParser(description = 'api-txt2img') parser.add_argument('--prompt', required=False, default='', help='prompt text') parser.add_argument('--negative', required=False, default='', help='negative prompt text') parser.add_argument('--width', required=False, default=512, help='image width') diff --git a/cli/simple-upscale.py b/cli/api-upscale.py similarity index 97% rename from cli/simple-upscale.py rename to cli/api-upscale.py index b5a2bb5dd..082e008a8 100755 --- a/cli/simple-upscale.py +++ b/cli/api-upscale.py @@ -80,7 +80,7 @@ def upscale(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-upscale') + parser = argparse.ArgumentParser(description = 'api-upscale') parser.add_argument('--input', required=True, help='input image') parser.add_argument('--output', required=True, help='output image') parser.add_argument('--upscaler', required=False, default='Nearest', help='upscaler name') diff --git a/cli/simple-vqa.py b/cli/api-vqa.py similarity index 96% rename from cli/simple-vqa.py rename to cli/api-vqa.py index 0ac181b7c..73de8dbc8 100755 --- a/cli/simple-vqa.py +++ b/cli/api-vqa.py @@ -55,7 +55,7 @@ def info(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = 
argparse.ArgumentParser(description = 'simple-info') + parser = argparse.ArgumentParser(description = 'api-vqa') parser.add_argument('--input', required=True, help='input image') parser.add_argument('--model', required=False, help='vqa model') parser.add_argument('--question', required=False, help='question') diff --git a/cli/latents.py b/cli/latents.py deleted file mode 100755 index 717f17352..000000000 --- a/cli/latents.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import json -import pathlib -import argparse -import warnings - -import cv2 -import numpy as np -import torch -from PIL import Image -from torchvision import transforms -from tqdm import tqdm -from util import Map - -from rich.pretty import install as pretty_install -from rich.traceback import install as traceback_install -from rich.console import Console - -console = Console(log_time=True, log_time_format='%H:%M:%S-%f') -pretty_install(console=console) -traceback_install(console=console, extra_lines=1, width=console.width, word_wrap=False, indent_guides=False) - -sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'modules', 'lora')) -import library.model_util as model_util -import library.train_util as train_util - -warnings.filterwarnings('ignore') -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -options = Map({ - 'batch': 1, - 'input': '', - 'json': '', - 'max': 1024, - 'min': 256, - 'noupscale': False, - 'precision': 'fp32', - 'resolution': '512,512', - 'steps': 64, - 'vae': 'stabilityai/sd-vae-ft-mse' -}) -vae = None - - -def get_latents(local_vae, images, weight_dtype): - image_transforms = transforms.Compose([ transforms.ToTensor(), transforms.Normalize([0.5], [0.5]) ]) - img_tensors = [image_transforms(image) for image in images] - img_tensors = torch.stack(img_tensors) - img_tensors = img_tensors.to(device, weight_dtype) - with torch.no_grad(): - latents = local_vae.encode(img_tensors).latent_dist.sample().float().to('cpu').numpy() - return latents, [images[0].shape[0], images[0].shape[1]] - - -def get_npz_filename_wo_ext(data_dir, image_key): - return os.path.join(data_dir, os.path.splitext(os.path.basename(image_key))[0]) - - -def create_vae_latents(local_params): - args = Map({**options, **local_params}) - console.log(f'create vae latents args: {args}') - image_paths = train_util.glob_images(args.input) - if os.path.exists(args.json): - with open(args.json, 'rt', encoding='utf-8') as f: - metadata = json.load(f) - else: - return - if args.precision == 'fp16': - weight_dtype = torch.float16 - elif args.precision == 'bf16': - weight_dtype = torch.bfloat16 - else: - weight_dtype = torch.float32 - global vae # pylint: disable=global-statement - if vae is None: - vae = model_util.load_vae(args.vae, weight_dtype) - vae.eval() - vae.to(device, dtype=weight_dtype) - max_reso = tuple([int(t) for t in args.resolution.split(',')]) - assert len(max_reso) == 2, f'illegal resolution: {args.resolution}' - bucket_manager = train_util.BucketManager(args.noupscale, max_reso, args.min, args.max, args.steps) - if not args.noupscale: - bucket_manager.make_buckets() - img_ar_errors = [] - def process_batch(is_last): - for bucket in bucket_manager.buckets: - if (is_last and len(bucket) > 0) or len(bucket) >= args.batch: - latents, original_size = get_latents(vae, [img for _, img in bucket], weight_dtype) - assert latents.shape[2] == bucket[0][1].shape[0] // 8 and latents.shape[3] == bucket[0][1].shape[1] // 8, f'latent shape {latents.shape}, {bucket[0][1].shape}' - 
for (image_key, _), latent in zip(bucket, latents): - npz_file_name = get_npz_filename_wo_ext(args.input, image_key) - # np.savez(npz_file_name, latent) - kwargs = {} - np.savez( - npz_file_name, - latents=latent, - original_size=np.array(original_size), - crop_ltrb=np.array([0, 0]), - **kwargs, - ) - bucket.clear() - data = [[(None, ip)] for ip in image_paths] - bucket_counts = {} - for data_entry in tqdm(data, smoothing=0.0): - if data_entry[0] is None: - continue - img_tensor, image_path = data_entry[0] - if img_tensor is not None: - image = transforms.functional.to_pil_image(img_tensor) - else: - image = Image.open(image_path) - image_key = os.path.basename(image_path) - image_key = os.path.join(os.path.basename(pathlib.Path(image_path).parent), pathlib.Path(image_path).stem) - if image_key not in metadata: - metadata[image_key] = {} - reso, resized_size, ar_error = bucket_manager.select_bucket(image.width, image.height) - img_ar_errors.append(abs(ar_error)) - bucket_counts[reso] = bucket_counts.get(reso, 0) + 1 - metadata[image_key]['train_resolution'] = (reso[0] - reso[0] % 8, reso[1] - reso[1] % 8) - if not args.noupscale: - assert resized_size[0] == reso[0] or resized_size[1] == reso[1], f'internal error, resized size not match: {reso}, {resized_size}, {image.width}, {image.height}' - assert resized_size[0] >= reso[0] and resized_size[1] >= reso[1], f'internal error, resized size too small: {reso}, {resized_size}, {image.width}, {image.height}' - assert resized_size[0] >= reso[0] and resized_size[1] >= reso[1], f'internal error resized size is small: {resized_size}, {reso}' - image = np.array(image) - if resized_size[0] != image.shape[1] or resized_size[1] != image.shape[0]: - image = cv2.resize(image, resized_size, interpolation=cv2.INTER_AREA) - if resized_size[0] > reso[0]: - trim_size = resized_size[0] - reso[0] - image = image[:, trim_size//2:trim_size//2 + reso[0]] - if resized_size[1] > reso[1]: - trim_size = resized_size[1] - reso[1] - image = image[trim_size//2:trim_size//2 + reso[1]] - assert image.shape[0] == reso[1] and image.shape[1] == reso[0], f'internal error, illegal trimmed size: {image.shape}, {reso}' - bucket_manager.add_image(reso, (image_key, image)) - process_batch(False) - - process_batch(True) - vae.to('cpu') - - bucket_manager.sort() - img_ar_errors = np.array(img_ar_errors) - for i, reso in enumerate(bucket_manager.resos): - count = bucket_counts.get(reso, 0) - if count > 0: - console.log(f'vae latents bucket: {i+1}/{len(bucket_manager.resos)} resolution: {reso} images: {count} mean-ar-error: {np.mean(img_ar_errors)}') - with open(args.json, 'wt', encoding='utf-8') as f: - json.dump(metadata, f, indent=2) - - -def unload_vae(): - global vae # pylint: disable=global-statement - vae = None - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('input', type=str, help='directory for train images') - parser.add_argument('--json', type=str, required=True, help='metadata file to input') - parser.add_argument('--vae', type=str, required=True, help='model name or path to encode latents') - parser.add_argument('--batch', type=int, default=1, help='batch size in inference') - parser.add_argument('--resolution', type=str, default='512,512', help='max resolution in fine tuning (width,height)') - parser.add_argument('--min', type=int, default=256, help='minimum resolution for buckets') - parser.add_argument('--max', type=int, default=1024, help='maximum resolution for buckets') - parser.add_argument('--steps', type=int, default=64, 
help='steps of resolution for buckets, divisible by 8') - parser.add_argument('--noupscale', action='store_true', help='make bucket for each image without upscaling') - parser.add_argument('--precision', type=str, default='fp32', choices=['fp32', 'fp16', 'bf16'], help='use precision') - params = parser.parse_args() - create_vae_latents(vars(params)) diff --git a/cli/model-jit.py b/cli/model-jit.py deleted file mode 100755 index e4af79e95..000000000 --- a/cli/model-jit.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python -import os -import time -import functools -import argparse -import logging -import warnings -from dataclasses import dataclass - -logging.getLogger("DeepSpeed").disabled = True -warnings.filterwarnings(action="ignore", category=FutureWarning) -warnings.filterwarnings(action="ignore", category=DeprecationWarning) - -import torch -import diffusers - -n_warmup = 5 -n_traces = 10 -n_runs = 100 -args = {} -pipe = None -log = logging.getLogger("sd") - - -def setup_logging(): - from rich.theme import Theme - from rich.logging import RichHandler - from rich.console import Console - from rich.traceback import install - log.setLevel(logging.DEBUG) - console = Console(log_time=True, log_time_format='%H:%M:%S-%f', theme=Theme({ "traceback.border": "black", "traceback.border.syntax_error": "black", "inspect.value.border": "black" })) - logging.basicConfig(level=logging.ERROR, format='%(asctime)s | %(name)s | %(levelname)s | %(module)s | %(message)s', handlers=[logging.NullHandler()]) # redirect default logger to null - rh = RichHandler(show_time=True, omit_repeated_times=False, show_level=True, show_path=False, markup=False, rich_tracebacks=True, log_time_format='%H:%M:%S-%f', level=logging.DEBUG, console=console) - rh.setLevel(logging.DEBUG) - log.addHandler(rh) - logging.getLogger("diffusers").setLevel(logging.ERROR) - logging.getLogger("torch").setLevel(logging.ERROR) - warnings.filterwarnings(action="ignore", category=torch.jit.TracerWarning) - install(console=console, extra_lines=1, max_frames=10, width=console.width, word_wrap=False, indent_guides=False, suppress=[]) - - -def generate_inputs(): - if args.type == 'sd15': - sample = torch.randn(2, 4, 64, 64).half().cuda() - timestep = torch.rand(1).half().cuda() * 999 - encoder_hidden_states = torch.randn(2, 77, 768).half().cuda() - return sample, timestep, encoder_hidden_states - if args.type == 'sdxl': - sample = torch.randn(2, 4, 64, 64).half().cuda() - timestep = torch.rand(1).half().cuda() * 999 - encoder_hidden_states = torch.randn(2, 77, 768).half().cuda() - text_embeds = torch.randn(1, 77, 2048).half().cuda() - return sample, timestep, encoder_hidden_states, text_embeds - - -def load_model(): - log.info(f'versions: torch={torch.__version__} diffusers={diffusers.__version__}') - diffusers_load_config = { - "low_cpu_mem_usage": True, - "torch_dtype": torch.float16, - "safety_checker": None, - "requires_safety_checker": False, - "load_connected_pipeline": True, - "use_safetensors": True, - } - pipeline = diffusers.StableDiffusionPipeline if args.type == 'sd15' else diffusers.StableDiffusionXLPipeline - global pipe # pylint: disable=global-statement - t0 = time.time() - pipe = pipeline.from_single_file(args.model, **diffusers_load_config).to('cuda') - size = os.path.getsize(args.model) - log.info(f'load: model={args.model} type={args.type} time={time.time() - t0:.3f}s size={size / 1024 / 1024:.3f}mb') - - -def load_trace(fn: str): - - @dataclass - class UNet2DConditionOutput: - sample: torch.FloatTensor - - class 
TracedUNet(torch.nn.Module): - def __init__(self): - super().__init__() - self.in_channels = pipe.unet.in_channels - self.device = pipe.unet.device - - def forward(self, latent_model_input, t, encoder_hidden_states): - sample = unet_traced(latent_model_input, t, encoder_hidden_states)[0] - return UNet2DConditionOutput(sample=sample) - - t0 = time.time() - unet_traced = torch.jit.load(fn) - pipe.unet = TracedUNet() - size = os.path.getsize(fn) - log.info(f'load: optimized={fn} time={time.time() - t0:.3f}s size={size / 1024 / 1024:.3f}mb') - - -def trace_model(): - log.info(f'tracing model: {args.model}') - torch.set_grad_enabled(False) - unet = pipe.unet - unet.eval() - # unet.to(memory_format=torch.channels_last) # use channels_last memory format - unet.forward = functools.partial(unet.forward, return_dict=False) # set return_dict=False as default - - # warmup - t0 = time.time() - for _ in range(n_warmup): - with torch.inference_mode(): - inputs = generate_inputs() - _output = unet(*inputs) - log.info(f'warmup: time={time.time() - t0:.3f}s passes={n_warmup}') - - # trace - t0 = time.time() - unet_traced = torch.jit.trace(unet, inputs, check_trace=True) - unet_traced.eval() - log.info(f'trace: time={time.time() - t0:.3f}s') - - # optimize graph - t0 = time.time() - for _ in range(n_traces): - with torch.inference_mode(): - inputs = generate_inputs() - _output = unet_traced(*inputs) - log.info(f'optimize: time={time.time() - t0:.3f}s passes={n_traces}') - - # save the model - if args.save: - t0 = time.time() - basename, _ext = os.path.splitext(args.model) - fn = f"{basename}.pt" - unet_traced.save(fn) - size = os.path.getsize(fn) - log.info(f'save: optimized={fn} time={time.time() - t0:.3f}s size={size / 1024 / 1024:.3f}mb') - return fn - - pipe.unet = unet_traced - return None - - -def benchmark_model(msg: str): - with torch.inference_mode(): - inputs = generate_inputs() - torch.cuda.synchronize() - for n in range(n_runs): - if n > n_runs / 10: - t0 = time.time() - _output = pipe.unet(*inputs) - torch.cuda.synchronize() - t1 = time.time() - log.info(f"benchmark unet: {t1 - t0:.3f}s passes={n_runs} type={msg}") - return t1 - t0 - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description = 'SD.Next') - parser.add_argument('--model', type=str, default='', required=True, help='model path') - parser.add_argument('--type', type=str, default='sd15', choices=['sd15', 'sdxl'], required=False, help='model type, default: %(default)s') - parser.add_argument('--benchmark', default = False, action='store_true', help = "run benchmarks, default: %(default)s") - parser.add_argument('--trace', default = True, action='store_true', help = "run jit tracing, default: %(default)s") - parser.add_argument('--save', default = False, action='store_true', help = "save optimized unet, default: %(default)s") - args = parser.parse_args() - setup_logging() - log.info('sdnext model jit tracing') - if not os.path.isfile(args.model): - log.error(f"invalid model path: {args.model}") - exit(1) - load_model() - if args.benchmark: - time0 = benchmark_model('original') - unet_saved = trace_model() - if unet_saved is not None: - load_trace(unet_saved) - if args.benchmark: - time1 = benchmark_model('traced') - log.info(f'benchmark speedup: {100 * (time0 - time1) / time0:.3f}%') diff --git a/cli/torch-compile.py b/cli/torch-compile.py deleted file mode 100755 index 891f27dc5..000000000 --- a/cli/torch-compile.py +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# pylint: disable=cell-var-from-loop -""" -Test 
Torch Dynamo functionality and backends -""" -import json -import warnings - -import numpy as np -import torch -from torchvision.models import resnet18 - - -print('torch:', torch.__version__) -try: - # must be imported explicitly or namespace is not found - import torch._dynamo as dynamo # pylint: disable=ungrouped-imports -except Exception as err: - print('torch without dynamo support', err) - - -N_ITERS = 20 -torch._dynamo.config.verbose=True # pylint: disable=protected-access -warnings.filterwarnings('ignore', category=UserWarning) # disable those for now as many backends reports tons -# torch.set_float32_matmul_precision('high') # enable to test in fp32 - - -def timed(fn): # returns the result of running `fn()` and the time it took for `fn()` to run in ms using CUDA events - start = torch.cuda.Event(enable_timing=True) - end = torch.cuda.Event(enable_timing=True) - start.record() - result = fn() - end.record() - torch.cuda.synchronize() - return result, start.elapsed_time(end) - - -def generate_data(b): - return ( - torch.randn(b, 3, 128, 128).to(torch.float32).cuda(), - torch.randint(1000, (b,)).cuda(), - ) - - -def init_model(): - return resnet18().to(torch.float32).cuda() - - -def evaluate(mod, val): - return mod(val) - - -if __name__ == '__main__': - # first pass, dynamo is going to be slower as it compiles - model = init_model() - inp = generate_data(16)[0] - - # repeat test - results = {} - times = [] - print('eager initial eval:', timed(lambda: evaluate(model, inp))[1]) - for _i in range(N_ITERS): - inp = generate_data(16)[0] - _res, time = timed(lambda: evaluate(model, inp)) # noqa: B023 - times.append(time) - results['default'] = np.median(times) - - print('dynamo available backends:', dynamo.list_backends()) - for backend in dynamo.list_backends(): - try: - # required before changing backends - torch._dynamo.reset() # pylint: disable=protected-access - eval_dyn = dynamo.optimize(backend)(evaluate) - print('dynamo initial eval:', backend, timed(lambda: eval_dyn(model, inp))[1]) # noqa: B023 - times = [] - for _i in range(N_ITERS): - inp = generate_data(16)[0] - _res, time = timed(lambda: eval_dyn(model, inp)) # noqa: B023 - times.append(time) - results[backend] = np.median(times) - except Exception as err: - lines = str(err).split('\n') - print('dyanmo backend failed:', backend, lines[0]) # print just first error line as backtraces can be quite long - results[backend] = 'error' - - # print stats - print(json.dumps(results, indent = 4)) - -""" -Reference: -Training & Inference backends: - dynamo.optimize("inductor") - Uses TorchInductor backend with AotAutograd and cudagraphs by leveraging codegened Triton kernels - dynamo.optimize("aot_nvfuser") - nvFuser with AotAutograd - dynamo.optimize("aot_cudagraphs") - cudagraphs with AotAutograd -Inference-only backends: - dynamo.optimize("ofi") - Uses Torchscript optimize_for_inference - dynamo.optimize("fx2trt") - Uses Nvidia TensorRT for inference optimizations - dynamo.optimize("onnxrt") - Uses ONNXRT for inference on CPU/GPU -""" diff --git a/cli/train.py b/cli/train.py deleted file mode 100755 index 9e551ddf5..000000000 --- a/cli/train.py +++ /dev/null @@ -1,443 +0,0 @@ -#!/usr/bin/env python - -""" -Examples: -- sd15: train.py --type lora --tag girl --comments sdnext --input ~/generative/Input/mia --process original,interrogate,resize --name mia -- sdxl: train.py --type lora --tag girl --comments sdnext --input ~/generative/Input/mia --process original,interrogate,resize --precision fp32 --optimizer Adafactor --sdxl --name miaxl 
-- offline: train.py --type lora --tag girl --comments sdnext --input ~/generative/Input/mia --model /home/vlado/dev/sdnext/models/Stable-diffusion/sdxl/miaanimeSFWNSFWSDXL_v40.safetensors --dir /home/vlado/dev/sdnext/models/Lora/ --precision fp32 --optimizer Adafactor --sdxl --name miaxl -""" - -# system imports -import os -import re -import gc -import sys -import json -import shutil -import pathlib -import asyncio -import logging -import tempfile -import argparse - -# local imports -import util -import sdapi -import options - - -# globals -args = None -log = logging.getLogger('train') -valid_steps = ['original', 'face', 'body', 'blur', 'range', 'upscale', 'restore', 'interrogate', 'resize', 'square', 'segment'] -log_file = os.path.join(os.path.dirname(__file__), 'train.log') -server_ok = False - -# methods - -def setup_logging(): - from rich.theme import Theme - from rich.logging import RichHandler - from rich.console import Console - from rich.pretty import install as pretty_install - from rich.traceback import install as traceback_install - console = Console(log_time=True, log_time_format='%H:%M:%S-%f', theme=Theme({ - "traceback.border": "black", - "traceback.border.syntax_error": "black", - "inspect.value.border": "black", - })) - # logging.getLogger("urllib3").setLevel(logging.ERROR) - # logging.getLogger("httpx").setLevel(logging.ERROR) - level = logging.DEBUG if args.debug else logging.INFO - logging.basicConfig(level=logging.ERROR, format='%(asctime)s | %(name)s | %(levelname)s | %(module)s | %(message)s', filename=log_file, filemode='a', encoding='utf-8', force=True) - log.setLevel(logging.DEBUG) # log to file is always at level debug for facility `sd` - pretty_install(console=console) - traceback_install(console=console, extra_lines=1, width=console.width, word_wrap=False, indent_guides=False, suppress=[]) - rh = RichHandler(show_time=True, omit_repeated_times=False, show_level=True, show_path=False, markup=False, rich_tracebacks=True, log_time_format='%H:%M:%S-%f', level=level, console=console) - rh.set_name(level) - while log.hasHandlers() and len(log.handlers) > 0: - log.removeHandler(log.handlers[0]) - log.addHandler(rh) - - -def mem_stats(): - gc.collect() - import torch - if torch.cuda.is_available(): - with torch.no_grad(): - torch.cuda.empty_cache() - with torch.cuda.device('cuda'): - torch.cuda.empty_cache() - torch.cuda.ipc_collect() - mem = util.get_memory() - peak = { 'active': mem['gpu-active']['peak'], 'allocated': mem['gpu-allocated']['peak'], 'reserved': mem['gpu-reserved']['peak'] } - log.debug(f"memory cpu: {mem.ram} gpu current: {mem.gpu} gpu peak: {peak}") - - -def parse_args(): - global args # pylint: disable=global-statement - parser = argparse.ArgumentParser(description = 'SD.Next Train') - - group_server = parser.add_argument_group('Server') - group_server.add_argument('--server', type=str, default='http://127.0.0.1:7860', required=False, help='server url, default: %(default)s') - group_server.add_argument('--user', type=str, default=None, required=False, help='server url, default: %(default)s') - group_server.add_argument('--password', type=str, default=None, required=False, help='server url, default: %(default)s') - group_server.add_argument('--dir', type=str, default=None, required=False, help='folder with trained networks, default: use server setting') - - group_main = parser.add_argument_group('Main') - group_main.add_argument('--type', type=str, choices=['embedding', 'ti', 'lora', 'lyco', 'dreambooth', 'hypernetwork'], default=None, required=True, 
help='training type') - group_main.add_argument('--model', type=str, default='', required=False, help='base model to use for training, default: current loaded model') - group_main.add_argument('--name', type=str, default=None, required=True, help='output filename') - group_main.add_argument('--tag', type=str, default='person', required=False, help='primary tags, default: %(default)s') - group_main.add_argument('--comments', type=str, default='', required=False, help='comments to be added to trained model metadata, default: %(default)s') - - group_data = parser.add_argument_group('Dataset') - group_data.add_argument('--input', type=str, default=None, required=True, help='input folder with training images') - group_data.add_argument('--interim', type=str, default='', required=False, help='where to store processed images, default is system temp/train') - group_data.add_argument('--process', type=str, default='original,interrogate,resize,square', required=False, help=f'list of possible processing steps: {valid_steps}, default: %(default)s') - - group_train = parser.add_argument_group('Train') - group_train.add_argument('--gradient', type=int, default=1, required=False, help='gradient accumulation steps, default: %(default)s') - group_train.add_argument('--steps', type=int, default=2500, required=False, help='training steps, default: %(default)s') - group_train.add_argument('--batch', type=int, default=1, required=False, help='batch size, default: %(default)s') - group_train.add_argument('--lr', type=float, default=1e-04, required=False, help='model learning rate, default: %(default)s') - group_train.add_argument('--dim', type=int, default=32, required=False, help='network dimension or number of vectors, default: %(default)s') - - # lora params - group_train.add_argument('--repeats', type=int, default=1, required=False, help='number of repeats per image, default: %(default)s') - group_train.add_argument('--alpha', type=float, default=0, required=False, help='lora/lyco alpha for weights scaling, default: dim/2') - group_train.add_argument('--algo', type=str, default=None, choices=['locon', 'loha', 'lokr', 'ia3'], required=False, help='alternative lyco algoritm, default: %(default)s') - group_train.add_argument('--args', type=str, default=None, required=False, help='lora/lyco additional network arguments, default: %(default)s') - group_train.add_argument('--optimizer', type=str, default='AdamW', required=False, help='optimizer type, default: %(default)s') - group_train.add_argument('--precision', type=str, choices=['fp16', 'fp32'], default='fp16', required=False, help='training precision, default: %(default)s') - group_train.add_argument('--sdxl', default = False, action='store_true', help = "run sdxl training, default: %(default)s") - # AdamW (default), AdamW8bit, PagedAdamW8bit, Lion8bit, PagedLion8bit, Lion, SGDNesterov, SGDNesterov8bit, DAdaptation(DAdaptAdamPreprint), DAdaptAdaGrad, DAdaptAdam, DAdaptAdan, DAdaptAdanIP, DAdaptLion, DAdaptSGD, AdaFactor - - group_other = parser.add_argument_group('Other') - group_other.add_argument('--overwrite', default = False, action='store_true', help = "overwrite existing training, default: %(default)s") - group_other.add_argument('--experimental', default = False, action='store_true', help = "enable experimental options, default: %(default)s") - group_other.add_argument('--debug', default = False, action='store_true', help = "enable debug level logging, default: %(default)s") - - args = parser.parse_args() - - -def prepare_server(): - global server_ok # 
pylint: disable=global-statement - try: - server_status = util.Map(sdapi.progresssync()) - server_state = server_status['state'] - server_ok = True - except Exception: - log.warning(f'sdnext server error: {server_status}') - server_ok = False - if server_ok and server_state['job_count'] > 0: - log.error(f'sdnext server not idle: {server_state}') - exit(1) - if server_ok: - server_options = util.Map(sdapi.options()) - server_options.options.save_training_settings_to_txt = False - server_options.options.training_enable_tensorboard = False - server_options.options.training_tensorboard_save_images = False - server_options.options.pin_memory = True - server_options.options.save_optimizer_state = False - server_options.options.training_image_repeats_per_epoch = args.repeats - server_options.options.training_write_csv_every = 0 - sdapi.postsync('/sdapi/v1/options', server_options.options) - log.info('updated server options') - - -def verify_args(): - server_options = util.Map(sdapi.options()) - if args.model != '': - if not os.path.isfile(args.model): - log.error(f'cannot find loaded model: {args.model}') - exit(1) - if server_ok: - server_options.options.sd_model_checkpoint = args.model - sdapi.postsync('/sdapi/v1/options', server_options.options) - elif server_ok: - args.model = server_options.options.sd_model_checkpoint.split(' [')[0] - if args.sdxl and (server_options.sd_backend != 'diffusers' or server_options.diffusers_pipeline != 'Stable Diffusion XL'): - log.warning('server checkpoint is not sdxl') - else: - log.error('no model specified') - exit(1) - base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - if args.type == 'lora' and not server_ok and not args.dir: - log.error('offline lora training requires --dir ') - exit(1) - if args.type == 'lora': - import transformers - if transformers.__version__ != '4.30.2': - log.error(f'lora training requires specific transformers version: current {transformers.__version__} required transformers==4.30.2') - exit(1) - args.lora_dir = server_options.options.lora_dir or args.dir - if not os.path.isabs(args.lora_dir): - args.lora_dir = os.path.join(base_dir, args.lora_dir) - args.lyco_dir = server_options.options.lyco_dir or args.dir - if not os.path.isabs(args.lyco_dir): - args.lyco_dir = os.path.join(base_dir, args.lyco_dir) - args.embeddings_dir = server_options.options.embeddings_dir or args.dir - if not os.path.isfile(args.model): - args.ckpt_dir = server_options.options.ckpt_dir - if not os.path.isabs(args.ckpt_dir): - args.ckpt_dir = os.path.join(base_dir, args.ckpt_dir) - attempt = os.path.abspath(os.path.join(args.ckpt_dir, args.model)) - args.model = attempt if os.path.isfile(attempt) else args.model - if not os.path.isfile(args.model): - attempt = os.path.abspath(os.path.join(args.ckpt_dir, args.model + '.safetensors')) - args.model = attempt if os.path.isfile(attempt) else args.model - if not os.path.isfile(args.model): - log.error(f'cannot find loaded model: {args.model}') - exit(1) - if not os.path.exists(args.input) or not os.path.isdir(args.input): - log.error(f'cannot find training folder: {args.input}') - exit(1) - if not os.path.exists(args.lora_dir) or not os.path.isdir(args.lora_dir): - log.error(f'cannot find lora folder: {args.lora_dir}') - exit(1) - if not os.path.exists(args.lyco_dir) or not os.path.isdir(args.lyco_dir): - log.error(f'cannot find lyco folder: {args.lyco_dir}') - exit(1) - if args.interim != '': - args.process_dir = args.interim - else: - args.process_dir = os.path.join(tempfile.gettempdir(), 
'train', args.name) - log.debug(f'args: {vars(args)}') - log.debug(f'server flags: {server_options.flags}') - log.debug(f'server options: {server_options.options}') - - -async def training_loop(): - async def async_train(): - res = await sdapi.post('/sdapi/v1/train/embedding', options.embedding) - log.info(f'train embedding result: {res}') - - async def async_monitor(): - from tqdm.rich import tqdm - await asyncio.sleep(3) - res = util.Map(sdapi.progress()) - with tqdm(desc='train embedding', total=res.state.job_count) as pbar: - while res.state.job_no < res.state.job_count and not res.state.interrupted and not res.state.skipped: - await asyncio.sleep(2) - prev_job = res.state.job_no - res = util.Map(sdapi.progress()) - loss = re.search(r"Loss: (.*?)(?=\<)", res.textinfo) - if loss: - pbar.set_postfix({ 'loss': loss.group(0) }) - pbar.update(res.state.job_no - prev_job) - - a = asyncio.create_task(async_train()) - b = asyncio.create_task(async_monitor()) - await asyncio.gather(a, b) # wait for both pipeline and monitor to finish - - -def train_embedding(): - log.info(f'{args.type} options: {options.embedding}') - create_options = util.Map({ - "name": args.name, - "num_vectors_per_token": args.dim, - "overwrite_old": False, - "init_text": args.tag, - }) - fn = os.path.join(args.embeddings_dir, args.name) + '.pt' - if os.path.exists(fn) and args.overwrite: - log.warning(f'delete existing embedding {fn}') - os.remove(fn) - else: - log.error(f'embedding exists {fn}') - return - log.info(f'create embedding {create_options}') - res = sdapi.postsync('/sdapi/v1/create/embedding', create_options) - if 'info' in res and 'error' in res['info']: # formatted error - log.error(res.info) - elif 'info' in res: # no error - asyncio.run(training_loop()) - else: # unknown error - log.error(f'create embedding error {res}') - - -def train_lora(): - fn = os.path.join(options.lora.output_dir, args.name) - for ext in ['.ckpt', '.pt', '.safetensors']: - if os.path.exists(fn + ext): - if args.overwrite: - log.warning(f'delete existing lora: {fn + ext}') - os.remove(fn + ext) - else: - log.error(f'lora exists: {fn + ext}') - return - log.info(f'{args.type} options: {options.lora}') - # lora imports - lora_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'modules', 'lora')) - lycoris_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'modules', 'lycoris')) - sys.path.append(lora_path) - if args.type == 'lyco': - sys.path.append(lycoris_path) - log.debug('importing lora lib') - if not args.sdxl: - import train_network - trainer = train_network.NetworkTrainer() - trainer.train(options.lora) - else: - import sdxl_train_network - trainer = sdxl_train_network.SdxlNetworkTrainer() - trainer.train(options.lora) - if args.type == 'lyco': - log.debug('importing lycoris lib') - import importlib - _network_module = importlib.import_module(options.lora.network_module) - - -def prepare_options(): - if args.type == 'embedding': - log.info('train embedding') - options.lora.in_json = None - if args.type == 'dreambooth': - log.info('train using dreambooth style training') - options.lora.vae_batch_size = args.batch - options.lora.in_json = None - if args.type == 'lora': - log.info('train using lora style training') - options.lora.output_dir = args.lora_dir - options.lora.in_json = os.path.join(args.process_dir, args.name + '.json') - if args.type == 'lyco': - log.info('train using lycoris network') - options.lora.output_dir = args.lora_dir - options.lora.network_module = 'lycoris.kohya' 
- options.lora.in_json = os.path.join(args.process_dir, args.name + '.json') - # lora specific - options.lora.save_model_as = 'safetensors' - options.lora.pretrained_model_name_or_path = args.model - options.lora.output_name = args.name - options.lora.max_train_steps = args.steps - options.lora.network_dim = args.dim - options.lora.network_alpha = args.dim // 2 if args.alpha == 0 else args.alpha - options.lora.network_args = [] - options.lora.training_comment = args.comments - options.lora.sdpa = True - options.lora.optimizer_type = args.optimizer - if args.algo is not None: - options.lora.network_args.append(f'algo={args.algo}') - if args.args is not None: - for net_arg in args.args: - options.lora.network_args.append(net_arg) - options.lora.gradient_accumulation_steps = args.gradient - options.lora.learning_rate = args.lr - options.lora.train_batch_size = args.batch - options.lora.train_data_dir = args.process_dir - options.lora.no_half_vae = args.precision == 'fp16' - # embedding specific - options.embedding.embedding_name = args.name - options.embedding.learn_rate = str(args.lr) - options.embedding.batch_size = args.batch - options.embedding.steps = args.steps - options.embedding.data_root = args.process_dir - options.embedding.log_directory = os.path.join(args.process_dir, 'log') - options.embedding.gradient_step = args.gradient - - -def process_inputs(): - import process - import filetype - pathlib.Path(args.process_dir).mkdir(parents=True, exist_ok=True) - processing_options = args.process.split(',') if isinstance(args.process, str) else args.process - processing_options = [opt.strip() for opt in re.split(',| ', args.process)] - log.info(f'processing steps: {processing_options}') - for step in processing_options: - if step not in valid_steps: - log.error(f'invalid processing step: {[step]}') - exit(1) - for root, _sub_dirs, folder in os.walk(args.input): - files = [os.path.join(root, f) for f in folder if filetype.is_image(os.path.join(root, f))] - log.info(f'processing input images: {len(files)}') - if os.path.exists(args.process_dir): - if args.overwrite: - log.warning(f'removing existing processed folder: {args.process_dir}') - shutil.rmtree(args.process_dir, ignore_errors=True) - else: - log.info(f'processed folder exists: {args.process_dir}') - steps = [step for step in processing_options if step in ['face', 'body', 'original']] - process.reset() - options.process.target_size = 1024 if args.sdxl else 512 - metadata = {} - for step in steps: - if step == 'face': - opts = [step for step in processing_options if step not in ['body', 'original']] - if step == 'body': - opts = [step for step in processing_options if step not in ['face', 'original', 'upscale', 'restore']] # body does not perform upscale or restore - if step == 'original': - opts = [step for step in processing_options if step not in ['face', 'body', 'upscale', 'restore', 'blur', 'range', 'segment']] # original does not perform most steps - log.info(f'processing current step: {opts}') - tag = step - if tag == 'original' and args.tag is not None: - concept = args.tag.split(',')[0].strip() - else: - concept = step - if args.type in ['lora', 'lyco', 'dreambooth']: - folder = os.path.join(args.process_dir, str(args.repeats) + '_' + concept) # separate concepts per folder - if args.type in ['embedding']: - folder = os.path.join(args.process_dir) # everything into same folder - log.info(f'processing concept: {concept}') - log.info(f'processing output folder: {folder}') - pathlib.Path(folder).mkdir(parents=True, 
exist_ok=True) - results = {} - if server_ok: - for f in files: - res = process.file(filename = f, folder = folder, tag = args.tag, requested = opts) - if res.image: # valid result - results[res.type] = results.get(res.type, 0) + 1 - results['total'] = results.get('total', 0) + 1 - rel_path = res.basename.replace(os.path.commonpath([res.basename, args.process_dir]), '') - if rel_path.startswith(os.path.sep): - rel_path = rel_path[1:] - metadata[rel_path] = { 'caption': res.caption, 'tags': ','.join(res.tags) } - if options.lora.in_json is None: - with open(res.output.replace(options.process.format, '.txt'), "w", encoding='utf-8') as outfile: - outfile.write(res.caption) - log.info(f"processing {'saved' if res.image is not None else 'skipped'}: {f} => {res.output} {res.ops} {res.message}") - else: - log.info('processing skipped: offline') - folders = [os.path.join(args.process_dir, folder) for folder in os.listdir(args.process_dir) if os.path.isdir(os.path.join(args.process_dir, folder))] - log.info(f'input datasets {folders}') - if options.lora.in_json is not None: - with open(options.lora.in_json, "w", encoding='utf-8') as outfile: # write json at the end only - outfile.write(json.dumps(metadata, indent=2)) - for folder in folders: # create latents - import latents - latents.create_vae_latents(util.Map({ 'input': folder, 'json': options.lora.in_json })) - latents.unload_vae() - r = { 'inputs': len(files), 'outputs': results, 'metadata': options.lora.in_json } - log.info(f'processing steps result: {r}') - if args.gradient < 0: - log.info(f"setting gradient accumulation to number of images: {results['total']}") - options.lora.gradient_accumulation_steps = results['total'] - options.embedding.gradient_step = results['total'] - process.unload() - - -if __name__ == '__main__': - parse_args() - setup_logging() - log.info('SD.Next Train') - sdapi.sd_url = args.server - if args.user is not None: - sdapi.sd_username = args.user - if args.password is not None: - sdapi.sd_password = args.password - prepare_server() - verify_args() - prepare_options() - mem_stats() - process_inputs() - mem_stats() - try: - if args.type == 'embedding': - train_embedding() - if args.type == 'lora' or args.type == 'lyco' or args.type == 'dreambooth': - train_lora() - except KeyboardInterrupt: - log.error('interrupt requested') - sdapi.interrupt() - mem_stats() - log.info('done') diff --git a/cli/zluda-python.py b/cli/zluda-python.py index 31ee96362..e0399d096 100644 --- a/cli/zluda-python.py +++ b/cli/zluda-python.py @@ -13,7 +13,7 @@ def __init__(self, env_globals, env_locals): def execute(self, s: str): try: - exec(s, self.env_globals, self.env_locals) + exec(s, self.env_globals, self.env_locals) # pylint: disable=exec-used except Exception as e: print(f'{e.__class__.__name__}: {e}') diff --git a/modules/control/run.py b/modules/control/run.py index 5a41b87e4..7b27aed4e 100644 --- a/modules/control/run.py +++ b/modules/control/run.py @@ -193,6 +193,8 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini p.refiner_negative = refiner_negative if p.enable_hr and (p.hr_resize_x == 0 or p.hr_resize_y == 0): p.hr_upscale_to_x, p.hr_upscale_to_y = 8 * int(p.width * p.hr_scale / 8), 8 * int(p.height * p.hr_scale / 8) + elif p.enable_hr and (p.hr_upscale_to_x == 0 or p.hr_upscale_to_y == 0): + p.hr_upscale_to_x, p.hr_upscale_to_y = 8 * int(p.hr_resize_x / 8), 8 * int(hr_resize_y / 8) global p_extra_args # pylint: disable=global-statement for k, v in p_extra_args.items(): diff --git 
a/modules/processing_helpers.py b/modules/processing_helpers.py index be04ea8d4..5baf0193f 100644 --- a/modules/processing_helpers.py +++ b/modules/processing_helpers.py @@ -390,6 +390,9 @@ def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler if latent_upscaler is not None: return torch.nn.functional.interpolate(latents, size=(p.hr_upscale_to_y // 8, p.hr_upscale_to_x // 8), mode=latent_upscaler["mode"], antialias=latent_upscaler["antialias"]) first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil') + if p.hr_upscale_to_x == 0 or p.hr_upscale_to_y == 0 and hasattr(p, 'init_hr'): + shared.log.error('Hires: missing upscaling dimensions') + return first_pass_images resized_images = [] for img in first_pass_images: if latent_upscaler is None: diff --git a/modules/scripts.py b/modules/scripts.py index 59eb4c82b..87a25a56b 100644 --- a/modules/scripts.py +++ b/modules/scripts.py @@ -489,8 +489,10 @@ def before_process(self, p, **kwargs): s = ScriptSummary('before-process') for script in self.alwayson_scripts: try: - script_args = p.script_args[script.args_from:script.args_to] - script.before_process(p, *script_args, **kwargs) + args = p.script_args[script.args_from:script.args_to] + if len(args) == 0: + continue + script.before_process(p, *args, **kwargs) except Exception as e: errors.display(e, f"Error running before process: {script.filename}") s.record(script.title()) @@ -501,6 +503,8 @@ def process(self, p, **kwargs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.process(p, *args, **kwargs) except Exception as e: errors.display(e, f'Running script process: {script.filename}') @@ -513,6 +517,8 @@ def process_images(self, p, **kwargs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue processed = script.process_images(p, *args, **kwargs) except Exception as e: errors.display(e, f'Running script process images: {script.filename}') @@ -525,6 +531,8 @@ def before_process_batch(self, p, **kwargs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.before_process_batch(p, *args, **kwargs) except Exception as e: errors.display(e, f'Running script before process batch: {script.filename}') @@ -536,6 +544,8 @@ def process_batch(self, p, **kwargs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.process_batch(p, *args, **kwargs) except Exception as e: errors.display(e, f'Running script process batch: {script.filename}') @@ -547,6 +557,8 @@ def postprocess(self, p, processed): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.postprocess(p, processed, *args) except Exception as e: errors.display(e, f'Running script postprocess: {script.filename}') @@ -558,6 +570,8 @@ def postprocess_batch(self, p, images, **kwargs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue 
script.postprocess_batch(p, *args, images=images, **kwargs) except Exception as e: errors.display(e, f'Running script before postprocess batch: {script.filename}') @@ -569,6 +583,8 @@ def postprocess_batch_list(self, p, pp: PostprocessBatchListArgs, **kwargs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.postprocess_batch_list(p, pp, *args, **kwargs) except Exception as e: errors.display(e, f'Running script before postprocess batch list: {script.filename}') @@ -580,6 +596,8 @@ def postprocess_image(self, p, pp: PostprocessImageArgs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.postprocess_image(p, pp, *args) except Exception as e: errors.display(e, f'Running script postprocess image: {script.filename}') diff --git a/scripts/face-details.py b/scripts/face-details.py index 3604ecb47..b68d197db 100644 --- a/scripts/face-details.py +++ b/scripts/face-details.py @@ -104,11 +104,12 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None): return np_image self.load() if self.model is None: - shared.log.error(f"Model load: type=FaceHires model='{self.model_name}' dir={self.model_dir} url={self.model_url}") + shared.log.debug('Face HiRes: model not loaded') return np_image image = Image.fromarray(np_image) faces = self.predict(image) if len(faces) == 0: + shared.log.debug('Face HiRes: no faces detected') return np_image # create backups @@ -140,6 +141,7 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None): if args['denoising_strength'] == 0: shared.log.debug('Face HiRes skip: strength=0') control_pipeline = None + orig_class = shared.sd_model.__class__ if getattr(p, 'is_control', False): from modules.control import run control_pipeline = shared.sd_model @@ -177,6 +179,8 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None): # restore pipeline if control_pipeline is not None: shared.sd_model = control_pipeline + else: + shared.sd_model.__class__ = orig_class p = processing_class.switch_class(p, orig_cls, orig_p) p.init_images = getattr(orig_p, 'init_images', None) p.image_mask = getattr(orig_p, 'image_mask', None) From 3df993c183ff6f03be78dcc57292dd5a9022fa45 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Fri, 14 Jun 2024 10:42:34 -0400 Subject: [PATCH 04/81] css tweaks --- CHANGELOG.md | 1 + javascript/black-teal.css | 2 +- modules/ui_sections.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93f4724e1..80d5bc85b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Update for 2024-06-14 +- css tweaks for standardui - force apply vae config on model load - restructure api examples: `cli/api-*` - fix control second pass resize diff --git a/javascript/black-teal.css b/javascript/black-teal.css index 63f6fb5c4..c6f266c54 100644 --- a/javascript/black-teal.css +++ b/javascript/black-teal.css @@ -30,7 +30,7 @@ --inactive-color: var(--primary--800); --body-text-color: var(--neutral-100); --body-text-color-subdued: var(--neutral-300); - --background-color: black; + --background-color: var(--neutral-950); --background-fill-primary: var(--neutral-700); --input-padding: 4px; --input-background-fill: var(--neutral-800); diff --git a/modules/ui_sections.py b/modules/ui_sections.py index c2bd4ecd9..874ac7d30 100644 
--- a/modules/ui_sections.py +++ b/modules/ui_sections.py @@ -16,7 +16,7 @@ def parse_style(styles): if id_part is None: id_part = "img2img" if is_img2img else "txt2img" with gr.Row(elem_id=f"{id_part}_toprow", variant="compact"): - with gr.Column(elem_id=f"{id_part}_prompt_container", scale=6): + with gr.Column(elem_id=f"{id_part}_prompt_container", scale=5): with gr.Row(): with gr.Column(scale=80): with gr.Row(): From ad8419945a5454418e180a03d7368e3229cbd3e5 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 15 Jun 2024 10:20:59 -0400 Subject: [PATCH 05/81] sd3 add taesd preview --- modules/modeldata.py | 2 +- modules/sd_samplers_common.py | 8 +++--- modules/sd_vae_taesd.py | 53 +++++++++++++++++++++++++++++------ 3 files changed, 50 insertions(+), 13 deletions(-) diff --git a/modules/modeldata.py b/modules/modeldata.py index c904ef05b..a515bff61 100644 --- a/modules/modeldata.py +++ b/modules/modeldata.py @@ -83,7 +83,7 @@ def sd_model_type(self): return model_type if not shared.native: model_type = 'ldm' - elif "StableDiffusion3" in self.sd_refiner.__class__.__name__: + elif "StableDiffusion3" in self.sd_model.__class__.__name__: model_type = 'sd3' elif "StableDiffusionXL" in self.sd_model.__class__.__name__: model_type = 'sdxl' diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 57b4137ef..c6ef9131f 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -56,7 +56,10 @@ def single_sample_to_image(sample, approximation=None): sample_min = torch.min(sample) if sample_min < -5: sample = sample * (5 / abs(sample_min)) - if sd_cascade: + if approximation == 2: # TAESD + x_sample = sd_vae_taesd.decode(sample) + x_sample = (1.0 + x_sample) / 2.0 # preview requires smaller range + elif sd_cascade: x_sample = sd_vae_stablecascade.decode(sample) elif approximation == 0: # Simple x_sample = sd_vae_approx.cheap_approximation(sample) * 0.5 + 0.5 @@ -64,9 +67,6 @@ def single_sample_to_image(sample, approximation=None): x_sample = sd_vae_approx.nn_approximation(sample) * 0.5 + 0.5 if shared.sd_model_type == "sdxl": x_sample = x_sample[[2,1,0], :, :] # BGR to RGB - elif approximation == 2: # TAESD - x_sample = sd_vae_taesd.decode(sample) - x_sample = (1.0 + x_sample) / 2.0 # preview requires smaller range elif approximation == 3: # Full VAE x_sample = processing.decode_first_stage(shared.sd_model, sample.unsqueeze(0))[0] * 0.5 + 0.5 else: diff --git a/modules/sd_vae_taesd.py b/modules/sd_vae_taesd.py index 2a3c427ee..0c4689767 100644 --- a/modules/sd_vae_taesd.py +++ b/modules/sd_vae_taesd.py @@ -11,7 +11,14 @@ from modules import devices, paths -taesd_models = { 'sd-decoder': None, 'sd-encoder': None, 'sdxl-decoder': None, 'sdxl-encoder': None } +taesd_models = { + 'sd-decoder': None, + 'sd-encoder': None, + 'sdxl-decoder': None, + 'sdxl-encoder': None, + 'sd3-decoder': None, + 'sd3-encoder': None, +} previous_warnings = False @@ -31,33 +38,63 @@ def __init__(self, n_in, n_out): def forward(self, x): return self.fuse(self.conv(x) + self.skip(x)) -def Encoder(): +def Encoder(latent_channels=4): return nn.Sequential( conv(3, 64), Block(64, 64), conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), - conv(64, 4), + conv(64, latent_channels), ) -def Decoder(): +def Decoder(latent_channels=4): return nn.Sequential( - Clamp(), conv(4, 64), nn.ReLU(), + 
Clamp(), conv(latent_channels, 64), nn.ReLU(), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), Block(64, 64), conv(64, 3), ) + +class TAESD2(nn.Module): + latent_magnitude = 3 + latent_shift = 0.5 + + def __init__(self, encoder_path="taesd_encoder.pth", decoder_path="taesd_decoder.pth", latent_channels=None): + """Initialize pretrained TAESD on the given device from the given checkpoints.""" + super().__init__() + if latent_channels is None: + latent_channels = 16 if "taesd3" in str(encoder_path) else 4 + self.encoder = Encoder(latent_channels) + self.decoder = Decoder(latent_channels) + if encoder_path is not None: + self.encoder.load_state_dict(torch.load(encoder_path, map_location="cpu")) + if decoder_path is not None: + self.decoder.load_state_dict(torch.load(decoder_path, map_location="cpu")) + + @staticmethod + def scale_latents(x): + """raw latents -> [0, 1]""" + return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1) + + @staticmethod + def unscale_latents(x): + """[0, 1] -> raw latents""" + return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude) + + class TAESD(nn.Module): # pylint: disable=abstract-method latent_magnitude = 3 latent_shift = 0.5 - def __init__(self, encoder_path="taesd_encoder.pth", decoder_path="taesd_decoder.pth"): + def __init__(self, encoder_path="taesd_encoder.pth", decoder_path="taesd_decoder.pth", latent_channels=None): """Initialize pretrained TAESD on the given device from the given checkpoints.""" super().__init__() - self.encoder = Encoder() - self.decoder = Decoder() + if latent_channels is None: + latent_channels = 16 if "taesd3" in str(encoder_path) or "taesd3" in str(decoder_path) else 4 + self.encoder = Encoder(latent_channels) + self.decoder = Decoder(latent_channels) if encoder_path is not None: self.encoder.load_state_dict(torch.load(encoder_path, map_location="cpu")) if decoder_path is not None: From 0765758064710ed9af56d5f2c6a21e3d2cf799dc Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 15 Jun 2024 10:22:13 -0400 Subject: [PATCH 06/81] update changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80d5bc85b..95f9a8449 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,10 @@ ## Update for 2024-06-14 +- sd3 enable taesd preview and non-full quality mode +- sd3 simplified loading of model in single-file safetensors format - css tweaks for standardui -- force apply vae config on model load +- force apply vae config on model load, fix unsaturated outputs - restructure api examples: `cli/api-*` - fix control second pass resize - fix api face-hires From 3f7269e5305c62166142acfa339525869019694c Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 15 Jun 2024 10:22:26 -0400 Subject: [PATCH 07/81] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95f9a8449..b9fd30724 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Change Log for SD.Next -## Update for 2024-06-14 +## Update for 2024-06-15 - sd3 enable taesd preview and non-full quality mode - sd3 simplified loading of model in single-file safetensors format From a673bebdefa225db76dab65c89a36187151fb582 Mon Sep 17 00:00:00 2001 From: 
Seunghoon Lee Date: Sun, 16 Jun 2024 04:49:15 +0900 Subject: [PATCH 08/81] DirectML 0.2.2 torch 2.3.1 --- installer.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/installer.py b/installer.py index 686dd6c02..40d2b2d76 100644 --- a/installer.py +++ b/installer.py @@ -515,14 +515,7 @@ def install_rocm_zluda(torch_command): torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision') # conceal ROCm installed - os.environ.pop("ROCM_HOME", None) - os.environ.pop("ROCM_PATH", None) - paths = os.environ["PATH"].split(";") - paths_no_rocm = [] - for path in paths: - if "ROCm" not in path: - paths_no_rocm.append(path) - os.environ["PATH"] = ";".join(paths_no_rocm) + conceal_rocm() else: if rocm_ver is None: # assume the latest if version check fails torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision --index-url https://download.pytorch.org/whl/rocm6.0') @@ -541,6 +534,17 @@ def install_rocm_zluda(torch_command): return torch_command +def conceal_rocm(): + os.environ.pop("ROCM_HOME", None) + os.environ.pop("ROCM_PATH", None) + paths = os.environ["PATH"].split(";") + paths_no_rocm = [] + for path in paths: + if "ROCm" not in path: + paths_no_rocm.append(path) + os.environ["PATH"] = ";".join(paths_no_rocm) + + def install_ipex(torch_command): check_python(supported_minors=[10,11], reason='IPEX backend requires Python 3.10 or 3.11') args.use_ipex = True # pylint: disable=attribute-defined-outside-init @@ -677,11 +681,11 @@ def check_torch(): torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision') elif allow_directml and args.use_directml and ('arm' not in machine and 'aarch' not in machine): log.info('Using DirectML Backend') - check_python(supported_minors=[10], reason='DirectML backend requires Python 3.10') - torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.0.0 torchvision torch-directml') + torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.3.1 torchvision torch-directml') if 'torch' in torch_command and not args.version: install(torch_command, 'torch torchvision') install('onnxruntime-directml', 'onnxruntime-directml', ignore=True) + conceal_rocm() else: if args.use_zluda: log.warning("ZLUDA failed to initialize: no HIP SDK found") From 68e7692ed541b7384204df33bcfea9882ede359b Mon Sep 17 00:00:00 2001 From: Seunghoon Lee Date: Sun, 16 Jun 2024 04:59:20 +0900 Subject: [PATCH 09/81] remove redundant hijacks --- installer.py | 2 +- modules/dml/backend.py | 3 - modules/dml/hijack/__init__.py | 4 - modules/dml/hijack/diffusers.py | 227 -------------------------- modules/dml/hijack/kdiffusion.py | 89 ---------- modules/dml/hijack/plms.py | 90 ---------- modules/dml/hijack/stablediffusion.py | 81 --------- modules/dml/hijack/torch.py | 35 ---- modules/processing_callbacks.py | 2 - 9 files changed, 1 insertion(+), 532 deletions(-) delete mode 100644 modules/dml/hijack/diffusers.py delete mode 100644 modules/dml/hijack/kdiffusion.py delete mode 100644 modules/dml/hijack/plms.py delete mode 100644 modules/dml/hijack/stablediffusion.py diff --git a/installer.py b/installer.py index 40d2b2d76..2abb8b33d 100644 --- a/installer.py +++ b/installer.py @@ -434,7 +434,7 @@ def check_onnx(): def install_rocm_zluda(torch_command): - check_python(supported_minors=[10,11], reason='RocM or Zluda backends require Python 3.10 or 3.11') + check_python(supported_minors=[10, 11], reason='ROCm or ZLUDA backends require Python 3.10 or 3.11') is_windows = platform.system() == 'Windows' log.info('AMD ROCm toolkit detected') 
os.environ.setdefault('PYTORCH_HIP_ALLOC_CONF', 'garbage_collection_threshold:0.8,max_split_size_mb:512') diff --git a/modules/dml/backend.py b/modules/dml/backend.py index 5f34b4542..7947dc81b 100644 --- a/modules/dml/backend.py +++ b/modules/dml/backend.py @@ -78,6 +78,3 @@ def max_memory_allocated(device: Optional[rDevice]=None): def reset_peak_memory_stats(device: Optional[rDevice]=None): return - - def synchronize_tensor(tensor: torch.Tensor) -> None: - tensor.__str__() diff --git a/modules/dml/hijack/__init__.py b/modules/dml/hijack/__init__.py index 4f27b500c..46aecf4bd 100644 --- a/modules/dml/hijack/__init__.py +++ b/modules/dml/hijack/__init__.py @@ -1,8 +1,4 @@ -import modules.dml.hijack.kdiffusion -import modules.dml.hijack.stablediffusion import modules.dml.hijack.torch import modules.dml.hijack.realesrgan_model -import modules.dml.hijack.plms -import modules.dml.hijack.diffusers import modules.dml.hijack.transformers import modules.dml.hijack.tomesd diff --git a/modules/dml/hijack/diffusers.py b/modules/dml/hijack/diffusers.py deleted file mode 100644 index 56b7d85cb..000000000 --- a/modules/dml/hijack/diffusers.py +++ /dev/null @@ -1,227 +0,0 @@ -from typing import Optional, Union, Tuple -import torch -import diffusers -import diffusers.utils.torch_utils - - -# copied from diffusers.PNDMScheduler._get_prev_sample -def PNDMScheduler__get_prev_sample(self, sample: torch.FloatTensor, timestep, prev_timestep, model_output): - torch.dml.synchronize_tensor(sample) # DML synchronize - alpha_prod_t = self.alphas_cumprod[timestep] - alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod - beta_prod_t = 1 - alpha_prod_t - beta_prod_t_prev = 1 - alpha_prod_t_prev - - if self.config.prediction_type == "v_prediction": - model_output = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample - elif self.config.prediction_type != "epsilon": - raise ValueError( - f"prediction_type given as {self.config.prediction_type} must be one of `epsilon` or `v_prediction`" - ) - - sample_coeff = (alpha_prod_t_prev / alpha_prod_t) ** (0.5) - - model_output_denom_coeff = alpha_prod_t * beta_prod_t_prev ** (0.5) + ( - alpha_prod_t * beta_prod_t * alpha_prod_t_prev - ) ** (0.5) - - # full formula (9) - prev_sample = ( - sample_coeff * sample - (alpha_prod_t_prev - alpha_prod_t) * model_output / model_output_denom_coeff - ) - - return prev_sample - - -diffusers.PNDMScheduler._get_prev_sample = PNDMScheduler__get_prev_sample # pylint: disable=protected-access - - -# copied from diffusers.UniPCMultistepScheduler.multistep_uni_p_bh_update -def UniPCMultistepScheduler_multistep_uni_p_bh_update( - self: diffusers.UniPCMultistepScheduler, - model_output: torch.FloatTensor, - *args, - sample: torch.FloatTensor = None, - order: int = None, - **_, -) -> torch.FloatTensor: - if sample is None: - if len(args) > 1: - sample = args[1] - else: - raise ValueError(" missing `sample` as a required keyward argument") - if order is None: - if len(args) > 2: - order = args[2] - else: - raise ValueError(" missing `order` as a required keyward argument") - model_output_list = self.model_outputs - - s0 = self.timestep_list[-1] - m0 = model_output_list[-1] - x = sample - - if self.solver_p: - x_t = self.solver_p.step(model_output, s0, x).prev_sample - return x_t - - torch.dml.synchronize_tensor(sample) # DML synchronize - sigma_t, sigma_s0 = self.sigmas[self.step_index + 1], self.sigmas[self.step_index] - alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) - 
alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) - - lambda_t = torch.log(alpha_t) - torch.log(sigma_t) - lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) - - h = lambda_t - lambda_s0 - device = sample.device - - rks = [] - D1s = [] - for i in range(1, order): - si = self.step_index - i - mi = model_output_list[-(i + 1)] - alpha_si, sigma_si = self._sigma_to_alpha_sigma_t(self.sigmas[si]) - lambda_si = torch.log(alpha_si) - torch.log(sigma_si) - rk = (lambda_si - lambda_s0) / h - rks.append(rk) - D1s.append((mi - m0) / rk) - - rks.append(1.0) - rks = torch.tensor(rks, device=device) - - R = [] - b = [] - - hh = -h if self.predict_x0 else h - h_phi_1 = torch.expm1(hh) # h\phi_1(h) = e^h - 1 - h_phi_k = h_phi_1 / hh - 1 - - factorial_i = 1 - - if self.config.solver_type == "bh1": - B_h = hh - elif self.config.solver_type == "bh2": - B_h = torch.expm1(hh) - else: - raise NotImplementedError - - for i in range(1, order + 1): - R.append(torch.pow(rks, i - 1)) - b.append(h_phi_k * factorial_i / B_h) - factorial_i *= i + 1 - h_phi_k = h_phi_k / hh - 1 / factorial_i - - R = torch.stack(R) - b = torch.tensor(b, device=device) - - rhos_p = None - if len(D1s) > 0: - D1s = torch.stack(D1s, dim=1) # (B, K) - # for order 2, we use a simplified version - if order == 2: - rhos_p = torch.tensor([0.5], dtype=x.dtype, device=device) - else: - rhos_p = torch.linalg.solve(R[:-1, :-1], b[:-1]) - else: - D1s = None - - if self.predict_x0: - x_t_ = sigma_t / sigma_s0 * x - alpha_t * h_phi_1 * m0 - if D1s is not None: - pred_res = torch.einsum("k,bkc...->bc...", rhos_p, D1s) - else: - pred_res = 0 - x_t = x_t_ - alpha_t * B_h * pred_res - else: - x_t_ = alpha_t / alpha_s0 * x - sigma_t * h_phi_1 * m0 - if D1s is not None: - pred_res = torch.einsum("k,bkc...->bc...", rhos_p, D1s) - else: - pred_res = 0 - x_t = x_t_ - sigma_t * B_h * pred_res - - x_t = x_t.to(x.dtype) - return x_t - - -diffusers.UniPCMultistepScheduler.multistep_uni_p_bh_update = UniPCMultistepScheduler_multistep_uni_p_bh_update - - -# copied from diffusers.LCMScheduler.step -def LCMScheduler_step( - self: diffusers.LCMScheduler, - model_output: torch.FloatTensor, - timestep: int, - sample: torch.FloatTensor, - generator: Optional[torch.Generator] = None, - return_dict: bool = True, - ) -> Union[diffusers.schedulers.scheduling_lcm.LCMSchedulerOutput, Tuple]: - if self.num_inference_steps is None: - raise ValueError( - "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" - ) - - if self.step_index is None: - self._init_step_index(timestep) - - # 1. get previous step value - prev_step_index = self.step_index + 1 - if prev_step_index < len(self.timesteps): - prev_timestep = self.timesteps[prev_step_index] - else: - prev_timestep = timestep - - # 2. compute alphas, betas - torch.dml.synchronize_tensor(sample) # DML synchronize - alpha_prod_t = self.alphas_cumprod[timestep] - alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod - - beta_prod_t = 1 - alpha_prod_t - beta_prod_t_prev = 1 - alpha_prod_t_prev - - # 3. Get scalings for boundary conditions - c_skip, c_out = self.get_scalings_for_boundary_condition_discrete(timestep) - - # 4. 
Compute the predicted original sample x_0 based on the model parameterization - if self.config.prediction_type == "epsilon": # noise-prediction - predicted_original_sample = (sample - beta_prod_t.sqrt() * model_output) / alpha_prod_t.sqrt() - elif self.config.prediction_type == "sample": # x-prediction - predicted_original_sample = model_output - elif self.config.prediction_type == "v_prediction": # v-prediction - predicted_original_sample = alpha_prod_t.sqrt() * sample - beta_prod_t.sqrt() * model_output - else: - raise ValueError( - f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or" - " `v_prediction` for `LCMScheduler`." - ) - - # 5. Clip or threshold "predicted x_0" - if self.config.thresholding: - predicted_original_sample = self._threshold_sample(predicted_original_sample) - elif self.config.clip_sample: - predicted_original_sample = predicted_original_sample.clamp( - -self.config.clip_sample_range, self.config.clip_sample_range - ) - - # 6. Denoise model output using boundary conditions - denoised = c_out * predicted_original_sample + c_skip * sample - - # 7. Sample and inject noise z ~ N(0, I) for MultiStep Inference - # Noise is not used for one-step sampling. - if len(self.timesteps) > 1: - noise = diffusers.utils.torch_utils.randn_tensor(model_output.shape, generator=generator, device=model_output.device) - prev_sample = alpha_prod_t_prev.sqrt() * denoised + beta_prod_t_prev.sqrt() * noise - else: - prev_sample = denoised - - # upon completion increase step index by one - self._step_index += 1 - - if not return_dict: - return (prev_sample, denoised) - - return diffusers.schedulers.scheduling_lcm.LCMSchedulerOutput(prev_sample=prev_sample, denoised=denoised) - - -diffusers.LCMScheduler.step = LCMScheduler_step diff --git a/modules/dml/hijack/kdiffusion.py b/modules/dml/hijack/kdiffusion.py deleted file mode 100644 index d772dc88f..000000000 --- a/modules/dml/hijack/kdiffusion.py +++ /dev/null @@ -1,89 +0,0 @@ -import torch -from tqdm.auto import tqdm -from k_diffusion import sampling -import modules.devices as devices - - -def dpm_solver_adaptive(self, x, t_start, t_end, order=3, rtol=0.05, atol=0.0078, h_init=0.05, pcoeff=0., icoeff=1., dcoeff=0., accept_safety=0.81, eta=0., s_noise=1., noise_sampler=None): - noise_sampler = sampling.default_noise_sampler(x) if noise_sampler is None else noise_sampler - if order not in {2, 3}: - raise ValueError('order should be 2 or 3') - forward = t_end > t_start - if not forward and eta: - raise ValueError('eta must be 0 for reverse sampling') - h_init = abs(h_init) * (1 if forward else -1) - atol = torch.tensor(atol, device=devices.device) - rtol = torch.tensor(rtol, device=devices.device) - s = t_start - x_prev = x - accept = True - pid = sampling.PIDStepSizeController(h_init, pcoeff, icoeff, dcoeff, 1.5 if eta else order, accept_safety) - info = {'steps': 0, 'nfe': 0, 'n_accept': 0, 'n_reject': 0} - - while s < t_end - 1e-5 if forward else s > t_end + 1e-5: - eps_cache = {} - t = torch.minimum(t_end, s + pid.h) if forward else torch.maximum(t_end, s + pid.h) - if eta: - sd, su = sampling.get_ancestral_step(self.sigma(s), self.sigma(t), eta) - t_ = torch.minimum(t_end, self.t(sd)) - su = (self.sigma(t) ** 2 - self.sigma(t_) ** 2) ** 0.5 - else: - t_, su = t, 0. 
- - eps, eps_cache = self.eps(eps_cache, 'eps', x, s) - denoised = x - self.sigma(s) * eps - - if order == 2: - x_low, eps_cache = self.dpm_solver_1_step(x, s, t_, eps_cache=eps_cache) - x_high, eps_cache = self.dpm_solver_2_step(x, s, t_, eps_cache=eps_cache) - else: - x_low, eps_cache = self.dpm_solver_2_step(x, s, t_, r1=1 / 3, eps_cache=eps_cache) - x_high, eps_cache = self.dpm_solver_3_step(x, s, t_, eps_cache=eps_cache) - delta = torch.maximum(atol, rtol * torch.maximum(x_low.abs(), x_prev.abs())) - error = torch.linalg.norm((x_low - x_high) / delta) / x.numel() ** 0.5 - accept = pid.propose_step(error) - if accept: - x_prev = x_low - x = x_high + su * s_noise * noise_sampler(self.sigma(s), self.sigma(t)) - s = t - info['n_accept'] += 1 - else: - info['n_reject'] += 1 - info['nfe'] += order - info['steps'] += 1 - - if self.info_callback is not None: - self.info_callback({'x': x, 'i': info['steps'] - 1, 't': s, 't_up': s, 'denoised': denoised, 'error': error, 'h': pid.h, **info}) - - return x, info - - -@devices.inference_context() -def sample_dpm_fast(model, x, sigma_min, sigma_max, n, extra_args=None, callback=None, disable=None, eta=0., s_noise=1., noise_sampler=None): - """DPM-Solver-Fast (fixed step size). See https://arxiv.org/abs/2206.00927.""" - if sigma_min <= 0 or sigma_max <= 0: - raise ValueError('sigma_min and sigma_max must not be 0') - with tqdm(total=n, disable=disable) as pbar: - dpm_solver = sampling.DPMSolver(model, extra_args, eps_callback=pbar.update) - if callback is not None: - dpm_solver.info_callback = lambda info: callback({'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info}) - return dpm_solver.dpm_solver_fast(x, dpm_solver.t(torch.tensor(sigma_max, device=devices.device)), dpm_solver.t(torch.tensor(sigma_min, device=devices.device)), n, eta, s_noise, noise_sampler) - - -@devices.inference_context() -def sample_dpm_adaptive(model, x, sigma_min, sigma_max, extra_args=None, callback=None, disable=None, order=3, rtol=0.05, atol=0.0078, h_init=0.05, pcoeff=0., icoeff=1., dcoeff=0., accept_safety=0.81, eta=0., s_noise=1., noise_sampler=None, return_info=False): - """DPM-Solver-12 and 23 (adaptive step size). 
See https://arxiv.org/abs/2206.00927.""" - if sigma_min <= 0 or sigma_max <= 0: - raise ValueError('sigma_min and sigma_max must not be 0') - with tqdm(disable=disable) as pbar: - dpm_solver = sampling.DPMSolver(model, extra_args, eps_callback=pbar.update) - if callback is not None: - dpm_solver.info_callback = lambda info: callback({'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info}) - x, info = dpm_solver.dpm_solver_adaptive(x, dpm_solver.t(torch.tensor(sigma_max, device=devices.device)), dpm_solver.t(torch.tensor(sigma_min, device=devices.device)), order, rtol, atol, h_init, pcoeff, icoeff, dcoeff, accept_safety, eta, s_noise, noise_sampler) - if return_info: - return x, info - return x - -sampling.DPMSolver.dpm_solver_adaptive = dpm_solver_adaptive -sampling.sample_dpm_fast = sample_dpm_fast -sampling.sample_dpm_adaptive = sample_dpm_adaptive diff --git a/modules/dml/hijack/plms.py b/modules/dml/hijack/plms.py deleted file mode 100644 index d19c46629..000000000 --- a/modules/dml/hijack/plms.py +++ /dev/null @@ -1,90 +0,0 @@ -import torch -from ldm.models.diffusion.ddim import noise_like -import modules.sd_hijack_inpainting as plms_hijack -import modules.devices as devices - - -@devices.inference_context() -def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False, - temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None, - unconditional_guidance_scale=1., unconditional_conditioning=None, old_eps=None, t_next=None, dynamic_threshold=None): - b, *_, device = *x.shape, x.device - - def get_model_output(x, t): - if unconditional_conditioning is None or unconditional_guidance_scale == 1.: - e_t = self.model.apply_model(x, t, c) - else: - x_in = torch.cat([x] * 2) - t_in = torch.cat([t] * 2) - - if isinstance(c, dict): - assert isinstance(unconditional_conditioning, dict) - c_in = {} - for k in c: - if isinstance(c[k], list): - c_in[k] = [ - torch.cat([unconditional_conditioning[k][i], c[k][i]]) - for i in range(len(c[k])) - ] - else: - c_in[k] = torch.cat([unconditional_conditioning[k], c[k]]) - else: - c_in = torch.cat([unconditional_conditioning, c]) - - e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2) - e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond) - - if score_corrector is not None: - assert self.model.parameterization == "eps" - e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs) - - return e_t - - alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas - alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev - sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas - sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas - - def get_x_prev_and_pred_x0(e_t, index): - # select parameters corresponding to the currently considered timestep - torch.dml.synchronize_tensor(alphas[index]) # DML synchronize - a_t = torch.full((b, 1, 1, 1), alphas[index], device=device) - a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device) - sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device) - sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index],device=device) - - # current prediction for x_0 - pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() - if quantize_denoised: - pred_x0, _, *_ = 
self.model.first_stage_model.quantize(pred_x0) - if dynamic_threshold is not None: - from ldm.models.diffusion.sampling_util import norm_thresholding - pred_x0 = norm_thresholding(pred_x0, dynamic_threshold) - # direction pointing to x_t - dir_xt = (1. - a_prev - sigma_t**2).sqrt() * e_t - noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature - if noise_dropout > 0.: - noise = torch.nn.functional.dropout(noise, p=noise_dropout) - x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise - return x_prev, pred_x0 - - e_t = get_model_output(x, t) - if len(old_eps) == 0: - # Pseudo Improved Euler (2nd order) - x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t, index) - e_t_next = get_model_output(x_prev, t_next) - e_t_prime = (e_t + e_t_next) / 2 - elif len(old_eps) == 1: - # 2nd order Pseudo Linear Multistep (Adams-Bashforth) - e_t_prime = (3 * e_t - old_eps[-1]) / 2 - elif len(old_eps) == 2: - # 3nd order Pseudo Linear Multistep (Adams-Bashforth) - e_t_prime = (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12 - elif len(old_eps) >= 3: - # 4nd order Pseudo Linear Multistep (Adams-Bashforth) - e_t_prime = (55 * e_t - 59 * old_eps[-1] + 37 * old_eps[-2] - 9 * old_eps[-3]) / 24 - - x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t_prime, index) - - return x_prev, pred_x0, e_t -plms_hijack.p_sample_plms = p_sample_plms diff --git a/modules/dml/hijack/stablediffusion.py b/modules/dml/hijack/stablediffusion.py deleted file mode 100644 index fc2518aa7..000000000 --- a/modules/dml/hijack/stablediffusion.py +++ /dev/null @@ -1,81 +0,0 @@ -import torch -from ldm.models.diffusion.ddim import DDIMSampler -from ldm.modules.diffusionmodules.util import noise_like -import modules.devices as devices - - -@devices.inference_context() -def p_sample_ddim(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False, - temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None, - unconditional_guidance_scale=1., unconditional_conditioning=None, - dynamic_threshold=None): - b, *_, device = *x.shape, x.device - - if unconditional_conditioning is None or unconditional_guidance_scale == 1.: - model_output = self.model.apply_model(x, t, c) - else: - x_in = torch.cat([x] * 2) - t_in = torch.cat([t] * 2) - if isinstance(c, dict): - assert isinstance(unconditional_conditioning, dict) - c_in = dict() - for k in c: - if isinstance(c[k], list): - c_in[k] = [torch.cat([ - unconditional_conditioning[k][i], - c[k][i]]) for i in range(len(c[k]))] - else: - c_in[k] = torch.cat([ - unconditional_conditioning[k], - c[k]]) - elif isinstance(c, list): - c_in = list() - assert isinstance(unconditional_conditioning, list) - for i in range(len(c)): - c_in.append(torch.cat([unconditional_conditioning[i], c[i]])) - else: - c_in = torch.cat([unconditional_conditioning, c]) - model_uncond, model_t = self.model.apply_model(x_in, t_in, c_in).chunk(2) - model_output = model_uncond + unconditional_guidance_scale * (model_t - model_uncond) - - if self.model.parameterization == "v": - e_t = self.model.predict_eps_from_z_and_v(x, t, model_output) - else: - e_t = model_output - - if score_corrector is not None: - assert self.model.parameterization == "eps", 'not implemented' - e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs) - - alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas - alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev - sqrt_one_minus_alphas = 
self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas - sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas - # select parameters corresponding to the currently considered timestep - torch.dml.synchronize_tensor(alphas[index]) # DML synchronize - a_t = torch.full((b, 1, 1, 1), alphas[index], device=device) - a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device) - sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device) - sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index],device=device) - - # current prediction for x_0 - if self.model.parameterization != "v": - pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() - else: - pred_x0 = self.model.predict_start_from_z_and_v(x, t, model_output) - - if quantize_denoised: - pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0) - - if dynamic_threshold is not None: - raise NotImplementedError - - # direction pointing to x_t - dir_xt = (1. - a_prev - sigma_t**2).sqrt() * e_t - noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature - if noise_dropout > 0.: - noise = torch.nn.functional.dropout(noise, p=noise_dropout) - x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise - return x_prev, pred_x0 - -DDIMSampler.p_sample_ddim = p_sample_ddim diff --git a/modules/dml/hijack/torch.py b/modules/dml/hijack/torch.py index e4913a357..e216de46f 100644 --- a/modules/dml/hijack/torch.py +++ b/modules/dml/hijack/torch.py @@ -7,41 +7,6 @@ CondFunc('torch.Tensor.new', lambda orig, self, *args, **kwargs: orig(self.cpu(), *args, **kwargs).to(self.device), lambda orig, self, *args, **kwargs: torch.dml.is_directml_device(self.device)) -_lerp = torch.lerp -def lerp(*args, **kwargs) -> torch.Tensor: - rep = None - for i in range(0, len(args)): - if torch.is_tensor(args[i]): - rep = args[i] - break - if rep is None: - for key in kwargs: - if torch.is_tensor(kwargs[key]): - rep = kwargs[key] - break - if torch.dml.is_directml_device(rep.device): - args = list(args) - - if rep.dtype == torch.float16: - for i in range(len(args)): - if torch.is_tensor(args[i]): - args[i] = args[i].float() - for i in range(len(args)): - if torch.is_tensor(args[i]): - args[i] = args[i].cpu() - - if rep.dtype == torch.float16: - for kwarg in kwargs: - if torch.is_tensor(kwargs[kwarg]): - kwargs[kwarg] = kwargs[kwarg].float() - for kwarg in kwargs: - if torch.is_tensor(kwargs[kwarg]): - kwargs[kwarg] = kwargs[kwarg].cpu() - return _lerp(*args, **kwargs).to(rep.device).type(rep.dtype) - return _lerp(*args, **kwargs) -torch.lerp = lerp - - # https://github.com/lshqqytiger/stable-diffusion-webui-directml/issues/436 _pow_ = torch.Tensor.pow_ def pow_(self: torch.Tensor, *args, **kwargs): diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py index d5b7d1922..824a79007 100644 --- a/modules/processing_callbacks.py +++ b/modules/processing_callbacks.py @@ -37,8 +37,6 @@ def diffusers_callback(pipe, step: int, timestep: int, kwargs: dict): if p is None: return kwargs latents = kwargs.get('latents', None) - if torch.is_tensor(latents) and latents.device.type == "privateuseone": - torch.dml.synchronize_tensor(latents) # DML synchronize debug_callback(f'Callback: step={step} timestep={timestep} latents={latents.shape if latents is not None else None} kwargs={list(kwargs)}') shared.state.sampling_step = step if shared.state.interrupted or shared.state.skipped: From 6c1d59d367d27c14b7f4efa318bdb474846a64ad Mon Sep 17 
00:00:00 2001 From: Vladimir Mandic Date: Sat, 15 Jun 2024 16:31:55 -0400 Subject: [PATCH 10/81] civitai download validate name --- extensions-builtin/Lora/network_overrides.py | 8 +++++--- extensions-builtin/Lora/networks.py | 5 ++++- modules/modelloader.py | 4 ++++ modules/ui_models.py | 2 +- wiki | 2 +- 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/extensions-builtin/Lora/network_overrides.py b/extensions-builtin/Lora/network_overrides.py index 724e47c70..943575630 100644 --- a/extensions-builtin/Lora/network_overrides.py +++ b/extensions-builtin/Lora/network_overrides.py @@ -24,10 +24,12 @@ 'c2ec22757b46', # flash-sd15 ] -def check_override(shorthash): +def check_override(shorthash=''): + force = False + force = force or (shared.sd_model_type == 'sd3') # TODO sd3 forced diffusers for lora load if len(shorthash) < 4: - return False - force = any(x.startswith(shorthash) for x in maybe_diffusers) if shared.opts.lora_maybe_diffusers else False + return force + force = force or (any(x.startswith(shorthash) for x in maybe_diffusers) if shared.opts.lora_maybe_diffusers else False) force = force or any(x.startswith(shorthash) for x in force_diffusers) if force and shared.opts.lora_maybe_diffusers: shared.log.debug('LoRA override: force diffusers') diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index e564e2b67..4e45a86bd 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -87,7 +87,10 @@ def load_diffusers(name, network_on_disk, lora_scale=1.0) -> network.Network: return cached if not shared.native: return None - shared.sd_model.load_lora_weights(network_on_disk.filename) + try: + shared.sd_model.load_lora_weights(network_on_disk.filename) + except Exception as e: + errors.display(e, "LoRA") if shared.opts.lora_fuse_diffusers: shared.sd_model.fuse_lora(lora_scale=lora_scale) net = network.Network(name, network_on_disk) diff --git a/modules/modelloader.py b/modules/modelloader.py index fa27ede6d..faf206d11 100644 --- a/modules/modelloader.py +++ b/modules/modelloader.py @@ -152,6 +152,10 @@ def download_civit_model_thread(model_name, model_url, model_path, model_type, t def download_civit_model(model_url: str, model_name: str, model_path: str, model_type: str, token: str = None): import threading + if model_name is None or len(model_name) == 0: + err = 'Model download: no target model name provided' + shared.log.error(err) + return err thread = threading.Thread(target=download_civit_model_thread, args=(model_name, model_url, model_path, model_type, token)) thread.start() return f'Model download: name={model_name} url={model_url} path={model_path}' diff --git a/modules/ui_models.py b/modules/ui_models.py index c7fa8425b..d29f2f404 100644 --- a/modules/ui_models.py +++ b/modules/ui_models.py @@ -431,7 +431,7 @@ def civit_search_model(name, tag, model_type): r = req(url) log.debug(f'CivitAI search: name="{name}" tag={tag or "none"} url="{url}" status={r.status_code}') if r.status_code != 200: - return [], [], [] + return [], gr.update(visible=False, value=[]), gr.update(visible=False, value=None), gr.update(visible=False, value=None) body = r.json() nonlocal data data = body.get('items', []) diff --git a/wiki b/wiki index 0db3587f4..23dc2fc80 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit 0db3587f4491680cbedb20282a6d3dd52e1d5769 +Subproject commit 23dc2fc800495d20319ea19ebf8e3fb8a52c1ce6 From 5feaed0c6ab81712e72817c1e910d851cfbcab2f Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: 
Sat, 15 Jun 2024 16:47:03 -0400 Subject: [PATCH 11/81] unify hf-login --- modules/model_sd3.py | 24 ++++-------------------- modules/modeldata.py | 2 ++ modules/modelloader.py | 19 ++++++++++++++++++- modules/sd_models.py | 3 ++- 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/modules/model_sd3.py b/modules/model_sd3.py index 614c37f6a..e2004c1f4 100644 --- a/modules/model_sd3.py +++ b/modules/model_sd3.py @@ -1,6 +1,4 @@ -import io import os -import contextlib import warnings import torch import diffusers @@ -13,23 +11,9 @@ loggedin = False -def hf_login(): - global loggedin # pylint: disable=global-statement - import huggingface_hub as hf - from modules import shared - if shared.opts.huggingface_token is not None and len(shared.opts.huggingface_token) > 2 and not loggedin: - stdout = io.StringIO() - with contextlib.redirect_stdout(stdout): - hf.login(shared.opts.huggingface_token) - text = stdout.getvalue() or '' - line = [l for l in text.split('\n') if 'Token' in l] - shared.log.info(f'HF login: {line[0] if len(line) > 0 else text}') - loggedin = True - - def load_sd3(fn=None, cache_dir=None, config=None): - from modules import devices - hf_login() + from modules import devices, modelloader + modelloader.hf_login() repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers' model_id = 'stabilityai/stable-diffusion-3-medium-diffusers' dtype = torch.float16 @@ -88,8 +72,8 @@ def load_sd3(fn=None, cache_dir=None, config=None): def load_te3(pipe, te3=None, cache_dir=None): - from modules import devices - hf_login() + from modules import devices, modelloader + modelloader.hf_login() repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers' if pipe is None or not hasattr(pipe, 'text_encoder_3'): return pipe diff --git a/modules/modeldata.py b/modules/modeldata.py index a515bff61..2ae958218 100644 --- a/modules/modeldata.py +++ b/modules/modeldata.py @@ -97,6 +97,8 @@ def sd_model_type(self): model_type = 'sd' # sd is compatible with sd elif "Kandinsky" in self.sd_model.__class__.__name__: model_type = 'kandinsky' + elif "HunyuanDiT" in self.sd_model.__class__.__name__: + model_type = 'hunyuandit' elif "Cascade" in self.sd_model.__class__.__name__: model_type = 'sc' else: diff --git a/modules/modelloader.py b/modules/modelloader.py index faf206d11..0045478a3 100644 --- a/modules/modelloader.py +++ b/modules/modelloader.py @@ -1,8 +1,10 @@ +import io import os import time import json import shutil import importlib +import contextlib from typing import Dict from urllib.parse import urlparse from PIL import Image @@ -12,10 +14,25 @@ from modules.paths import script_path, models_path +loggedin = False diffuser_repos = [] debug = shared.log.trace if os.environ.get('SD_DOWNLOAD_DEBUG', None) is not None else lambda *args, **kwargs: None +def hf_login(token=None): + global loggedin # pylint: disable=global-statement + import huggingface_hub as hf + token = token or shared.opts.huggingface_token + if token is not None and len(token) > 2 and not loggedin: + stdout = io.StringIO() + with contextlib.redirect_stdout(stdout): + hf.login(token) + text = stdout.getvalue() or '' + line = [l for l in text.split('\n') if 'Token' in l] + shared.log.info(f'HF login: {line[0] if len(line) > 0 else text}') + loggedin = True + + def download_civit_meta(model_path: str, model_id): fn = os.path.splitext(model_path)[0] + '.json' url = f'https://civitai.com/api/v1/models/{model_id}' @@ -188,7 +205,7 @@ def download_diffusers_model(hub_id: str, cache_dir: str = None,
download_config token = token or shared.opts.huggingface_token if token is not None and len(token) > 2: shared.log.debug(f"Diffusers authentication: {token}") - hf.login(token) + hf_login(token) pipeline_dir = None ok = False diff --git a/modules/sd_models.py b/modules/sd_models.py index 535820686..c8b0b9397 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -1381,7 +1381,8 @@ def set_attn(pipe, attention): modules = [getattr(pipe, n, None) for n in module_names] modules = [m for m in modules if isinstance(m, torch.nn.Module) and hasattr(m, "set_attn_processor")] for module in modules: - if 'SD3Transformer2DModel' in module.__class__.__name__: # TODO SD3 + print('HERE', module.__class__.__name__) + if module.__class__.__name__ in ['SD3Transformer2DModel']: module.set_attn_processor(p.JointAttnProcessor2_0()) else: module.set_attn_processor(attention) From a726acbcc6f2c553b7c23d1ab363a51bf621cfc4 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 15 Jun 2024 17:08:33 -0400 Subject: [PATCH 12/81] fix HunyuanDiT --- CHANGELOG.md | 5 ++++- modules/sd_models.py | 3 ++- modules/sd_vae_taesd.py | 7 +++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9fd30724..7e25dfff1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,10 +5,13 @@ - sd3 enable taesd preview and non-full quality mode - sd3 simplified loading of model in single-file safetensors format - css tweaks for standardui -- force apply vae config on model load, fix unsaturated outputs +- force apply vae config on model load + fix unsaturated outputs - restructure api examples: `cli/api-*` - fix control second pass resize - fix api face-hires +- fix hunyuandit set attention processor +- fix civitai download without name ## Update for 2024-06-13 diff --git a/modules/sd_models.py b/modules/sd_models.py index c8b0b9397..5ceed8d5d 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -1381,9 +1381,10 @@ def set_attn(pipe, attention): modules = [getattr(pipe, n, None) for n in module_names] modules = [m for m in modules if isinstance(m, torch.nn.Module) and hasattr(m, "set_attn_processor")] for module in modules: - print('HERE', module.__class__.__name__) if module.__class__.__name__ in ['SD3Transformer2DModel']: module.set_attn_processor(p.JointAttnProcessor2_0()) + elif module.__class__.__name__ in ['HunyuanDiT2DModel']: + pass else: module.set_attn_processor(attention) diff --git a/modules/sd_vae_taesd.py b/modules/sd_vae_taesd.py index 0c4689767..d5edbd11f 100644 --- a/modules/sd_vae_taesd.py +++ b/modules/sd_vae_taesd.py @@ -57,7 +57,7 @@ def Decoder(latent_channels=4): ) -class TAESD2(nn.Module): +class TAESD2(nn.Module): # pylint: disable=abstract-method latent_magnitude = 3 latent_shift = 0.5 @@ -142,13 +142,16 @@ def model(model_class = 'sd', model_type = 'decoder'): def decode(latents): + global previous_warnings # pylint: disable=global-statement from modules import shared model_class = shared.sd_model_type if model_class == 'ldm': model_class = 'sd' dtype = devices.dtype_vae if devices.dtype_vae != torch.bfloat16 else torch.float16 # taesd does not support bf16 if 'sd' not in model_class: - shared.log.warning(f'TAESD unsupported model type: {model_class}') + if not previous_warnings: + previous_warnings = True + shared.log.warning(f'TAESD unsupported model type: {model_class}') return Image.new('RGB', (8, 8), color = (0, 0, 0)) vae = taesd_models[f'{model_class}-decoder'] if vae is None: From 67d46760d67a7fea8c235b1e991f7e96b3c0ba75 Mon Sep 17 00:00:00 
2001 From: Vladimir Mandic Date: Sat, 15 Jun 2024 17:09:05 -0400 Subject: [PATCH 13/81] update wiki --- wiki | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wiki b/wiki index 23dc2fc80..4e01da914 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit 23dc2fc800495d20319ea19ebf8e3fb8a52c1ce6 +Subproject commit 4e01da914a578ba0db26907da53640a49d7ecb2b From 26cff4557c7d1eba5ada9beffdb04d61c05a027d Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 08:00:35 -0400 Subject: [PATCH 14/81] handle fallback theme --- CHANGELOG.md | 23 ++++++++++++++++------- modules/shared.py | 5 +++-- modules/theme.py | 4 +++- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e25dfff1..f3b797be4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,17 +1,26 @@ # Change Log for SD.Next +## Pending + +- Diffusers==0.30.0 +- https://github.com/huggingface/diffusers/issues/8579 +- https://github.com/huggingface/diffusers/issues/8546 +- https://github.com/huggingface/diffusers/pull/8566 +- https://github.com/huggingface/diffusers/pull/8506 + ## Update for 2024-06-15 -- sd3 enable taesd preview and non-full quality mode -- sd3 simplified loading of model in single-file safetensors format -- css tweaks for standardui -- force apply vae config on model load - fix unsaturated outputs -- restructure api examples: `cli/api-*` +- support for `torch-directml` **0.2.2**, thanks @lshqqytiger! +- **sd3** enable taesd preview and non-full quality mode +- **sd3** simplified loading of model in single-file safetensors format +- fix unsaturated outputs, force apply vae config on model load - fix control second pass resize - fix api face-hires -- fix hunyuandit set attention processor +- fix **hunyuandit** set attention processor - fix civitai download without name +- css tweaks for standardui +- restructure api examples: `cli/api-*` +- handle theme fallback when invalid theme is specified ## Update for 2024-06-13 diff --git a/modules/shared.py b/modules/shared.py index 571f06052..91638dd1c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -330,8 +330,9 @@ def temp_disable_extensions(): modules.shared.opts.data['theme_type'] = 'None' modules.shared.opts.data['gradio_theme'] = theme_name else: - modules.shared.opts.data['theme_type'] = 'None' - modules.shared.opts.data['gradio_theme'] = theme_name + modules.shared.log.error(f'UI theme invalid: theme="{theme_name}" available={["standard/*", "modern/*", "none/*"]} fallback="standard/black-teal"') + modules.shared.opts.data['theme_type'] = 'Standard' + modules.shared.opts.data['gradio_theme'] = 'black-teal' for ext in disable_themes: if ext.lower() not in opts.disabled_extensions: diff --git a/modules/theme.py b/modules/theme.py index 8dfde3e22..26bb39858 100644 --- a/modules/theme.py +++ b/modules/theme.py @@ -91,7 +91,6 @@ def reload_gradio_theme(): 'font_mono':['IBM Plex Mono', 'ui-monospace', 'Consolas', 'monospace'] } gradio_theme = gr.themes.Base(**default_font_params) - available_themes = list_themes() if theme_name not in available_themes: modules.shared.log.error(f'UI theme invalid: type={modules.shared.opts.theme_type} theme="{theme_name}" available={available_themes}') @@ -99,6 +98,9 @@ def reload_gradio_theme(): theme_name = 'black-teal' elif modules.shared.opts.theme_type == 'Modern': theme_name = 'Default' + else: + modules.shared.opts.theme_type = 'Standard' + theme_name = 'black-teal' modules.shared.opts.data['gradio_theme'] = theme_name From 
76b504fb0434b1612f3fcc5cf6d99e0d5821c180 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 08:04:14 -0400 Subject: [PATCH 15/81] sd3 base lora support --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3b797be4..ffa2f578e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - support for `torch-directml` **0.2.2**, thanks @lshqqytiger! - **sd3** enable taesd preview and non-full quality mode +- **sd3** enable base LoRA support - **sd3** simplified loading of model in single-file safetensors format - fix unsaturated outputs, force apply vae config on model load - fix control second pass resize From 66d0293e0c5eaa156b67e4583f626d5599c8ab48 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 08:06:34 -0400 Subject: [PATCH 16/81] fix control resize --- modules/control/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/control/run.py b/modules/control/run.py index 7b27aed4e..b6cfb69eb 100644 --- a/modules/control/run.py +++ b/modules/control/run.py @@ -192,7 +192,7 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini p.refiner_prompt = refiner_prompt p.refiner_negative = refiner_negative if p.enable_hr and (p.hr_resize_x == 0 or p.hr_resize_y == 0): - p.hr_upscale_to_x, p.hr_upscale_to_y = 8 * int(p.width * p.hr_scale / 8), 8 * int(p.height * p.hr_scale / 8) + p.hr_upscale_to_x, p.hr_upscale_to_y = 8 * int(width_before * p.hr_scale / 8), 8 * int(height_before * p.hr_scale / 8) elif p.enable_hr and (p.hr_upscale_to_x == 0 or p.hr_upscale_to_y == 0): p.hr_upscale_to_x, p.hr_upscale_to_y = 8 * int(p.hr_resize_x / 8), 8 * int(hr_resize_y / 8) From 5587280b552689bc7fe2d74b6c4fd246c2e2899a Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 08:25:16 -0400 Subject: [PATCH 17/81] fix adetailer --- CHANGELOG.md | 1 + javascript/inputAccordion.js | 66 ++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 javascript/inputAccordion.js diff --git a/CHANGELOG.md b/CHANGELOG.md index ffa2f578e..a4307eb66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ - fix api face-hires - fix **hunyuandit** set attention processor - fix civitai download without name +- fix compatibility with latest adetailer - css tweaks for standardui - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified diff --git a/javascript/inputAccordion.js b/javascript/inputAccordion.js new file mode 100644 index 000000000..d4c577505 --- /dev/null +++ b/javascript/inputAccordion.js @@ -0,0 +1,66 @@ +function inputAccordionChecked(id, checked) { + const accordion = gradioApp().getElementById(id); + accordion.visibleCheckbox.checked = checked; + accordion.onVisibleCheckboxChange(); +} + +function setupAccordion(accordion) { + const labelWrap = accordion.querySelector('.label-wrap'); + const gradioCheckbox = gradioApp().querySelector(`#${accordion.id}-checkbox input`); + const extra = gradioApp().querySelector(`#${accordion.id}-extra`); + const span = labelWrap.querySelector('span'); + let linked = true; + + const isOpen = () => labelWrap.classList.contains('open'); + + const observerAccordionOpen = new MutationObserver((mutations) => { + mutations.forEach((mutationRecord) => { + accordion.classList.toggle('input-accordion-open', isOpen()); + + if (linked) { + accordion.visibleCheckbox.checked = isOpen(); + accordion.onVisibleCheckboxChange(); + } + }); + }); + 
observerAccordionOpen.observe(labelWrap, { attributes: true, attributeFilter: ['class'] }); + + if (extra) { + labelWrap.insertBefore(extra, labelWrap.lastElementChild); + } + + accordion.onChecked = (checked) => { + if (isOpen() !== checked) { + labelWrap.click(); + } + }; + + const visibleCheckbox = document.createElement('INPUT'); + visibleCheckbox.type = 'checkbox'; + visibleCheckbox.checked = isOpen(); + visibleCheckbox.id = `${accordion.id}-visible-checkbox`; + visibleCheckbox.className = `${gradioCheckbox.className} input-accordion-checkbox`; + span.insertBefore(visibleCheckbox, span.firstChild); + + accordion.visibleCheckbox = visibleCheckbox; + accordion.onVisibleCheckboxChange = () => { + if (linked && isOpen() !== visibleCheckbox.checked) { + labelWrap.click(); + } + + gradioCheckbox.checked = visibleCheckbox.checked; + updateInput(gradioCheckbox); + }; + + visibleCheckbox.addEventListener('click', (event) => { + linked = false; + event.stopPropagation(); + }); + visibleCheckbox.addEventListener('input', accordion.onVisibleCheckboxChange); +} + +onUiLoaded(() => { + for (const accordion of gradioApp().querySelectorAll('.input-accordion')) { + setupAccordion(accordion); + } +}); From a80fb381474cb8b255db494c23d3433f00374c69 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 08:35:15 -0400 Subject: [PATCH 18/81] fix theme preview with no themes --- javascript/ui.js | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/javascript/ui.js b/javascript/ui.js index a3ff22542..8b4a5ce4a 100644 --- a/javascript/ui.js +++ b/javascript/ui.js @@ -471,19 +471,23 @@ function toggleCompact(val, old) { function previewTheme() { let name = gradioApp().getElementById('setting_gradio_theme').querySelectorAll('input')?.[0].value || ''; - fetch('/file=html/themes.json').then((res) => { - res.json().then((themes) => { - const theme = themes.find((t) => t.id === name); - if (theme) { - window.open(theme.subdomain, '_blank'); - } else { - const el = document.getElementById('theme-preview') || createThemeElement(); - el.style.display = el.style.display === 'block' ? 'none' : 'block'; - name = name.replace('/', '-'); - el.src = `/file=html/${name}.jpg`; - } - }); - }); + fetch('/file=html/themes.json') + .then((res) => { + res.json() + .then((themes) => { + const theme = Array.isArray(themes) ? themes.find((t) => t.id === name) : null; + if (theme) { + window.open(theme.subdomain, '_blank'); + } else { + const el = document.getElementById('theme-preview') || createThemeElement(); + el.style.display = el.style.display === 'block' ? 
'none' : 'block'; + name = name.replace('/', '-'); + el.src = `/file=html/${name}.jpg`; + } + }) + .catch((e) => console.error('previewTheme:', e)); + }) + .catch((e) => console.error('previewTheme:', e)); } async function browseFolder() { From 9644063cd64a3bfbc8daba62c3da7753917ac068 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 11:38:29 -0400 Subject: [PATCH 19/81] add generic t5 loader --- CHANGELOG.md | 24 ++++++++++++++++-------- extensions-builtin/Lora/networks.py | 1 - modules/control/units/xs_pipe.py | 6 +++--- modules/model_sd3.py | 27 +++++++++++++++++++-------- modules/sd_models.py | 22 +++++++++++++++------- 5 files changed, 53 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a4307eb66..2affef04b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,19 +3,27 @@ ## Pending - Diffusers==0.30.0 -- https://github.com/huggingface/diffusers/issues/8579 - https://github.com/huggingface/diffusers/issues/8546 - https://github.com/huggingface/diffusers/pull/8566 -- https://github.com/huggingface/diffusers/pull/8506 +- https://github.com/huggingface/diffusers/pull/8584 -## Update for 2024-06-15 +## Update for 2024-06-16 + +### Improvements + +- support for T5 text-encoder loader in **all** models that use T5 + *example*: load FP8 quantized T5 text-encoder into PixArt Sigma +- support for `torch-directml` **0.2.2**, thanks @lshqqytiger! + new directml is finally based on modern `torch` 2.3.1! +- **sd3** enable taesd preview and non-full quality mode +- **sd3** enable base LoRA support +- **sd3** simplified loading of model in single-file safetensors format + loading sd3 can now be performed fully offline + +### Fixes -- support for `torch-directml` **0.2.2**, thanks @lshqqytiger! -- **sd3** enable taesd preview and non-full quality mode -- **sd3** enable base LoRA support -- **sd3** simplified loading of model in single-file safetensors format - fix unsaturated outputs, force apply vae config on model load -- fix control second pass resize +- fix control second pass resize - fix api face-hires - fix **hunyuandit** set attention processor - fix civitai download without name diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 4e45a86bd..574b223bf 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -162,7 +162,6 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No list_available_networks() networks_on_disk = [available_network_aliases.get(name, None) for name in names] failed_to_load_networks = [] - recompile_model = False if shared.compiled_model_state is not None and shared.compiled_model_state.is_compiled: if len(names) == len(shared.compiled_model_state.lora_model): diff --git a/modules/control/units/xs_pipe.py b/modules/control/units/xs_pipe.py index 14581c0f1..7e717b542 100644 --- a/modules/control/units/xs_pipe.py +++ b/modules/control/units/xs_pipe.py @@ -1048,7 +1048,7 @@ def __call__( self.upcast_vae() latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype) - if not output_type == "latent": + if output_type != "latent": # make sure the VAE is in float32 mode, as it overflows in float16 needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast @@ -1064,7 +1064,7 @@ def __call__( else: image = latents - if not output_type == "latent": + if output_type != "latent": # apply watermark if available if self.watermark is not None: image = self.watermark.apply_watermark(image) @@ 
-1907,7 +1907,7 @@ def __call__( self.controlnet.to("cpu") torch.cuda.empty_cache() - if not output_type == "latent": + if output_type != "latent": image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[ 0 ] diff --git a/modules/model_sd3.py b/modules/model_sd3.py index e2004c1f4..312976183 100644 --- a/modules/model_sd3.py +++ b/modules/model_sd3.py @@ -13,7 +13,6 @@ def load_sd3(fn=None, cache_dir=None, config=None): from modules import devices, modelloader - modelloader.hf_login() repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers' model_id = 'stabilityai/stable-diffusion-3-medium-diffusers' dtype = torch.float16 @@ -56,6 +55,7 @@ def load_sd3(fn=None, cache_dir=None, config=None): else: kwargs = {} else: + modelloader.hf_login() model_id = repo_id loader = diffusers.StableDiffusion3Pipeline.from_pretrained pipe = loader( @@ -71,34 +71,45 @@ def load_sd3(fn=None, cache_dir=None, config=None): return pipe -def load_te3(pipe, te3=None, cache_dir=None): +def load_t5(pipe, module, te3=None, cache_dir=None): from modules import devices, modelloader - modelloader.hf_login() repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers' - if pipe is None or not hasattr(pipe, 'text_encoder_3'): + if pipe is None or not hasattr(pipe, module): return pipe if 'fp16' in te3.lower(): - pipe.text_encoder_3 = transformers.T5EncoderModel.from_pretrained( + modelloader.hf_login() + t5 = transformers.T5EncoderModel.from_pretrained( repo_id, subfolder='text_encoder_3', # torch_dtype=dtype, cache_dir=cache_dir, torch_dtype=pipe.text_encoder.dtype, ) + setattr(pipe, module, t5) elif 'fp8' in te3.lower(): + modelloader.hf_login() from installer import install install('bitsandbytes', quiet=True) quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True) - pipe.text_encoder_3 = transformers.T5EncoderModel.from_pretrained( + t5 = transformers.T5EncoderModel.from_pretrained( repo_id, subfolder='text_encoder_3', quantization_config=quantization_config, cache_dir=cache_dir, torch_dtype=pipe.text_encoder.dtype, ) + setattr(pipe, module, t5) + """ + if hasattr(pipe, 'remove_all_hooks'): + pipe.remove_all_hooks() + nn = getattr(pipe, module) + import accelerate + accelerate.hooks.remove_hook_from_module(nn, recurse=True) + nn.to(device=devices.device) + """ else: - pipe.text_encoder_3 = None - if getattr(pipe, 'text_encoder_3', None) is not None and getattr(pipe, 'tokenizer_3', None) is None: + setattr(pipe, module, None) + if getattr(pipe, 'text_encoder_3', None) is not None and getattr(pipe, 'tokenizer_3', None) is None: # not needed anymore pipe.tokenizer_3 = transformers.T5TokenizerFast.from_pretrained( repo_id, subfolder='tokenizer_3', diff --git a/modules/sd_models.py b/modules/sd_models.py index 5ceed8d5d..81214bae5 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -997,7 +997,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No elif model_type in ['PixArt-Sigma']: # forced pipeline try: # shared.opts.data['cuda_dtype'] = 'FP32' # override - shared.opts.data['diffusers_model_cpu_offload'] = True # override + # shared.opts.data['diffusers_model_cpu_offload'] = True # override devices.set_cuda_params() sd_model = diffusers.PixArtSigmaPipeline.from_pretrained( checkpoint_info.path, @@ -1168,7 +1168,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No move_model(sd_model, devices.device) timer.record("move") - reload_text_encoder() + reload_text_encoder(initial=True) 
if shared.opts.ipex_optimize: sd_model = sd_models_compile.ipex_optimize(sd_model) @@ -1527,11 +1527,19 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None, timer=None, shared.log.info(f'Model load finished: {memory_stats()} cached={len(checkpoints_loaded.keys())}') -def reload_text_encoder(): - if hasattr(shared.sd_model, 'text_encoder_3'): - from modules.model_sd3 import load_te3 - shared.log.debug(f'Load: TE3={shared.opts.sd_te3}') - load_te3(shared.sd_model, shared.opts.sd_te3, cache_dir=shared.opts.diffusers_dir) +def reload_text_encoder(initial=False): + if initial and (shared.opts.sd_te3 is None or shared.opts.sd_te3 == 'None'): + return # dont unload + signature = inspect.signature(shared.sd_model.__class__.__init__, follow_wrapped=True, eval_str=True).parameters + t5 = [k for k, v in signature.items() if 'T5EncoderModel' in str(v)] + if len(t5) > 0: + from modules.model_sd3 import load_t5 + shared.log.debug(f'Load: t5={shared.opts.sd_te3} module="{t5[0]}"') + load_t5(pipe=shared.sd_model, module=t5[0], te3=shared.opts.sd_te3, cache_dir=shared.opts.diffusers_dir) + elif hasattr(shared.sd_model, 'text_encoder_3'): + from modules.model_sd3 import load_t5 + shared.log.debug(f'Load: t5={shared.opts.sd_te3} module="text_encoder_3"') + load_t5(pipe=shared.sd_model, module='text_encoder_3', te3=shared.opts.sd_te3, cache_dir=shared.opts.diffusers_dir) def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', force=False): From 10de19bd055c1a5129fcfecc09bee2791f3a5ca9 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 12:02:31 -0400 Subject: [PATCH 20/81] extra networks add link to source url --- CHANGELOG.md | 2 ++ modules/ui_extra_networks.py | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2affef04b..42bf77ff8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ - **sd3** enable base LoRA support - **sd3** simplified loading of model in single-file safetensors format loading sd3 can now be performed fully offline +- extra networks: info display now contains link to source url if model if its known + works for civitai and huggingface models ### Fixes diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py index 7a49ce660..bd5238cfb 100644 --- a/modules/ui_extra_networks.py +++ b/modules/ui_extra_networks.py @@ -812,15 +812,22 @@ def show_details(text, img, desc, info, meta, description, prompt, negative, par Preview Embedded{item.preview.startswith('data:')} ''' # desc = f'Name: {os.path.basename(item.name)}\nDescription: {item.description}\nPrompt: {item.prompt}\nNegative: {item.negative}\nExtra: {item.extra}\n' + if item.name.startswith('Diffusers'): + url = item.name.replace('Diffusers/', '') + url = f'https://huggingface.co/models/{url}' if url is not None else 'N/A' + else: + url = info.get('id', None) if info is not None else None + url = f'civitai.com/models/{url}' if url is not None else 'N/A' text = f'''
{item.name}
- +
+ {lora} {model} From 8c94047cf33e99faf4a7c34f6f7bfd409e26fc6f Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 12:28:38 -0400 Subject: [PATCH 21/81] fix invalid sampler warning --- CHANGELOG.md | 3 +- modules/control/run.py | 3 +- modules/txt2img.py | 3 +- modules/unipc/uni_pc.py | 80 ----------------------------------------- 4 files changed, 4 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42bf77ff8..05e09a158 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ loading sd3 can now be performed fully offline - extra networks: info display now contains link to source url if model if its known works for civitai and huggingface models +- css tweaks for standardui ### Fixes @@ -30,7 +31,7 @@ - fix **hunyuandit** set attention processor - fix civitai download without name - fix compatibility with latest adetailer -- css tweaks for standardui +- fix invalid sampler warning - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified diff --git a/modules/control/run.py b/modules/control/run.py index b6cfb69eb..944474d50 100644 --- a/modules/control/run.py +++ b/modules/control/run.py @@ -90,8 +90,7 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini shared.log.warning('Sampler: invalid') sampler_index = 0 if hr_sampler_index is None: - shared.log.warning('Sampler: invalid') - hr_sampler_index = 0 + hr_sampler_index = sampler_index p = StableDiffusionProcessingControl( prompt = prompt, diff --git a/modules/txt2img.py b/modules/txt2img.py index 56abf3de4..76b0a7c45 100644 --- a/modules/txt2img.py +++ b/modules/txt2img.py @@ -35,8 +35,7 @@ def txt2img(id_task, shared.log.warning('Sampler: invalid') sampler_index = 0 if hr_sampler_index is None: - shared.log.warning('Sampler: invalid') - hr_sampler_index = 0 + hr_sampler_index = sampler_index p = processing.StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, diff --git a/modules/unipc/uni_pc.py b/modules/unipc/uni_pc.py index ca7fdd7dc..6ba3a31fa 100644 --- a/modules/unipc/uni_pc.py +++ b/modules/unipc/uni_pc.py @@ -14,86 +14,6 @@ def __init__( continuous_beta_0=0.1, continuous_beta_1=20., ): - """Create a wrapper class for the forward SDE (VP type). - - *** - Update: We support discrete-time diffusion models by implementing a picewise linear interpolation for log_alpha_t. - We recommend to use schedule='discrete' for the discrete-time diffusion models, especially for high-resolution images. - *** - - The forward SDE ensures that the condition distribution q_{t|0}(x_t | x_0) = N ( alpha_t * x_0, sigma_t^2 * I ). - We further define lambda_t = log(alpha_t) - log(sigma_t), which is the half-logSNR (described in the DPM-Solver paper). - Therefore, we implement the functions for computing alpha_t, sigma_t and lambda_t. For t in [0, T], we have: - - log_alpha_t = self.marginal_log_mean_coeff(t) - sigma_t = self.marginal_std(t) - lambda_t = self.marginal_lambda(t) - - Moreover, as lambda(t) is an invertible function, we also support its inverse function: - - t = self.inverse_lambda(lambda_t) - - =============================================================== - - We support both discrete-time DPMs (trained on n = 0, 1, ..., N-1) and continuous-time DPMs (trained on t in [t_0, T]). - - 1. For discrete-time DPMs: - - For discrete-time DPMs trained on n = 0, 1, ..., N-1, we convert the discrete steps to continuous time steps by: - t_i = (i + 1) / N - e.g. for N = 1000, we have t_0 = 1e-3 and T = t_{N-1} = 1. 
- We solve the corresponding diffusion ODE from time T = 1 to time t_0 = 1e-3. - - Args: - betas: A `torch.Tensor`. The beta array for the discrete-time DPM. (See the original DDPM paper for details) - alphas_cumprod: A `torch.Tensor`. The cumprod alphas for the discrete-time DPM. (See the original DDPM paper for details) - - Note that we always have alphas_cumprod = cumprod(betas). Therefore, we only need to set one of `betas` and `alphas_cumprod`. - - **Important**: Please pay special attention for the args for `alphas_cumprod`: - The `alphas_cumprod` is the \hat{alpha_n} arrays in the notations of DDPM. Specifically, DDPMs assume that - q_{t_n | 0}(x_{t_n} | x_0) = N ( \sqrt{\hat{alpha_n}} * x_0, (1 - \hat{alpha_n}) * I ). - Therefore, the notation \hat{alpha_n} is different from the notation alpha_t in DPM-Solver. In fact, we have - alpha_{t_n} = \sqrt{\hat{alpha_n}}, - and - log(alpha_{t_n}) = 0.5 * log(\hat{alpha_n}). - - - 2. For continuous-time DPMs: - - We support two types of VPSDEs: linear (DDPM) and cosine (improved-DDPM). The hyperparameters for the noise - schedule are the default settings in DDPM and improved-DDPM: - - Args: - beta_min: A `float` number. The smallest beta for the linear schedule. - beta_max: A `float` number. The largest beta for the linear schedule. - cosine_s: A `float` number. The hyperparameter in the cosine schedule. - cosine_beta_max: A `float` number. The hyperparameter in the cosine schedule. - T: A `float` number. The ending time of the forward process. - - =============================================================== - - Args: - schedule: A `str`. The noise schedule of the forward SDE. 'discrete' for discrete-time DPMs, - 'linear' or 'cosine' for continuous-time DPMs. - Returns: - A wrapper object of the forward SDE (VP type). - - =============================================================== - - Example: - - # For discrete-time DPMs, given betas (the beta array for n = 0, 1, ..., N - 1): - >>> ns = NoiseScheduleVP('discrete', betas=betas) - - # For discrete-time DPMs, given alphas_cumprod (the \hat{alpha_n} array for n = 0, 1, ..., N - 1): - >>> ns = NoiseScheduleVP('discrete', alphas_cumprod=alphas_cumprod) - - # For continuous-time DPMs (VPSDE), linear schedule: - >>> ns = NoiseScheduleVP('linear', continuous_beta_0=0.1, continuous_beta_1=20.) - - """ - if schedule not in ['discrete', 'linear', 'cosine']: raise ValueError(f"Unsupported noise schedule {schedule}. 
The schedule needs to be 'discrete' or 'linear' or 'cosine'") From c55571118dc1a900124a4b7d23aaac1840184d3d Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 14:19:20 -0400 Subject: [PATCH 22/81] fix starting from non git repo --- CHANGELOG.md | 1 + installer.py | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05e09a158..e3df4a652 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ - fix civitai download without name - fix compatibility with latest adetailer - fix invalid sampler warning +- fix starting from non git repo - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified diff --git a/installer.py b/installer.py index 2abb8b33d..3a7a91e00 100644 --- a/installer.py +++ b/installer.py @@ -1019,7 +1019,7 @@ def get_version(force=False): 'url': origin.replace('\n', '') + '/tree/' + branch_name.replace('\n', '') } except Exception: - version = { 'app': 'sd.next', 'version': 'unknown' } + version = { 'app': 'sd.next', 'version': 'unknown', 'branch': 'unknown' } try: cwd = os.getcwd() os.chdir('extensions-builtin/sdnext-modernui') @@ -1035,9 +1035,7 @@ def get_version(force=False): def check_ui(ver): - if ver is None: - return - if ver['branch'] == ver['ui']: + if ver is None or 'branch' not in ver or 'ui' not in ver or ver['branch'] == ver['ui']: return log.debug(f'Branch mismatch: sdnext={ver["branch"]} ui={ver["ui"]}') cwd = os.getcwd() From 4c7b4f382ef615731ca7d14b72735f81d326ec4a Mon Sep 17 00:00:00 2001 From: Disty0 Date: Sun, 16 Jun 2024 21:47:20 +0300 Subject: [PATCH 23/81] Fix NNCF with T5 --- modules/sd_hijack.py | 19 +++++++++++++++++++ modules/sd_models_compile.py | 14 ++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index b811f33bf..6894c8ff9 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -283,6 +283,25 @@ def forward(self, input_ids): return torch.stack(vecs) +class NNCF_T5DenseGatedActDense(torch.nn.Module): # forward can't find what self is without creating a class + def __init__(self, T5DenseGatedActDense): + super().__init__() + self.wi_0 = T5DenseGatedActDense.wi_0 + self.wi_1 = T5DenseGatedActDense.wi_1 + self.wo = T5DenseGatedActDense.wo + self.dropout = T5DenseGatedActDense.dropout + self.act = T5DenseGatedActDense.act + + def forward(self, hidden_states): + hidden_gelu = self.act(self.wi_0(hidden_states)) + hidden_linear = self.wi_1(hidden_states) + hidden_states = hidden_gelu * hidden_linear + hidden_states = self.dropout(hidden_states) + hidden_states = hidden_states.to(torch.float32) # this line needs to be forced to fp32 + hidden_states = self.wo(hidden_states) + return hidden_states + + def add_circular_option_to_conv_2d(): conv2d_constructor = torch.nn.Conv2d.__init__ diff --git a/modules/sd_models_compile.py b/modules/sd_models_compile.py index c6006db25..f69b126b0 100644 --- a/modules/sd_models_compile.py +++ b/modules/sd_models_compile.py @@ -58,9 +58,23 @@ def apply_compile_to_model(sd_model, function, options, op=None): sd_model.text_encoder = None sd_model.text_encoder = sd_model.decoder_pipe.text_encoder = function(sd_model.decoder_pipe.text_encoder) else: + if op == "nncf" and sd_model.text_encoder.__class__.__name__ == "T5EncoderModel": + from modules.sd_hijack import NNCF_T5DenseGatedActDense # T5DenseGatedActDense uses fp32 + for i in range(len(sd_model.text_encoder.encoder.block)): + sd_model.text_encoder.encoder.block[i].layer[1].DenseReluDense = 
NNCF_T5DenseGatedActDense( + sd_model.text_encoder.encoder.block[i].layer[1].DenseReluDense + ) sd_model.text_encoder = function(sd_model.text_encoder) if hasattr(sd_model, 'text_encoder_2') and hasattr(sd_model.text_encoder_2, 'config'): sd_model.text_encoder_2 = function(sd_model.text_encoder_2) + if hasattr(sd_model, 'text_encoder_3') and hasattr(sd_model.text_encoder_2, 'config'): + if op == "nncf" and sd_model.text_encoder_3.__class__.__name__ == "T5EncoderModel": + from modules.sd_hijack import NNCF_T5DenseGatedActDense # T5DenseGatedActDense uses fp32 + for i in range(len(sd_model.text_encoder_3.encoder.block)): + sd_model.text_encoder_3.encoder.block[i].layer[1].DenseReluDense = NNCF_T5DenseGatedActDense( + sd_model.text_encoder_3.encoder.block[i].layer[1].DenseReluDense + ) + sd_model.text_encoder_3 = function(sd_model.text_encoder_3) if hasattr(sd_model, 'prior_pipe') and hasattr(sd_model, 'prior_text_encoder'): sd_model.prior_text_encoder = None sd_model.prior_text_encoder = sd_model.prior_pipe.text_encoder = function(sd_model.prior_pipe.text_encoder) From 77a3f0ab2f4770d98a29d993d1b621d33ee7caa5 Mon Sep 17 00:00:00 2001 From: Disty0 Date: Sun, 16 Jun 2024 21:49:41 +0300 Subject: [PATCH 24/81] Cleanup --- modules/sd_models_compile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_models_compile.py b/modules/sd_models_compile.py index f69b126b0..a4b4fb8af 100644 --- a/modules/sd_models_compile.py +++ b/modules/sd_models_compile.py @@ -67,7 +67,7 @@ def apply_compile_to_model(sd_model, function, options, op=None): sd_model.text_encoder = function(sd_model.text_encoder) if hasattr(sd_model, 'text_encoder_2') and hasattr(sd_model.text_encoder_2, 'config'): sd_model.text_encoder_2 = function(sd_model.text_encoder_2) - if hasattr(sd_model, 'text_encoder_3') and hasattr(sd_model.text_encoder_2, 'config'): + if hasattr(sd_model, 'text_encoder_3') and hasattr(sd_model.text_encoder_3, 'config'): if op == "nncf" and sd_model.text_encoder_3.__class__.__name__ == "T5EncoderModel": from modules.sd_hijack import NNCF_T5DenseGatedActDense # T5DenseGatedActDense uses fp32 for i in range(len(sd_model.text_encoder_3.encoder.block)): From f2b7e08e7a25d32b106cb57122a4392fc69f17a0 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 14:59:25 -0400 Subject: [PATCH 25/81] update hidiffusion --- CHANGELOG.md | 4 ++- modules/hidiffusion/hidiffusion.py | 40 ++++++++++++++++-------------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e3df4a652..853d5876f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,11 +14,12 @@ - support for T5 text-encoder loader in **all** models that use T5 *example*: load FP8 quantized T5 text-encoder into PixArt Sigma - support for `torch-directml` **0.2.2**, thanks @lshqqytiger! - new directml is finally based on modern `torch` 2.3.1! + *note*: new directml is finally based on modern `torch` 2.3.1! - **sd3** enable taesd preview and non-full quality mode - **sd3** enable base LoRA support - **sd3** simplified loading of model in single-file safetensors format loading sd3 can now be performed fully offline +- **sd3** add support for nncf compressed weights, thanks @Disty0! 
- extra networks: info display now contains link to source url if model if its known works for civitai and huggingface models - css tweaks for standardui @@ -26,6 +27,7 @@ ### Fixes - fix unsaturated outputs, force apply vae config on model load +- fix hidiffusion handling of non-square aspect ratios, thanks @ShenZhang-Shin! - fix control second pass resize - fix api face-hires - fix **hunyuandit** set attention processor diff --git a/modules/hidiffusion/hidiffusion.py b/modules/hidiffusion/hidiffusion.py index df866bce8..7874f03af 100644 --- a/modules/hidiffusion/hidiffusion.py +++ b/modules/hidiffusion/hidiffusion.py @@ -1,4 +1,5 @@ from typing import Type, Dict, Any, Tuple, Optional +import math import torch import torch.nn.functional as F from diffusers.utils.torch_utils import is_torch_version @@ -100,15 +101,18 @@ def forward( # reference: https://github.com/microsoft/Swin-Transformer def window_partition(x, window_size, shift_size, H, W): B, _N, C = x.shape - # H, W = int(N**0.5), int(N**0.5) x = x.view(B,H,W,C) + if H % 2 != 0 or W % 2 != 0: + from modules.errors import log + log.warning('HiDiffusion: The feature size is not divisible by 2') + x = F.interpolate(x.permute(0,3,1,2).contiguous(), size=(window_size[0]*2, window_size[1]*2), mode='bicubic').permute(0,2,3,1).contiguous() if type(shift_size) == list or type(shift_size) == tuple: if shift_size[0] > 0: x = torch.roll(x, shifts=(-shift_size[0], -shift_size[1]), dims=(1, 2)) else: if shift_size > 0: x = torch.roll(x, shifts=(-shift_size, -shift_size), dims=(1, 2)) - x = x.view(B, H // window_size[0], window_size[0], W // window_size[1], window_size[1], C) + x = x.view(B, 2, window_size[0], 2, window_size[1], C) windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size[0], window_size[1], C) windows = windows.view(-1, window_size[0] * window_size[1], C) return windows @@ -116,15 +120,17 @@ def window_partition(x, window_size, shift_size, H, W): def window_reverse(windows, window_size, H, W, shift_size): B, _N, C = windows.shape windows = windows.view(-1, window_size[0], window_size[1], C) - B = int(windows.shape[0] / (H * W / window_size[0] / window_size[1])) - x = windows.view(B, H // window_size[0], W // window_size[1], window_size[0], window_size[1], -1) - x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + B = int(windows.shape[0] / 4) # 2x2 + x = windows.view(B, 2, 2, window_size[0], window_size[1], -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, window_size[0]*2, window_size[1]*2, -1) if type(shift_size) == list or type(shift_size) == tuple: if shift_size[0] > 0: x = torch.roll(x, shifts=(shift_size[0], shift_size[1]), dims=(1, 2)) else: if shift_size > 0: x = torch.roll(x, shifts=(shift_size, shift_size), dims=(1, 2)) + if H % 2 != 0 or W % 2 != 0: + x = F.interpolate(x.permute(0,3,1,2).contiguous(), size=(H, W), mode='bicubic').permute(0,2,3,1).contiguous() x = x.view(B, H*W, C) return x @@ -152,9 +158,9 @@ def window_reverse(windows, window_size, H, W, shift_size): rand_num = torch.rand(1) _B, N, _C = hidden_states.shape ori_H, ori_W = self.info['size'] - downsample_ratio = int(((ori_H*ori_W) // N)**0.5) - H, W = (ori_H//downsample_ratio, ori_W//downsample_ratio) - widow_size = (H//2, W//2) + downsample_ratio = round(((ori_H*ori_W) / N)**0.5) + H, W = (math.ceil(ori_H/downsample_ratio), math.ceil(ori_W/downsample_ratio)) + widow_size = (math.ceil(H/2), math.ceil(W/2)) if rand_num <= 0.25: shift_size = (0,0) if rand_num > 0.25 and rand_num <= 0.5: @@ -351,9 +357,11 @@ def 
custom_forward(*inputs): if i == 0: if self.aggressive_raunet and self.timestep >= self.T1_start and self.timestep < self.T1_end: - hidden_states = F.avg_pool2d(hidden_states, kernel_size=(2,2)) + self.info["upsample_size"] = (hidden_states.shape[2], hidden_states.shape[3]) + hidden_states = F.avg_pool2d(hidden_states, kernel_size=(2,2),ceil_mode=True) elif self.timestep < self.T1: - hidden_states = F.avg_pool2d(hidden_states, kernel_size=(2,2)) + self.info["upsample_size"] = (hidden_states.shape[2], hidden_states.shape[3]) + hidden_states = F.avg_pool2d(hidden_states, kernel_size=(2,2),ceil_mode=True) output_states = output_states + (hidden_states,) if self.downsamplers is not None: @@ -458,11 +466,9 @@ def fix_scale(first, second): # TODO hidiffusion breaks hidden_scale.shape on 3r )[0] if i == 1: if self.aggressive_raunet and self.timestep >= self.T1_start and self.timestep < self.T1_end: - re_size = (int(hidden_states.shape[-2] * 2), int(hidden_states.shape[-1] * 2)) - hidden_states = F.interpolate(hidden_states, size=re_size, mode='bicubic') + hidden_states = F.interpolate(hidden_states, size=self.info["upsample_size"], mode='bicubic') elif self.timestep < self.T1: - re_size = (int(hidden_states.shape[-2] * 2), int(hidden_states.shape[-1] * 2)) - hidden_states = F.interpolate(hidden_states, size=re_size, mode='bicubic') + hidden_states = F.interpolate(hidden_states, size=self.info["upsample_size"], mode='bicubic') if self.upsamplers is not None: for upsampler in self.upsamplers: @@ -589,9 +595,6 @@ def forward(self, hidden_states: torch.Tensor, scale = 1.0) -> torch.Tensor: # p self.T1 = int(aggressive_step/50 * self.max_timestep) else: self.T1 = int(self.max_timestep * self.T1_ratio) - if self.timestep < self.T1: - if ori_H != hidden_states.shape[2] and ori_W != hidden_states.shape[3]: - hidden_states = F.interpolate(hidden_states, scale_factor=2.0, mode='bicubic') self.timestep += 1 if self.timestep == self.max_timestep: self.timestep = 0 @@ -629,9 +632,10 @@ def apply_hidiffusion( make_block_fn = make_diffusers_unet_2d_condition model.unet.__class__ = make_block_fn(model.unet.__class__) diffusion_model = model.unet if hasattr(model, "unet") else model - diffusion_model.num_upsamplers += 2 + diffusion_model.num_upsamplers += 12 diffusion_model.info = { 'size': None, + 'upsample_size': None, 'hooks': [], 'text_to_img_controlnet': hasattr(model, 'controlnet'), 'is_inpainting_task': model.__class__ in auto_pipeline.AUTO_INPAINT_PIPELINES_MAPPING.values(), From deefbe59a4b7588d8c25e371a5a27cf904608bcb Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 15:56:25 -0400 Subject: [PATCH 26/81] sd3 sampler shift support --- CHANGELOG.md | 19 ++++++++---- modules/images.py | 3 ++ modules/sd_samplers_diffusers.py | 4 ++- modules/shared.py | 29 ++++++++++--------- .../textual_inversion/textual_inversion.py | 11 ------- scripts/xyz_grid.py | 1 + train/templates/style.txt | 1 - train/templates/style_filewords.txt | 1 - train/templates/subject.txt | 1 - train/templates/subject_filewords.txt | 1 - train/templates/unknown.txt | 1 - train/templates/unknown_filewords.txt | 1 - webui.py | 1 - 13 files changed, 35 insertions(+), 39 deletions(-) delete mode 100644 train/templates/style.txt delete mode 100644 train/templates/style_filewords.txt delete mode 100644 train/templates/subject.txt delete mode 100644 train/templates/subject_filewords.txt delete mode 100644 train/templates/unknown.txt delete mode 100644 train/templates/unknown_filewords.txt diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 853d5876f..20b92b865 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,17 +9,23 @@ ## Update for 2024-06-16 -### Improvements +### Improvements: SD3 + +- enable taesd preview and non-full quality mode +- enable base LoRA support +- simplified loading of model in single-file safetensors format + loading sd3 can now be performed fully offline +- add support for nncf compressed weights, thanks @Disty0! +- add support for sampler shift for Euler FlowMatch + see *settings -> samplers*, also available as param in xyz grid + higher shift means model will spend more time on structure and less on details + +### Improvements: General - support for T5 text-encoder loader in **all** models that use T5 *example*: load FP8 quantized T5 text-encoder into PixArt Sigma - support for `torch-directml` **0.2.2**, thanks @lshqqytiger! *note*: new directml is finally based on modern `torch` 2.3.1! -- **sd3** enable taesd preview and non-full quality mode -- **sd3** enable base LoRA support -- **sd3** simplified loading of model in single-file safetensors format - loading sd3 can now be performed fully offline -- **sd3** add support for nncf compressed weights, thanks @Disty0! - extra networks: info display now contains link to source url if model if its known works for civitai and huggingface models - css tweaks for standardui @@ -37,6 +43,7 @@ - fix starting from non git repo - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified +- remove obsolete training code leftovers ## Update for 2024-06-13 diff --git a/modules/images.py b/modules/images.py index caf3a7cc4..abb5608db 100644 --- a/modules/images.py +++ b/modules/images.py @@ -54,6 +54,9 @@ def image_grid(imgs, batch_size=1, rows=None): cols = math.ceil(len(imgs) / rows) params = script_callbacks.ImageGridLoopParams(imgs, cols, rows) script_callbacks.image_grid_callback(params) + imgs = [i for i in imgs if i is not None] if imgs is not None else [] + if len(imgs) == 0: + return None w, h = imgs[0].size grid = Image.new('RGB', size=(params.cols * w, params.rows * h), color=shared.opts.grid_background) for i, img in enumerate(params.imgs): diff --git a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py index 25c94cfb2..997ca5c3a 100644 --- a/modules/sd_samplers_diffusers.py +++ b/modules/sd_samplers_diffusers.py @@ -66,7 +66,7 @@ 'Euler EDM': { }, 'DPM++ 2M EDM': { 'solver_order': 2, 'solver_type': 'midpoint', 'final_sigmas_type': 'zero', 'algorithm_type': 'dpmsolver++' }, 'CMSI': { }, #{ 'sigma_min': 0.002, 'sigma_max': 80.0, 'sigma_data': 0.5, 's_noise': 1.0, 'rho': 7.0, 'clip_denoised': True }, - 'Euler FlowMatch': { }, + 'Euler FlowMatch': { 'shift': 1, }, 'IPNDM': { }, } @@ -156,6 +156,8 @@ def __init__(self, name, constructor, model, **kwargs): self.config['beta_start'] = shared.opts.schedulers_beta_start if 'beta_end' in self.config and shared.opts.schedulers_beta_end > 0: self.config['beta_end'] = shared.opts.schedulers_beta_end + if 'shift' in self.config and shared.opts.schedulers_shift != 1: + self.config['shift'] = shared.opts.schedulers_shift if 'rescale_betas_zero_snr' in self.config: self.config['rescale_betas_zero_snr'] = shared.opts.schedulers_rescale_betas if 'timestep_spacing' in self.config and shared.opts.schedulers_timestep_spacing != 'default' and shared.opts.schedulers_timestep_spacing is not None: diff --git a/modules/shared.py b/modules/shared.py index 91638dd1c..4c5782c19 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -41,7 +41,7 @@ 
xformers_available = False locking_available = True clip_model = None -interrogator = modules.interrogate.InterrogateModels("interrogate") +interrogator = modules.interrogate.InterrogateModels(os.path.join("models", "interrogate")) sd_upscalers = [] face_restorers = [] tab_names = [] @@ -716,6 +716,7 @@ def temp_disable_extensions(): 'schedulers_timesteps_range': OptionInfo(1000, "Timesteps range", gr.Slider, {"minimum": 250, "maximum": 4000, "step": 1}), 'schedulers_timesteps': OptionInfo('', "Timesteps"), "schedulers_rescale_betas": OptionInfo(False, "Rescale betas with zero terminal SNR", gr.Checkbox), + 'schedulers_shift': OptionInfo(1, "Sampler shift", gr.Slider, {"minimum": 0.1, "maximum": 10, "step": 0.1}), # managed from ui.py for backend original k-diffusion "schedulers_sep_kdiffusers": OptionInfo("
<h2>K-Diffusion specific config</h2>
", "", gr.HTML), @@ -775,19 +776,19 @@ def temp_disable_extensions(): "control_unload_processor": OptionInfo(False, "Processor unload after use"), })) -options_templates.update(options_section(('training', "Training"), { - "unload_models_when_training": OptionInfo(False, "Move VAE and CLIP to RAM when training"), - "pin_memory": OptionInfo(True, "Pin training dataset to memory"), - "save_optimizer_state": OptionInfo(False, "Save resumable optimizer state when training"), - "save_training_settings_to_txt": OptionInfo(True, "Save training settings to a text file"), - "dataset_filename_word_regex": OptionInfo("", "Filename word regex"), - "dataset_filename_join_string": OptionInfo(" ", "Filename join string"), - "embeddings_templates_dir": OptionInfo(os.path.join(paths.script_path, 'train', 'templates'), "Embeddings train templates directory", folder=True), - "training_image_repeats_per_epoch": OptionInfo(1, "Image repeats per epoch", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1}), - "training_write_csv_every": OptionInfo(0, "Save loss CSV file every n steps"), - "training_enable_tensorboard": OptionInfo(False, "Enable tensorboard logging"), - "training_tensorboard_save_images": OptionInfo(False, "Save generated images within tensorboard"), - "training_tensorboard_flush_every": OptionInfo(120, "Tensorboard flush period"), +options_templates.update(options_section(('interrogate', "Interrogate"), { # "Training" section disabled so just a placeholder + "unload_models_when_training": OptionInfo(False, "Move VAE and CLIP to RAM when training", gr.Checkbox, { "visible": False }), + "pin_memory": OptionInfo(True, "Pin training dataset to memory", gr.Checkbox, { "visible": False }), + "save_optimizer_state": OptionInfo(False, "Save resumable optimizer state when training", gr.Checkbox, { "visible": False }), + "save_training_settings_to_txt": OptionInfo(True, "Save training settings to a text file", gr.Checkbox, { "visible": False }), + "dataset_filename_word_regex": OptionInfo("", "Filename word regex", gr.Textbox, { "visible": False }), + "dataset_filename_join_string": OptionInfo(" ", "Filename join string", gr.Textbox, { "visible": False }), + "embeddings_templates_dir": OptionInfo("", "Embeddings train templates directory", gr.Textbox, { "visible": False }), + "training_image_repeats_per_epoch": OptionInfo(1, "Image repeats per epoch", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1, "visible": False }), + "training_write_csv_every": OptionInfo(0, "Save loss CSV file every n steps", gr.Number, { "visible": False }), + "training_enable_tensorboard": OptionInfo(False, "Enable tensorboard logging", gr.Checkbox, { "visible": False }), + "training_tensorboard_save_images": OptionInfo(False, "Save generated images within tensorboard", gr.Checkbox, { "visible": False }), + "training_tensorboard_flush_every": OptionInfo(120, "Tensorboard flush period", gr.Number, { "visible": False }), })) options_templates.update(options_section(('interrogate', "Interrogate"), { diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index e0fcbc55f..b0779716b 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -13,17 +13,6 @@ debug = shared.log.trace if os.environ.get('SD_TI_DEBUG', None) is not None else lambda *args, **kwargs: None debug('Trace: TEXTUAL INVERSION') TokenToAdd = namedtuple("TokenToAdd", ["clip_l", "clip_g"]) -TextualInversionTemplate = namedtuple("TextualInversionTemplate", 
["name", "path"]) -textual_inversion_templates = {} - - -def list_textual_inversion_templates(): - textual_inversion_templates.clear() - for root, _dirs, fns in os.walk(shared.opts.embeddings_templates_dir): - for fn in fns: - path = os.path.join(root, fn) - textual_inversion_templates[fn] = TextualInversionTemplate(fn, path) - return textual_inversion_templates def list_embeddings(*dirs): diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index a7ff92532..6ba75c775 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -251,6 +251,7 @@ def __init__(self, *args, **kwargs): AxisOption("[Sampler] Sigma tmax", float, apply_field("s_tmax")), AxisOption("[Sampler] Sigma Churn", float, apply_field("s_churn")), AxisOption("[Sampler] Sigma noise", float, apply_field("s_noise")), + AxisOption("[Sampler] Shift", float, apply_setting("schedulers_shift")), AxisOption("[Sampler] ETA", float, apply_setting("scheduler_eta")), AxisOption("[Sampler] Solver order", int, apply_setting("schedulers_solver_order")), AxisOption("[Second pass] Upscaler", str, apply_field("hr_upscaler"), choices=lambda: [*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]]), diff --git a/train/templates/style.txt b/train/templates/style.txt deleted file mode 100644 index b6cd90b7d..000000000 --- a/train/templates/style.txt +++ /dev/null @@ -1 +0,0 @@ -a painting, art by [name] diff --git a/train/templates/style_filewords.txt b/train/templates/style_filewords.txt deleted file mode 100644 index 2b73960fc..000000000 --- a/train/templates/style_filewords.txt +++ /dev/null @@ -1 +0,0 @@ -photo of [filewords], art by [name] diff --git a/train/templates/subject.txt b/train/templates/subject.txt deleted file mode 100644 index 1c5a3ae93..000000000 --- a/train/templates/subject.txt +++ /dev/null @@ -1 +0,0 @@ -photo of [name] diff --git a/train/templates/subject_filewords.txt b/train/templates/subject_filewords.txt deleted file mode 100644 index be2cbdfe7..000000000 --- a/train/templates/subject_filewords.txt +++ /dev/null @@ -1 +0,0 @@ -photo of [name], [filewords] diff --git a/train/templates/unknown.txt b/train/templates/unknown.txt deleted file mode 100644 index 259b7ef34..000000000 --- a/train/templates/unknown.txt +++ /dev/null @@ -1 +0,0 @@ -[name] diff --git a/train/templates/unknown_filewords.txt b/train/templates/unknown_filewords.txt deleted file mode 100644 index 43a4ef17a..000000000 --- a/train/templates/unknown_filewords.txt +++ /dev/null @@ -1 +0,0 @@ -[name], [filewords] diff --git a/webui.py b/webui.py index 2d66d36f5..b305767a8 100644 --- a/webui.py +++ b/webui.py @@ -118,7 +118,6 @@ def initialize(): shared.opts.onchange("temp_dir", gr_tempdir.on_tmpdir_changed) timer.startup.record("onchange") - modules.textual_inversion.textual_inversion.list_textual_inversion_templates() shared.reload_hypernetworks() shared.prompt_styles.reload() From 60376ebe2fb50430bd753c6cd2f9a4565682e38e Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 16:36:14 -0400 Subject: [PATCH 27/81] css fix tags overflow --- javascript/sdnext.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/sdnext.css b/javascript/sdnext.css index e33a11f7a..73b4b34e8 100644 --- a/javascript/sdnext.css +++ b/javascript/sdnext.css @@ -220,7 +220,7 @@ table.settings-value-table td { padding: 0.4em; border: 1px solid #ccc; max-widt .extra-network-cards .card .preview { box-shadow: var(--button-shadow); min-height: 30px; } .extra-network-cards .card:hover .overlay { background: rgba(0, 0, 0, 0.70); 
} .extra-network-cards .card:hover .preview { box-shadow: none; filter: grayscale(100%); } -.extra-network-cards .card .overlay .tags { display: none; overflow-wrap: anywhere; position: absolute; top: 100%; z-index: 20; background: var(--body-background-fill); } +.extra-network-cards .card .overlay .tags { display: none; overflow-wrap: anywhere; position: absolute; top: 100%; z-index: 20; background: var(--body-background-fill); overflow-x: hidden; overflow-y: auto; max-height: 333px; } .extra-network-cards .card .overlay .tag { padding: 2px; margin: 2px; background: rgba(70, 70, 70, 0.60); font-size: var(--text-md); cursor: pointer; display: inline-block; } .extra-network-cards .card .actions>span { padding: 4px; font-size: 34px !important; } .extra-network-cards .card .actions>span:hover { color: var(--highlight-color); } From a1f53add94a2d49cb7f037f3a62a323ee44968ea Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 16 Jun 2024 17:00:35 -0400 Subject: [PATCH 28/81] fix typos --- installer.py | 2 +- modules/devices.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/installer.py b/installer.py index 3a7a91e00..9c01b90e5 100644 --- a/installer.py +++ b/installer.py @@ -1047,7 +1047,7 @@ def check_ui(ver): if ver['branch'] == ver['ui']: log.debug(f'Branch synchronized: {ver["branch"]}') else: - log.debug(f'Branch synch failed: sdnext={ver["branch"]} ui={ver["ui"]}') + log.debug(f'Branch sync failed: sdnext={ver["branch"]} ui={ver["ui"]}') except Exception as e: log.debug(f'Branch switch: {e}') os.chdir(cwd) diff --git a/modules/devices.py b/modules/devices.py index 675fcac19..f3bdbaffe 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -175,7 +175,7 @@ def set_cuda_sync_mode(mode): return try: import ctypes - log.info(f'Set cuda synch: mode={mode}') + log.info(f'Set cuda sync: mode={mode}') torch.cuda.set_device(torch.device(get_optimal_device_name())) ctypes.CDLL('libcudart.so').cudaSetDeviceFlags({'auto': 0, 'spin': 1, 'yield': 2, 'block': 4}[mode]) except Exception: From d3802f9fbc049107e91420da3188c1a645c73b61 Mon Sep 17 00:00:00 2001 From: AI-Casanova <54461896+AI-Casanova@users.noreply.github.com> Date: Sun, 16 Jun 2024 18:00:30 -0500 Subject: [PATCH 29/81] SD3 Prompt Parsing, preliminary --- modules/processing_args.py | 6 ++- modules/prompt_parser_diffusers.py | 74 +++++++++++++++++++++++++----- 2 files changed, 68 insertions(+), 12 deletions(-) diff --git a/modules/processing_args.py b/modules/processing_args.py index a8e7c33a3..163c51efd 100644 --- a/modules/processing_args.py +++ b/modules/processing_args.py @@ -106,7 +106,7 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2 shared.log.error(f'Sampler timesteps: {e}') else: shared.log.warning(f'Sampler: sampler={model.scheduler.__class__.__name__} timesteps not supported') - if shared.opts.prompt_attention != 'Fixed attention' and ('StableDiffusion' in model.__class__.__name__ or 'StableCascade' in model.__class__.__name__) and 'Onnx' not in model.__class__.__name__ and 'StableDiffusion3' not in model.__class__.__name__: + if shared.opts.prompt_attention != 'Fixed attention' and ('StableDiffusion' in model.__class__.__name__ or 'StableCascade' in model.__class__.__name__) and 'Onnx' not in model.__class__.__name__: try: prompt_parser_diffusers.encode_prompts(model, p, prompts, negative_prompts, steps=steps, clip_skip=clip_skip) parser = shared.opts.prompt_attention @@ -126,6 +126,8 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, 
prompts_2 args['prompt_embeds_pooled'] = p.positive_pooleds[0].unsqueeze(0) elif 'XL' in model.__class__.__name__ and len(getattr(p, 'positive_pooleds', [])) > 0: args['pooled_prompt_embeds'] = p.positive_pooleds[0] + elif 'StableDiffusion3' in model.__class__.__name__ and len(getattr(p, 'positive_pooleds', [])) > 0: + args['pooled_prompt_embeds'] = p.positive_pooleds[0] else: args['prompt'] = prompts if 'negative_prompt' in possible: @@ -135,6 +137,8 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2 args['negative_prompt_embeds_pooled'] = p.negative_pooleds[0].unsqueeze(0) if 'XL' in model.__class__.__name__ and len(getattr(p, 'negative_pooleds', [])) > 0: args['negative_pooled_prompt_embeds'] = p.negative_pooleds[0] + if 'StableDiffusion3' in model.__class__.__name__ and len(getattr(p, 'negative_pooleds', [])) > 0: + args['negative_pooled_prompt_embeds'] = p.negative_pooleds[0] else: if 'PixArtSigmaPipeline' in model.__class__.__name__: # pixart-sigma pipeline throws list-of-list for negative prompt args['negative_prompt'] = negative_prompts[0] diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 3c472b227..0d410af87 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -217,7 +217,7 @@ def prepare_embedding_providers(pipe, clip_skip) -> list[EmbeddingsProvider]: embeddings_providers = [] if 'StableCascade' in pipe.__class__.__name__: embedding_type = -(clip_skip) - elif 'XL' in pipe.__class__.__name__: + elif 'XL' in pipe.__class__.__name__ or 'SD3' in pipe.__class__.__name__: embedding_type = -(clip_skip + 1) else: embedding_type = clip_skip @@ -257,15 +257,33 @@ def pad_to_same_length(pipe, embeds): embeds[i] = embed return embeds +def split_prompts(prompt, SD3 = False): + if prompt.find("TE2:") != -1: + prompt, prompt2 = prompt.split("TE2:") + else: + prompt2 = prompt + + if prompt.find("TE3:") != -1: + prompt, prompt3 = prompt.split("TE3:") + elif prompt2.find("TE3:") != -1: + prompt2, prompt3 = prompt2.split("TE3:") + else: + prompt3 = prompt + + prompt = prompt.strip() + prompt2 = " " if prompt2.strip() == "" else prompt2.strip() + prompt3 = " " if prompt3.strip() == "" else prompt3.strip() + + if SD3 and prompt3 != " ": + ps, ws = get_prompts_with_weights(prompt3) + prompt3 = ", ".join(ps) + return prompt, prompt2, prompt3 def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", clip_skip: int = None): device = pipe.device if str(pipe.device) != 'meta' else devices.device - prompt_split = prompt.split("TE2:") - prompt = prompt_split[0] - prompt_2 = prompt_split[-1] - neg_prompt_split = neg_prompt.split("TE2:") - neg_prompt_2 = neg_prompt_split[-1] - neg_prompt = neg_prompt_split[0] + SD3 = hasattr(pipe, 'text_encoder_3') + prompt, prompt_2, prompt_3 = split_prompts(prompt, SD3) + neg_prompt, neg_prompt_2, neg_prompt_3 = split_prompts(prompt, SD3) if prompt != prompt_2: ps = [get_prompts_with_weights(p) for p in [prompt, prompt_2]] @@ -285,8 +303,8 @@ def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", c embedding_providers = prepare_embedding_providers(pipe, clip_skip) prompt_embeds = [] negative_prompt_embeds = [] - pooled_prompt_embeds = None - negative_pooled_prompt_embeds = None + pooled_prompt_embeds = [] + negative_pooled_prompt_embeds = [] for i in range(len(embedding_providers)): t0 = time.time() text = list(positives[i]) @@ -310,8 +328,25 @@ def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = 
"", c embed, ntokens = embedding_providers[i].get_embeddings_for_weighted_prompt_fragments(text_batch=[negatives[i]], fragment_weights_batch=[negative_weights[i]], device=device, should_return_tokens=True) negative_prompt_embeds.append(embed) debug(f'Prompt: unpadded shape={prompt_embeds[0].shape} TE{i+1} ptokens={torch.count_nonzero(ptokens)} ntokens={torch.count_nonzero(ntokens)} time={(time.time() - t0):.3f}') - - if prompt_embeds[-1].shape[-1] > 768: + if SD3: + t0 = time.time() + for i in range(len(prompt_embeds)): + pooled_prompt_embeds.append(prompt_embeds[i][ + torch.arange(prompt_embeds[i].shape[0], device=device), + (ptokens.to(dtype=torch.int, device=device) == 49407) + .int() + .argmax(dim=-1), + ]) + negative_pooled_prompt_embeds.append(negative_prompt_embeds[i][ + torch.arange(negative_prompt_embeds[i].shape[0], device=device), + (ntokens.to(dtype=torch.int, device=device) == 49407) + .int() + .argmax(dim=-1), + ]) + pooled_prompt_embeds = torch.cat(pooled_prompt_embeds, dim=-1) + negative_pooled_prompt_embeds = torch.cat(negative_pooled_prompt_embeds, dim=-1) + debug(f'Prompt: pooled shape={pooled_prompt_embeds[0].shape} time={(time.time() - t0):.3f}') + elif prompt_embeds[-1].shape[-1] > 768: t0 = time.time() if shared.opts.diffusers_pooled == "weighted": pooled_prompt_embeds = prompt_embeds[-1][ @@ -341,4 +376,21 @@ def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", c debug(f'Prompt: positive={prompt_embeds.shape if prompt_embeds is not None else None} pooled={pooled_prompt_embeds.shape if pooled_prompt_embeds is not None else None} negative={negative_prompt_embeds.shape if negative_prompt_embeds is not None else None} pooled={negative_pooled_prompt_embeds.shape if negative_pooled_prompt_embeds is not None else None}') if prompt_embeds.shape[1] != negative_prompt_embeds.shape[1]: [prompt_embeds, negative_prompt_embeds] = pad_to_same_length(pipe, [prompt_embeds, negative_prompt_embeds]) + if SD3: + t5_prompt_embed = pipe._get_t5_prompt_embeds( + prompt=prompt_3, + num_images_per_prompt=prompt_embeds.shape[0], + device=pipe.device, + ) + prompt_embeds = torch.nn.functional.pad( + prompt_embeds, (0, t5_prompt_embed.shape[-1] - prompt_embeds.shape[-1])) + prompt_embeds = torch.cat([prompt_embeds, t5_prompt_embed], dim=-2) + t5_negative_prompt_embed = pipe._get_t5_prompt_embeds( + prompt=neg_prompt_3, + num_images_per_prompt=prompt_embeds.shape[0], + device=pipe.device, + ) + negative_prompt_embeds = torch.nn.functional.pad( + negative_prompt_embeds, (0, t5_negative_prompt_embed.shape[-1] - negative_prompt_embeds.shape[-1])) + negative_prompt_embeds = torch.cat([negative_prompt_embeds, t5_negative_prompt_embed], dim=-2) return prompt_embeds, pooled_prompt_embeds, negative_prompt_embeds, negative_pooled_prompt_embeds From bbaa03a4669ec85fd05a95b0691ef69924db886c Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Mon, 17 Jun 2024 09:39:30 -0400 Subject: [PATCH 30/81] add 4bit t5 --- CHANGELOG.md | 3 +++ modules/model_sd3.py | 13 +++++++++++++ modules/sd_models.py | 10 +++++----- modules/shared.py | 2 +- scripts/xyz_grid.py | 6 ++++++ webui.py | 2 +- 6 files changed, 29 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 20b92b865..a8d77aa81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,15 @@ - enable taesd preview and non-full quality mode - enable base LoRA support +- add support for 4bit quantized t5 text encoder + simply select in *settings -> model -> text encoder* - simplified loading of model in single-file 
safetensors format loading sd3 can now be performed fully offline - add support for nncf compressed weights, thanks @Disty0! - add support for sampler shift for Euler FlowMatch see *settings -> samplers*, also available as param in xyz grid higher shift means model will spend more time on structure and less on details +- add support for selecting text encoder in xyz grid ### Improvements: General diff --git a/modules/model_sd3.py b/modules/model_sd3.py index 312976183..4d8aaf250 100644 --- a/modules/model_sd3.py +++ b/modules/model_sd3.py @@ -86,6 +86,19 @@ def load_t5(pipe, module, te3=None, cache_dir=None): torch_dtype=pipe.text_encoder.dtype, ) setattr(pipe, module, t5) + elif 'fp4' in te3.lower(): + modelloader.hf_login() + from installer import install + install('bitsandbytes', quiet=True) + quantization_config = transformers.BitsAndBytesConfig(load_in_4bit=True) + t5 = transformers.T5EncoderModel.from_pretrained( + repo_id, + subfolder='text_encoder_3', + quantization_config=quantization_config, + cache_dir=cache_dir, + torch_dtype=pipe.text_encoder.dtype, + ) + setattr(pipe, module, t5) elif 'fp8' in te3.lower(): modelloader.hf_login() from installer import install diff --git a/modules/sd_models.py b/modules/sd_models.py index 81214bae5..b06afefe8 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -1528,18 +1528,18 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None, timer=None, def reload_text_encoder(initial=False): - if initial and (shared.opts.sd_te3 is None or shared.opts.sd_te3 == 'None'): + if initial and (shared.opts.sd_text_encoder is None or shared.opts.sd_text_encoder == 'None'): return # dont unload signature = inspect.signature(shared.sd_model.__class__.__init__, follow_wrapped=True, eval_str=True).parameters t5 = [k for k, v in signature.items() if 'T5EncoderModel' in str(v)] if len(t5) > 0: from modules.model_sd3 import load_t5 - shared.log.debug(f'Load: t5={shared.opts.sd_te3} module="{t5[0]}"') - load_t5(pipe=shared.sd_model, module=t5[0], te3=shared.opts.sd_te3, cache_dir=shared.opts.diffusers_dir) + shared.log.debug(f'Load: t5={shared.opts.sd_text_encoder} module="{t5[0]}"') + load_t5(pipe=shared.sd_model, module=t5[0], te3=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir) elif hasattr(shared.sd_model, 'text_encoder_3'): from modules.model_sd3 import load_t5 - shared.log.debug(f'Load: t5={shared.opts.sd_te3} module="text_encoder_3"') - load_t5(pipe=shared.sd_model, module='text_encoder_3', te3=shared.opts.sd_te3, cache_dir=shared.opts.diffusers_dir) + shared.log.debug(f'Load: t5={shared.opts.sd_text_encoder} module="text_encoder_3"') + load_t5(pipe=shared.sd_model, module='text_encoder_3', te3=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir) def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', force=False): diff --git a/modules/shared.py b/modules/shared.py index 4c5782c19..bd5fd4369 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -391,7 +391,7 @@ def temp_disable_extensions(): "sd_model_refiner": OptionInfo('None', "Refiner model", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list), "sd_unet": OptionInfo("None", "UNET model", gr.Dropdown, lambda: {"choices": shared_items.sd_unet_items()}, refresh=shared_items.refresh_unet_list), - "sd_te3": OptionInfo('None', "Text 
encoder model", gr.Dropdown, lambda: {"choices": ['None', 'T5 FP8', 'T5 FP16']}), + "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": ['None', 'T5 FP4', 'T5 FP8', 'T5 FP16']}), "sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"), "sd_model_dict": OptionInfo('None', "Use separate base dict", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": not native }), diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index 6ba75c775..c4b44282d 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -138,6 +138,11 @@ def apply_vae(p, x, xs): sd_vae.reload_vae_weights(shared.sd_model, vae_file=find_vae(x)) +def apply_te(p, x, xs): + shared.opts.data["sd_text_encoder"] = x + sd_models.reload_text_encoder() + + def apply_styles(p: processing.StableDiffusionProcessingTxt2Img, x: str, _): p.styles.extend(x.split(',')) @@ -230,6 +235,7 @@ def __init__(self, *args, **kwargs): AxisOption("Prompt S/R", str, apply_prompt, fmt=format_value), AxisOption("Model", str, apply_checkpoint, fmt=format_value, cost=1.0, choices=lambda: sorted(sd_models.checkpoints_list)), AxisOption("VAE", str, apply_vae, cost=0.7, choices=lambda: ['None'] + list(sd_vae.vae_dict)), + AxisOption("Text encoder", str, apply_te, cost=0.7, choices=lambda: ['None', 'T5 FP4', 'T5 FP8', 'T5 FP16']), AxisOption("Styles", str, apply_styles, choices=lambda: [s.name for s in shared.prompt_styles.styles.values()]), AxisOption("Seed", int, apply_field("seed")), AxisOption("Steps", int, apply_field("steps")), diff --git a/webui.py b/webui.py index b305767a8..c9af90e75 100644 --- a/webui.py +++ b/webui.py @@ -168,7 +168,7 @@ def load_model(): thread_refiner.join() shared.opts.onchange("sd_model_checkpoint", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(op='model')), call=False) shared.opts.onchange("sd_model_refiner", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(op='refiner')), call=False) - shared.opts.onchange("sd_te3", wrap_queued_call(lambda: modules.sd_models.reload_text_encoder()), call=False) + shared.opts.onchange("sd_text_encoder", wrap_queued_call(lambda: modules.sd_models.reload_text_encoder()), call=False) shared.opts.onchange("sd_model_dict", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(op='dict')), call=False) shared.opts.onchange("sd_vae", wrap_queued_call(lambda: modules.sd_vae.reload_vae_weights()), call=False) shared.opts.onchange("sd_backend", wrap_queued_call(lambda: modules.sd_models.change_backend()), call=False) From 7435caaba5c162cd011401b21713592b21403d7f Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Mon, 17 Jun 2024 10:44:18 -0400 Subject: [PATCH 31/81] fix control api --- CHANGELOG.md | 1 + modules/control/run.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8d77aa81..d0fd1e531 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ - fix compatibility with latest adetailer - fix invalid sampler warning - fix starting from non git repo +- fix control api negative prompt handling - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified - remove obsolete training code leftovers diff --git a/modules/control/run.py b/modules/control/run.py index 944474d50..5e0749d81 100644 --- a/modules/control/run.py +++ b/modules/control/run.py @@ -55,7 +55,7 @@ 
def control_set(kwargs): def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], inits: List[Image.Image] = [], mask: Image.Image = None, unit_type: str = None, is_generator: bool = True, input_type: int = 0, - prompt: str = '', negative: str = '', styles: List[str] = [], + prompt: str = '', negative_prompt: str = '', styles: List[str] = [], steps: int = 20, sampler_index: int = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, cfg_scale: float = 6.0, clip_skip: float = 1.0, image_cfg_scale: float = 6.0, diffusers_guidance_rescale: float = 0.7, pag_scale: float = 0.0, pag_adaptive: float = 0.5, cfg_end: float = 1.0, @@ -94,7 +94,7 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini p = StableDiffusionProcessingControl( prompt = prompt, - negative_prompt = negative, + negative_prompt = negative_prompt, styles = styles, steps = steps, n_iter = batch_count, From 0de6be462130f6a0198b432000848dc6297c76e6 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Mon, 17 Jun 2024 11:03:24 -0400 Subject: [PATCH 32/81] fix save style without name --- CHANGELOG.md | 1 + .../Lora/ui_extra_networks_lora.py | 2 +- html/locale_en.json | 2 +- html/locale_ko.json | 2 +- modules/shared.py | 4 +- modules/styles.py | 2 +- modules/ui_extra_networks.py | 21 +++---- modules/ui_extra_networks_checkpoints.py | 2 +- modules/ui_extra_networks_hypernets.py | 2 +- modules/ui_extra_networks_styles.py | 3 +- .../ui_extra_networks_textual_inversion.py | 2 +- modules/ui_extra_networks_vae.py | 2 +- modules/ui_prompt_styles.py | 57 ------------------- 13 files changed, 24 insertions(+), 78 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d0fd1e531..ad593c8c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ - fix invalid sampler warning - fix starting from non git repo - fix control api negative prompt handling +- fix saving style without name provided - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified - remove obsolete training code leftovers diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index d224f4c67..3a612a635 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -102,7 +102,7 @@ def find_version(): return item except Exception as e: - shared.log.debug(f"Extra networks error: type=lora file={name} {e}") + shared.log.debug(f"Networks error: type=lora file={name} {e}") from modules import errors errors.display('e', 'Lora') return None diff --git a/html/locale_en.json b/html/locale_en.json index 4b773c730..800ee6e58 100644 --- a/html/locale_en.json +++ b/html/locale_en.json @@ -230,7 +230,7 @@ {"id":"","label":"Control Options","localized":"","hint":"Settings related the Control tab"}, {"id":"","label":"Training","localized":"","hint":"Settings related to model training configuration and directories"}, {"id":"","label":"Interrogate","localized":"","hint":"Settings related to interrogation configuration"}, - {"id":"","label":"Extra Networks","localized":"","hint":"Settings related to extra networks user interface, extra networks multiplier defaults, and configuration"}, + {"id":"","label":"Networks","localized":"","hint":"Settings related to networks user interface, networks multiplier defaults, and configuration"}, {"id":"","label":"Licenses","localized":"","hint":"View licenses of all 
additional included libraries"}, {"id":"","label":"Show all pages","localized":"","hint":"Show all settings pages"} ], diff --git a/html/locale_ko.json b/html/locale_ko.json index c333d934c..0ddf62afb 100644 --- a/html/locale_ko.json +++ b/html/locale_ko.json @@ -48,7 +48,7 @@ {"id":"","label":"Interrogate\nDeepBooru","localized":"DeepBooru 모델 사용","hint":"DeepBooru 모델을 사용해 이미지에서 설명을 추출한다."} ], "extra networks": [ - {"id":"","label":"Extra networks tab order","localized":"엑스트라 네트워크 탭 순서","hint":"Comma-separated list of tab names; tabs listed here will appear in the extra networks UI first and in order lsited"}, + {"id":"","label":"Networks tab order","localized":"엑스트라 네트워크 탭 순서","hint":"Comma-separated list of tab names; tabs listed here will appear in the extra networks UI first and in order lsited"}, {"id":"","label":"UI position","localized":"UI 위치","hint":""}, {"id":"","label":"UI height (%)","localized":"UI 높이 (%)","hint":""}, {"id":"","label":"UI sidebar width (%)","localized":"UI 사이드바 너비 (%)","hint":""}, diff --git a/modules/shared.py b/modules/shared.py index bd5fd4369..d65084053 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -806,9 +806,9 @@ def temp_disable_extensions(): "deepbooru_filter_tags": OptionInfo("", "Filter out tags from deepbooru output"), })) -options_templates.update(options_section(('extra_networks', "Extra Networks"), { +options_templates.update(options_section(('extra_networks', "Networks"), { "extra_networks_sep1": OptionInfo("

Extra networks UI

", "", gr.HTML), - "extra_networks": OptionInfo(["All"], "Extra networks", gr.Dropdown, lambda: {"multiselect":True, "choices": ['All'] + [en.title for en in extra_networks]}), + "extra_networks": OptionInfo(["All"], "Networks", gr.Dropdown, lambda: {"multiselect":True, "choices": ['All'] + [en.title for en in extra_networks]}), "extra_networks_sort": OptionInfo("Default", "Sort order", gr.Dropdown, {"choices": ['Default', 'Name [A-Z]', 'Name [Z-A]', 'Date [Newest]', 'Date [Oldest]', 'Size [Largest]', 'Size [Smallest]']}), "extra_networks_view": OptionInfo("gallery", "UI view", gr.Radio, {"choices": ["gallery", "list"]}), "extra_networks_card_cover": OptionInfo("sidebar", "UI position", gr.Radio, {"choices": ["cover", "inline", "sidebar"]}), diff --git a/modules/styles.py b/modules/styles.py index d118800e8..4269b3f55 100644 --- a/modules/styles.py +++ b/modules/styles.py @@ -328,7 +328,7 @@ def save_styles(self, path, verbose=False): "preview": "", } keepcharacters = (' ','.','_') - fn = "".join(c for c in name if c.isalnum() or c in keepcharacters).rstrip() + fn = "".join(c for c in name if c.isalnum() or c in keepcharacters).strip() fn = os.path.join(path, fn + ".json") try: with open(fn, 'w', encoding='utf-8') as f: diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py index bd5238cfb..505a62145 100644 --- a/modules/ui_extra_networks.py +++ b/modules/ui_extra_networks.py @@ -71,7 +71,7 @@ def get_metadata(page: str = "", item: str = ""): metadata = page.metadata.get(item, 'none') if metadata is None: metadata = '' - # shared.log.debug(f"Extra networks metadata: page='{page}' item={item} len={len(metadata)}") + # shared.log.debug(f"Networks metadata: page='{page}' item={item} len={len(metadata)}") return JSONResponse({"metadata": metadata}) def get_info(page: str = "", item: str = ""): @@ -84,7 +84,7 @@ def get_info(page: str = "", item: str = ""): info = page.find_info(item['filename']) if info is None: info = {} - # shared.log.debug(f"Extra networks info: page='{page.name}' item={item['name']} len={len(info)}") + # shared.log.debug(f"Networks info: page='{page.name}' item={item['name']} len={len(info)}") return JSONResponse({"info": info}) def get_desc(page: str = "", item: str = ""): @@ -97,7 +97,7 @@ def get_desc(page: str = "", item: str = ""): desc = page.find_description(item['filename']) if desc is None: desc = '' - # shared.log.debug(f"Extra networks desc: page='{page.name}' item={item['name']} len={len(desc)}") + # shared.log.debug(f"Networks desc: page='{page.name}' item={item['name']} len={len(desc)}") return JSONResponse({"description": desc}) app.add_api_route("/sd_extra_networks/thumb", fetch_file, methods=["GET"]) @@ -186,7 +186,7 @@ def create_thumb(self): except Exception as e: shared.log.warning(f'Extra network error creating thumbnail: {f} {e}') if created > 0: - shared.log.info(f"Extra network thumbnails: {self.name} created={created}") + shared.log.info(f"Network thumbnails: {self.name} created={created}") self.missing_thumbs.clear() def create_items(self, tabname): @@ -235,7 +235,7 @@ def create_page(self, tabname, skip = False): continue # if not self.is_empty(tgt): subdirs[subdir] = 1 - debug(f"Extra networks: page='{self.name}' subfolders={list(subdirs)}") + debug(f"Networks: page='{self.name}' subfolders={list(subdirs)}") subdirs = OrderedDict(sorted(subdirs.items())) if self.name == 'model': subdirs['Reference'] = 1 @@ -272,7 +272,7 @@ def create_page(self, tabname, skip = False): self.html += ''.join(htmls) self.page_time = time.time() 
self.html = f"
{subdirs_html}
{self.html}
" - shared.log.debug(f"Extra networks: page='{self.name}' items={len(self.items)} subfolders={len(subdirs)} tab={tabname} folders={self.allowed_directories_for_previews()} list={self.list_time:.2f} thumb={self.preview_time:.2f} desc={self.desc_time:.2f} info={self.info_time:.2f} workers={shared.max_workers} sort={shared.opts.extra_networks_sort}") + shared.log.debug(f"Networks: page='{self.name}' items={len(self.items)} subfolders={len(subdirs)} tab={tabname} folders={self.allowed_directories_for_previews()} list={self.list_time:.2f} thumb={self.preview_time:.2f} desc={self.desc_time:.2f} info={self.info_time:.2f} workers={shared.max_workers} sort={shared.opts.extra_networks_sort}") if len(self.missing_thumbs) > 0: threading.Thread(target=self.create_thumb).start() return self.patch(self.html, tabname) @@ -570,7 +570,7 @@ def toggle_visibility(is_visible): with gr.Group(elem_id=f"{tabname}_extra_details_tabs", visible=False) as ui.details_tabs: with gr.Tabs(): with gr.Tab('Description', elem_classes=['extra-details-tabs']): - desc = gr.Textbox('', show_label=False, lines=8, placeholder="Extra network description...") + desc = gr.Textbox('', show_label=False, lines=8, placeholder="Network description...") ui.details_components.append(desc) with gr.Row(): btn_save_desc = gr.Button('Save', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_save_desc') @@ -895,7 +895,8 @@ def ui_save_click(): return res def ui_quicksave_click(name): - if name is None: + if name is None or len(name) < 1: + shared.log.warning("Network quick save style: no name provided") return fn = os.path.join(paths.data_path, "params.txt") if os.path.exists(fn): @@ -915,9 +916,9 @@ def ui_quicksave_click(name): } shared.writefile(item, fn, silent=True) if len(prompt) > 0: - shared.log.debug(f"Extra network quick save style: item={name} filename='{fn}'") + shared.log.debug(f"Network quick save style: item={name} filename='{fn}'") else: - shared.log.warning(f"Extra network quick save model: item={name} filename='{fn}' prompt is empty") + shared.log.warning(f"Network quick save model: item={name} filename='{fn}' prompt is empty") def ui_sort_cards(sort_order): if shared.opts.extra_networks_sort != sort_order: diff --git a/modules/ui_extra_networks_checkpoints.py b/modules/ui_extra_networks_checkpoints.py index a6c95ee7c..7a5285d49 100644 --- a/modules/ui_extra_networks_checkpoints.py +++ b/modules/ui_extra_networks_checkpoints.py @@ -64,7 +64,7 @@ def create_item(self, name): record["info"] = self.find_info(checkpoint.filename) record["description"] = self.find_description(checkpoint.filename, record["info"]) except Exception as e: - shared.log.debug(f"Extra networks error: type=model file={name} {e}") + shared.log.debug(f"Networks error: type=model file={name} {e}") return record def list_items(self): diff --git a/modules/ui_extra_networks_hypernets.py b/modules/ui_extra_networks_hypernets.py index caf861938..b6fbbd38f 100644 --- a/modules/ui_extra_networks_hypernets.py +++ b/modules/ui_extra_networks_hypernets.py @@ -27,7 +27,7 @@ def list_items(self): "size": os.path.getsize(path), } except Exception as e: - shared.log.debug(f"Extra networks error: type=hypernetwork file={path} {e}") + shared.log.debug(f"Networks error: type=hypernetwork file={path} {e}") def allowed_directories_for_previews(self): return [shared.opts.hypernetwork_dir] diff --git a/modules/ui_extra_networks_styles.py b/modules/ui_extra_networks_styles.py index d8bb13cfa..f03cb22be 100644 --- a/modules/ui_extra_networks_styles.py +++ 
b/modules/ui_extra_networks_styles.py @@ -93,11 +93,12 @@ def create_item(self, k): "size": os.path.getsize(style.filename), } except Exception as e: - shared.log.debug(f"Extra networks error: type=style file={k} {e}") + shared.log.debug(f"Networks error: type=style file={k} {e}") return item def list_items(self): items = [self.create_item(k) for k in list(shared.prompt_styles.styles)] + items = [item for item in items if item is not None] self.update_all_previews(items) return items diff --git a/modules/ui_extra_networks_textual_inversion.py b/modules/ui_extra_networks_textual_inversion.py index 3b0ec0948..0e086e55d 100644 --- a/modules/ui_extra_networks_textual_inversion.py +++ b/modules/ui_extra_networks_textual_inversion.py @@ -37,7 +37,7 @@ def create_item(self, embedding: Embedding): record["info"] = self.find_info(embedding.filename) record["description"] = self.find_description(embedding.filename, record["info"]) except Exception as e: - shared.log.debug(f"Extra networks error: type=embedding file={embedding.filename} {e}") + shared.log.debug(f"Networks error: type=embedding file={embedding.filename} {e}") return record def list_items(self): diff --git a/modules/ui_extra_networks_vae.py b/modules/ui_extra_networks_vae.py index a4a212fdf..8a161bbbb 100644 --- a/modules/ui_extra_networks_vae.py +++ b/modules/ui_extra_networks_vae.py @@ -31,7 +31,7 @@ def list_items(self): record["description"] = self.find_description(filename, record["info"]) yield record except Exception as e: - shared.log.debug(f"Extra networks error: type=vae file={filename} {e}") + shared.log.debug(f"Networks error: type=vae file={filename} {e}") def allowed_directories_for_previews(self): return [v for v in [shared.opts.vae_dir] if v is not None] diff --git a/modules/ui_prompt_styles.py b/modules/ui_prompt_styles.py index f0b031eaa..a7f81ee8b 100644 --- a/modules/ui_prompt_styles.py +++ b/modules/ui_prompt_styles.py @@ -46,60 +46,3 @@ def refresh_styles(): class UiPromptStyles: def __init__(self, tabname, main_ui_prompt, main_ui_negative_prompt): # pylint: disable=unused-argument self.dropdown = gr.Dropdown(label="Styles", elem_id=f"{tabname}_styles", choices=[style.name for style in shared.prompt_styles.styles.values()], value=[], multiselect=True) - - """ - def __init__(self, tabname, main_ui_prompt, main_ui_negative_prompt): - self.tabname = tabname - - with gr.Row(elem_id=f"{tabname}_styles_row"): - self.dropdown = gr.Dropdown(label="Styles", show_label=False, elem_id=f"{tabname}_styles", choices=list(shared.prompt_styles.styles), value=[], multiselect=True, tooltip="Styles") - edit_button = ui_components.ToolButton(value=styles_edit_symbol, elem_id=f"{tabname}_styles_edit_button", tooltip="Edit styles") - - with gr.Box(elem_id=f"{tabname}_styles_dialog", elem_classes="popup-dialog") as styles_dialog: - with gr.Row(): - self.selection = gr.Dropdown(label="Styles", elem_id=f"{tabname}_styles_edit_select", choices=list(shared.prompt_styles.styles), value=[], allow_custom_value=True, info="Styles allow you to add custom text to prompt. Use the {prompt} token in style text, and it will be replaced with user's prompt when applying style. 
Otherwise, style's text will be added to the end of the prompt.") - ui_common.create_refresh_button([self.dropdown, self.selection], shared.prompt_styles.reload, lambda: {"choices": list(shared.prompt_styles.styles)}, f"refresh_{tabname}_styles") - self.materialize = ui_components.ToolButton(value=styles_materialize_symbol, elem_id=f"{tabname}_style_apply", tooltip="Apply all selected styles from the style selction dropdown in main UI to the prompt.") - - with gr.Row(): - self.prompt = gr.Textbox(label="Prompt", show_label=True, elem_id=f"{tabname}_edit_style_prompt", lines=3) - - with gr.Row(): - self.neg_prompt = gr.Textbox(label="Negative prompt", show_label=True, elem_id=f"{tabname}_edit_style_neg_prompt", lines=3) - - with gr.Row(): - self.save = gr.Button('Save', variant='primary', elem_id=f'{tabname}_edit_style_save', visible=False) - self.delete = gr.Button('Delete', variant='primary', elem_id=f'{tabname}_edit_style_delete', visible=False) - self.close = gr.Button('Close', variant='secondary', elem_id=f'{tabname}_edit_style_close') - - self.selection.change( - fn=select_style, - inputs=[self.selection], - outputs=[self.prompt, self.neg_prompt, self.delete, self.save], - show_progress=False, - ) - - self.save.click( - fn=save_style, - inputs=[self.selection, self.prompt, self.neg_prompt], - outputs=[self.delete], - show_progress=False, - ).then(refresh_styles, outputs=[self.dropdown, self.selection], show_progress=False) - - self.delete.click( - fn=delete_style, - _js='function(name){ if(name == "") return ""; return confirm("Delete style " + name + "?") ? name : ""; }', - inputs=[self.selection], - outputs=[self.selection, self.prompt, self.neg_prompt], - show_progress=False, - ).then(refresh_styles, outputs=[self.dropdown, self.selection], show_progress=False) - - self.materialize.click( - fn=materialize_styles, - inputs=[main_ui_prompt, main_ui_negative_prompt, self.dropdown], - outputs=[main_ui_prompt, main_ui_negative_prompt, self.dropdown], - show_progress=False, - ).then(fn=None, _js="function(){update_"+tabname+"_tokens(); closePopup();}", show_progress=False) - - ui_common.setup_dialog(button_show=edit_button, dialog=styles_dialog, button_close=self.close) - """ From a4309110284cdb7a21c6430fd9a301deda809f87 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Mon, 17 Jun 2024 11:13:55 -0400 Subject: [PATCH 33/81] fix t2i-color adapter --- CHANGELOG.md | 1 + modules/control/units/t2iadapter.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad593c8c5..d0b2d7722 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ - fix starting from non git repo - fix control api negative prompt handling - fix saving style without name provided +- fix t2i-color adapter - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified - remove obsolete training code leftovers diff --git a/modules/control/units/t2iadapter.py b/modules/control/units/t2iadapter.py index 1c481398b..80eca41dd 100644 --- a/modules/control/units/t2iadapter.py +++ b/modules/control/units/t2iadapter.py @@ -74,7 +74,7 @@ def __init__(self, model_id: str = None, device = None, dtype = None, load_confi self.model_id: str = model_id self.device = device self.dtype = dtype - self.load_config = { 'cache_dir': cache_dir } + self.load_config = { 'cache_dir': cache_dir, 'use_safetensors': False } if load_config is not None: self.load_config.update(load_config) if model_id is not None: @@ -101,7 +101,7 @@ def load(self, model_id: 
str = None) -> str: log.error(f'Control {what} model load failed: id="{model_id}" error=unknown model id') return log.debug(f'Control {what} model loading: id="{model_id}" path="{model_path}"') - if model_path.endswith('.pth') or model_path.endswith('.pt') or model_path.endswith('.safetensors'): + if model_path.endswith('.pth') or model_path.endswith('.pt') or model_path.endswith('.safetensors') or model_path.endswith('.bin'): from huggingface_hub import hf_hub_download parts = model_path.split('/') repo_id = f'{parts[0]}/{parts[1]}' From fc665a2bc91831ab121ae2b0463528046804742d Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Mon, 17 Jun 2024 14:49:15 -0400 Subject: [PATCH 34/81] fix has been incorrectly initialized --- CHANGELOG.md | 8 +++++++- modules/pag/pipe_sdxl.py | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d0b2d7722..14cd38a34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,12 @@ - https://github.com/huggingface/diffusers/pull/8566 - https://github.com/huggingface/diffusers/pull/8584 -## Update for 2024-06-16 +## Update for 2024-06-17 + +### Highlights for 2024-06-17 + +Following zero-day **SD3** release, a week later here's a refresh with more than a few improvements. +Other than SD3, highlight is (finally) new release of **Torch-DirectML** as well as ability to use explicit **T5** text-encoder in many supported models! ### Improvements: SD3 @@ -47,6 +52,7 @@ - fix control api negative prompt handling - fix saving style without name provided - fix t2i-color adapter +- fix sdxl "has been incorrectly initialized" - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified - remove obsolete training code leftovers diff --git a/modules/pag/pipe_sdxl.py b/modules/pag/pipe_sdxl.py index 1576926d1..13ba99bd5 100644 --- a/modules/pag/pipe_sdxl.py +++ b/modules/pag/pipe_sdxl.py @@ -446,6 +446,7 @@ def __init__( feature_extractor: CLIPImageProcessor = None, force_zeros_for_empty_prompt: bool = True, add_watermarker: Optional[bool] = None, + requires_aesthetics_score: Optional[bool] = None, # todo: patch SDXLPAG pipeline ): super().__init__() @@ -461,12 +462,11 @@ def __init__( feature_extractor=feature_extractor, ) self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) + self.register_to_config(requires_aesthetics_score=requires_aesthetics_score) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) - self.default_sample_size = self.unet.config.sample_size - - add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available() + add_watermarker = False if add_watermarker: self.watermark = StableDiffusionXLWatermarker() From ed8d2ee1d8fe538ea55427c994e899f1e53b196e Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Mon, 17 Jun 2024 19:22:32 -0400 Subject: [PATCH 35/81] update changelog --- CHANGELOG.md | 1 + modules/prompt_parser_diffusers.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 14cd38a34..ce05525c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ Other than SD3, highlight is (finally) new release of **Torch-DirectML** as well - extra networks: info display now contains link to source url if model if its known works for civitai and huggingface models - css tweaks for standardui +- css tweaks for modernui ### Fixes diff --git 
a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 3c472b227..9174426e4 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -126,7 +126,7 @@ def get_tokens(msg, prompt): except Exception: tokens.append(f'UNK_{i}') token_count = len(ids) - int(has_bos_token) - int(has_eos_token) - shared.log.trace(f'Prompt tokenizer: type={msg} tokens={token_count} {tokens}') + debug(f'Prompt tokenizer: type={msg} tokens={token_count} {tokens}') def encode_prompts(pipe, p, prompts: list, negative_prompts: list, steps: int, clip_skip: typing.Optional[int] = None): From 906b9be121af735451900a274be2799ae9694541 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Mon, 17 Jun 2024 19:22:49 -0400 Subject: [PATCH 36/81] update modernui --- extensions-builtin/sdnext-modernui | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui index 285743a83..8eaf18c49 160000 --- a/extensions-builtin/sdnext-modernui +++ b/extensions-builtin/sdnext-modernui @@ -1 +1 @@ -Subproject commit 285743a83f251ae23e3a4120d15badcead4eab33 +Subproject commit 8eaf18c491ae2a54bb4963cf332fab5303fe1a24 From 549f9ff7619f099a9d0bcabd2b8e2c8446aa39b1 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Mon, 17 Jun 2024 20:37:40 -0400 Subject: [PATCH 37/81] update inputaccordion --- extensions-builtin/sdnext-modernui | 2 +- javascript/base.css | 1 + javascript/inputAccordion.js | 31 ++++++++++-------------------- javascript/sdnext.css | 2 +- javascript/startup.js | 1 + 5 files changed, 14 insertions(+), 23 deletions(-) diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui index 8eaf18c49..dae2c67d8 160000 --- a/extensions-builtin/sdnext-modernui +++ b/extensions-builtin/sdnext-modernui @@ -1 +1 @@ -Subproject commit 8eaf18c491ae2a54bb4963cf332fab5303fe1a24 +Subproject commit dae2c67d826b631dcc343c028c60f478b0437877 diff --git a/javascript/base.css b/javascript/base.css index 799f06613..7daa8b2bd 100644 --- a/javascript/base.css +++ b/javascript/base.css @@ -17,6 +17,7 @@ .tooltip-show { opacity: 0.9; } .tooltip-left { right: unset; left: 1em; } .toolbutton-selected { background: var(--background-fill-primary) !important; } +.input-accordion-checkbox { display: none; } /* live preview */ .progressDiv { position: relative; height: 20px; background: #b4c0cc; margin-bottom: -3px; } diff --git a/javascript/inputAccordion.js b/javascript/inputAccordion.js index d4c577505..f5f49bef1 100644 --- a/javascript/inputAccordion.js +++ b/javascript/inputAccordion.js @@ -10,13 +10,10 @@ function setupAccordion(accordion) { const extra = gradioApp().querySelector(`#${accordion.id}-extra`); const span = labelWrap.querySelector('span'); let linked = true; - const isOpen = () => labelWrap.classList.contains('open'); - const observerAccordionOpen = new MutationObserver((mutations) => { mutations.forEach((mutationRecord) => { accordion.classList.toggle('input-accordion-open', isOpen()); - if (linked) { accordion.visibleCheckbox.checked = isOpen(); accordion.onVisibleCheckboxChange(); @@ -24,15 +21,9 @@ function setupAccordion(accordion) { }); }); observerAccordionOpen.observe(labelWrap, { attributes: true, attributeFilter: ['class'] }); - - if (extra) { - labelWrap.insertBefore(extra, labelWrap.lastElementChild); - } - + if (extra) labelWrap.insertBefore(extra, labelWrap.lastElementChild); accordion.onChecked = (checked) => { - if (isOpen() !== checked) { - labelWrap.click(); - } + if 
(isOpen() !== checked) labelWrap.click(); }; const visibleCheckbox = document.createElement('INPUT'); @@ -41,13 +32,9 @@ function setupAccordion(accordion) { visibleCheckbox.id = `${accordion.id}-visible-checkbox`; visibleCheckbox.className = `${gradioCheckbox.className} input-accordion-checkbox`; span.insertBefore(visibleCheckbox, span.firstChild); - accordion.visibleCheckbox = visibleCheckbox; accordion.onVisibleCheckboxChange = () => { - if (linked && isOpen() !== visibleCheckbox.checked) { - labelWrap.click(); - } - + if (linked && isOpen() !== visibleCheckbox.checked) labelWrap.click(); gradioCheckbox.checked = visibleCheckbox.checked; updateInput(gradioCheckbox); }; @@ -59,8 +46,10 @@ function setupAccordion(accordion) { visibleCheckbox.addEventListener('input', accordion.onVisibleCheckboxChange); } -onUiLoaded(() => { - for (const accordion of gradioApp().querySelectorAll('.input-accordion')) { - setupAccordion(accordion); - } -}); +// onUiLoaded(() => { +// for (const accordion of gradioApp().querySelectorAll('.input-accordion')) setupAccordion(accordion); +// }); + +function initAccordions() { + for (const accordion of gradioApp().querySelectorAll('.input-accordion')) setupAccordion(accordion); +} diff --git a/javascript/sdnext.css b/javascript/sdnext.css index 73b4b34e8..f2d553106 100644 --- a/javascript/sdnext.css +++ b/javascript/sdnext.css @@ -240,7 +240,7 @@ table.settings-value-table td { padding: 0.4em; border: 1px solid #ccc; max-widt .extra-details > div { overflow-y: auto; min-height: 40vh; max-height: 80vh; align-self: flex-start; } .extra-details td:first-child { font-weight: bold; vertical-align: top; } .extra-details .gradio-image { max-height: 50vh; } - +.input-accordion-checkbox { display: none !important; } /* specific elements */ #modelmerger_interp_description { margin-top: 1em; margin-bottom: 1em; } diff --git a/javascript/startup.js b/javascript/startup.js index ab930645f..f1a44faf5 100644 --- a/javascript/startup.js +++ b/javascript/startup.js @@ -12,6 +12,7 @@ async function initStartup() { initLogMonitor(); initContextMenu(); initDragDrop(); + initAccordions(); initSettings(); initImageViewer(); initGallery(); From 4122c6fbce08b1e3b9d38b8fe3a75edc693f2e75 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Tue, 18 Jun 2024 08:37:14 -0400 Subject: [PATCH 38/81] fix api ip-adapter --- CHANGELOG.md | 3 ++- cli/api-json.py | 55 +++++++++++++++++++++++++++++++++++++++++++ cli/image-encode.py | 32 +++++++++++++++++++++++++ modules/api/models.py | 4 ++-- wiki | 2 +- 5 files changed, 92 insertions(+), 4 deletions(-) create mode 100755 cli/api-json.py create mode 100755 cli/image-encode.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ce05525c3..363020797 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,7 +44,6 @@ Other than SD3, highlight is (finally) new release of **Torch-DirectML** as well - fix unsaturated outputs, force apply vae config on model load - fix hidiffusion handling of non-square aspect ratios, thanks @ShenZhang-Shin! 
- fix control second pass resize -- fix api face-hires - fix **hunyuandit** set attention processor - fix civitai download without name - fix compatibility with latest adetailer @@ -54,6 +53,8 @@ Other than SD3, highlight is (finally) new release of **Torch-DirectML** as well - fix saving style without name provided - fix t2i-color adapter - fix sdxl "has been incorrectly initialized" +- fix api face-hires +- fix api ip-adapter - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified - remove obsolete training code leftovers diff --git a/cli/api-json.py b/cli/api-json.py new file mode 100755 index 000000000..889b70d01 --- /dev/null +++ b/cli/api-json.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python + +# curl -vX POST http://localhost:7860/sdapi/v1/txt2img --header "Content-Type: application/json" -d @3261.json +import os +import json +import logging +import argparse +import requests +import urllib3 + + +sd_url = os.environ.get('SDAPI_URL', "http://127.0.0.1:7860") +sd_username = os.environ.get('SDAPI_USR', None) +sd_password = os.environ.get('SDAPI_PWD', None) +options = { + "save_images": True, + "send_images": True, +} + +logging.basicConfig(level = logging.INFO, format = '%(asctime)s %(levelname)s: %(message)s') +log = logging.getLogger(__name__) +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + +def auth(): + if sd_username is not None and sd_password is not None: + return requests.auth.HTTPBasicAuth(sd_username, sd_password) + return None + + +def post(endpoint: str, payload: dict = None): + if 'sdapi' not in endpoint: + endpoint = f'sdapi/v1/{endpoint}' + if 'http' not in endpoint: + endpoint = f'{sd_url}/{endpoint}' + req = requests.post(endpoint, json = payload, timeout=300, verify=False, auth=auth()) + return { 'error': req.status_code, 'reason': req.reason, 'url': req.url } if req.status_code != 200 else req.json() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = 'api-txt2img') + parser.add_argument('endpoint', nargs=1, help='endpoint') + parser.add_argument('json', nargs=1, help='json data or file') + args = parser.parse_args() + log.info(f'api-json: {args}') + if os.path.isfile(args.json[0]): + with open(args.json[0], 'r', encoding='ascii') as f: + txt = f.read() + txt = txt.encode('ascii') + print('HERE', txt) + dct = json.loads(txt) + else: + dct = json.loads(args.json[0]) + res = post(endpoint=args.endpoint[0], payload=dct) + print(res) diff --git a/cli/image-encode.py b/cli/image-encode.py new file mode 100755 index 000000000..0769c2544 --- /dev/null +++ b/cli/image-encode.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +import io +import os +import sys +import base64 +from PIL import Image +from rich import print # pylint: disable=redefined-builtin + + +def encode(file: str): + image = Image.open(file) if os.path.exists(file) else None + print(f'Input: file={file} image={image}') + if image is None: + return None + if image.mode != 'RGB': + image = image.convert('RGB') + with io.BytesIO() as stream: + image.save(stream, 'JPEG') + image.close() + values = stream.getvalue() + encoded = base64.b64encode(values).decode() + return encoded + + +if __name__ == "__main__": + sys.argv.pop(0) + fn = sys.argv[0] if len(sys.argv) > 0 else '' + b64 = encode(fn) + print('=== BEGIN ===') + print(f'{b64}') + print('=== END ===') + diff --git a/modules/api/models.py b/modules/api/models.py index a0cb2562d..8437e91ba 100644 --- a/modules/api/models.py +++ b/modules/api/models.py @@ -152,8 +152,8 @@ class 
ItemIPAdapter(BaseModel): adapter: str = Field(title="Adapter", default="Base", description="") images: List[str] = Field(title="Image", default=[], description="") masks: Optional[List[str]] = Field(title="Mask", default=[], description="") - scale: float = Field(title="Scale", default=0.5, gt=0, le=1, description="") - start: float = Field(title="Start", default=0.0, gt=0, le=1, description="") + scale: float = Field(title="Scale", default=0.5, ge=0, le=1, description="") + start: float = Field(title="Start", default=0.0, ge=0, le=1, description="") end: float = Field(title="End", default=1.0, gt=0, le=1, description="") class ItemFace(BaseModel): diff --git a/wiki b/wiki index 4e01da914..c5c9e8998 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit 4e01da914a578ba0db26907da53640a49d7ecb2b +Subproject commit c5c9e89981c8bd35b51823315418a4a4864bb5e1 From 2a27cd916d0524f900d9850be6be8987e56ac45d Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Tue, 18 Jun 2024 13:21:13 -0400 Subject: [PATCH 39/81] pixart sigma improvements --- CHANGELOG.md | 20 +++++- cli/api-json.py | 5 +- extensions-builtin/Lora/network_overrides.py | 18 ++++- html/reference.json | 23 +++++-- javascript/ui.js | 1 + modules/model_pixart.py | 29 ++++++++ modules/model_sd3.py | 69 +------------------- modules/model_t5.py | 51 +++++++++++++++ modules/modelloader.py | 5 +- modules/sd_models.py | 56 ++++------------ 10 files changed, 152 insertions(+), 125 deletions(-) create mode 100644 modules/model_pixart.py create mode 100644 modules/model_t5.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 363020797..1167844ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## Pending +- SC Lora +- HunyuanDiT 1.1 - Diffusers==0.30.0 - https://github.com/huggingface/diffusers/issues/8546 - https://github.com/huggingface/diffusers/pull/8566 @@ -12,9 +14,13 @@ ### Highlights for 2024-06-17 Following zero-day **SD3** release, a week later here's a refresh with more than a few improvements. -Other than SD3, highlight is (finally) new release of **Torch-DirectML** as well as ability to use explicit **T5** text-encoder in many supported models! +But there's more than SD3: +- support for **PixArt-Sigma** in small/medium/large variants AND using 4/8/16bit quantized T5 text-encoder! +- (finally) new release of **Torch-DirectML** -### Improvements: SD3 +### Models + +#### Stable Diffusion 3 - enable taesd preview and non-full quality mode - enable base LoRA support @@ -28,10 +34,18 @@ Other than SD3, highlight is (finally) new release of **Torch-DirectML** as well higher shift means model will spend more time on structure and less on details - add support for selecting text encoder in xyz grid +#### Pixart-Σ + +- Add *small* (512px) and *large* (2k) variations, in addition to existing *medium* (1k) +- Add support for 4/8bit quantized t5 text encoder + *note* by default pixart-Σ uses full fp16 t5 encoder with large memory footprint + simply select in *settings -> model -> text encoder* before or after model load + ### Improvements: General +- support FP4 quantized T5 text encoder, in addtion to existing FP8 and FP16 - support for T5 text-encoder loader in **all** models that use T5 - *example*: load FP8 quantized T5 text-encoder into PixArt Sigma + *example*: load FP4 or FP8 quantized T5 text-encoder into PixArt Sigma or Stable Cascade! - support for `torch-directml` **0.2.2**, thanks @lshqqytiger! *note*: new directml is finally based on modern `torch` 2.3.1! 
- extra networks: info display now contains link to source url if model if its known diff --git a/cli/api-json.py b/cli/api-json.py index 889b70d01..e8c5270fb 100755 --- a/cli/api-json.py +++ b/cli/api-json.py @@ -45,10 +45,7 @@ def post(endpoint: str, payload: dict = None): log.info(f'api-json: {args}') if os.path.isfile(args.json[0]): with open(args.json[0], 'r', encoding='ascii') as f: - txt = f.read() - txt = txt.encode('ascii') - print('HERE', txt) - dct = json.loads(txt) + dct = json.load(f) # TODO fails with b64 encoded images inside json due to string encoding else: dct = json.loads(args.json[0]) res = post(endpoint=args.endpoint[0], payload=dct) diff --git a/extensions-builtin/Lora/network_overrides.py b/extensions-builtin/Lora/network_overrides.py index 943575630..dacecd0bd 100644 --- a/extensions-builtin/Lora/network_overrides.py +++ b/extensions-builtin/Lora/network_overrides.py @@ -1,7 +1,7 @@ from modules import shared -maybe_diffusers = [ +maybe_diffusers = [ # forced if lora_maybe_diffusers is enabled 'aaebf6360f7d', # sd15-lcm '3d18b05e4f56', # sdxl-lcm 'b71dcb732467', # sdxl-tcd @@ -19,14 +19,26 @@ '8cca3706050b', # hyper-sdxl-1step ] -force_diffusers = [ +force_diffusers = [ # forced always '816d0eed49fd', # flash-sdxl 'c2ec22757b46', # flash-sd15 ] +force_models = [ # forced always + 'sd3', + 'sc', + 'hunyuandit', + 'kandinsky', +] + +force_classes = [ # forced always +] + + def check_override(shorthash=''): force = False - force = force or (shared.sd_model_type == 'sd3') # TODO sd3 forced diffusers for lora load + force = force or (shared.sd_model_type in force_classes) + force = force or (shared.sd_model.__class__.__name__ in force_classes) if len(shorthash) < 4: return force force = force or (any(x.startswith(shorthash) for x in maybe_diffusers) if shared.opts.lora_maybe_diffusers else False) diff --git a/html/reference.json b/html/reference.json index 2ffdba6a4..a3f62e7e1 100644 --- a/html/reference.json +++ b/html/reference.json @@ -160,15 +160,30 @@ "preview": "PixArt-alpha--PixArt-XL-2-1024-MS.jpg", "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0" }, - "Pixart-Σ": { - "path": "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS", + "Pixart-Σ Small": { + "path": "huggingface/PixArt-alpha/PixArt-Sigma-XL-2-512-MS", "desc": "PixArt-Σ, a Diffusion Transformer model (DiT) capable of directly generating images at 4K resolution. PixArt-Σ represents a significant advancement over its predecessor, PixArt-α, offering images of markedly higher fidelity and improved alignment with text prompts.", "preview": "PixArt-alpha--pixart_sigma_sdxlvae_T5_diffusers.jpg", + "skip": true, + "extras": "width: 512, height: 512, sampler: Default, cfg_scale: 2.0" + }, + "Pixart-Σ Medium": { + "path": "huggingface/PixArt-alpha/PixArt-Sigma-XL-2-1024-MS", + "desc": "PixArt-Σ, a Diffusion Transformer model (DiT) capable of directly generating images at 4K resolution. PixArt-Σ represents a significant advancement over its predecessor, PixArt-α, offering images of markedly higher fidelity and improved alignment with text prompts.", + "preview": "PixArt-alpha--pixart_sigma_sdxlvae_T5_diffusers.jpg", + "skip": true, + "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0" + }, + "Pixart-Σ Large": { + "path": "huggingface/PixArt-alpha/PixArt-Sigma-XL-2-2K-MS", + "desc": "PixArt-Σ, a Diffusion Transformer model (DiT) capable of directly generating images at 4K resolution. 
PixArt-Σ represents a significant advancement over its predecessor, PixArt-α, offering images of markedly higher fidelity and improved alignment with text prompts.", + "preview": "PixArt-alpha--pixart_sigma_sdxlvae_T5_diffusers.jpg", + "skip": true, "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0" }, - "Tencent HunyuanDiT": { - "path": "Tencent-Hunyuan/HunyuanDiT-Diffusers", + "Tencent HunyuanDiT 1.1": { + "path": "Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers", "desc": "Hunyuan-DiT : A Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding.", "preview": "Tencent-Hunyuan-HunyuanDiT.jpg", "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0" diff --git a/javascript/ui.js b/javascript/ui.js index 8b4a5ce4a..c7ecd6680 100644 --- a/javascript/ui.js +++ b/javascript/ui.js @@ -424,6 +424,7 @@ function selectVAE(name) { } function selectReference(name) { + log(`Select reference: ${name}`); desiredCheckpointName = name; gradioApp().getElementById('change_reference').click(); } diff --git a/modules/model_pixart.py b/modules/model_pixart.py new file mode 100644 index 000000000..4ae7eb21c --- /dev/null +++ b/modules/model_pixart.py @@ -0,0 +1,29 @@ +import diffusers + + +def load_pixart(checkpoint_info, diffusers_load_config={}): + from modules import shared, devices, modelloader, model_t5 + modelloader.hf_login() + # shared.opts.data['cuda_dtype'] = 'FP32' # override + # shared.opts.data['diffusers_model_cpu_offload'] = True # override + # devices.set_cuda_params() + fn = checkpoint_info.path.replace('huggingface/', '') + t5 = model_t5.load_t5(shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir) + transformer = diffusers.PixArtTransformer2DModel.from_pretrained( + fn, + subfolder = 'transformer', + cache_dir = shared.opts.diffusers_dir, + **diffusers_load_config, + ) + transformer.to(devices.device) + kwargs = { 'transformer': transformer } + if t5 is not None: + kwargs['text_encoder'] = t5 + pipe = diffusers.PixArtSigmaPipeline.from_pretrained( + 'PixArt-alpha/PixArt-Sigma-XL-2-1024-MS', + cache_dir = shared.opts.diffusers_dir, + **kwargs, + **diffusers_load_config, + ) + devices.torch_gc() + return pipe diff --git a/modules/model_sd3.py b/modules/model_sd3.py index 4d8aaf250..81470a97e 100644 --- a/modules/model_sd3.py +++ b/modules/model_sd3.py @@ -1,14 +1,7 @@ import os -import warnings import torch import diffusers import transformers -import rich.traceback - - -rich.traceback.install() -warnings.filterwarnings(action="ignore", category=FutureWarning) -loggedin = False def load_sd3(fn=None, cache_dir=None, config=None): @@ -48,7 +41,7 @@ def load_sd3(fn=None, cache_dir=None, config=None): ), 'text_encoder_3': None, } - elif fn_size < 1e10: # if model is below 10gb it does not have te4 + elif fn_size < 1e10: # if model is below 10gb it does not have te3 kwargs = { 'text_encoder_3': None, } @@ -69,63 +62,3 @@ def load_sd3(fn=None, cache_dir=None, config=None): diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["stable-diffusion-3"] = diffusers.StableDiffusion3Img2ImgPipeline devices.torch_gc() return pipe - - -def load_t5(pipe, module, te3=None, cache_dir=None): - from modules import devices, modelloader - repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers' - if pipe is None or not hasattr(pipe, module): - return pipe - if 'fp16' in te3.lower(): - modelloader.hf_login() - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - # torch_dtype=dtype, - 
cache_dir=cache_dir, - torch_dtype=pipe.text_encoder.dtype, - ) - setattr(pipe, module, t5) - elif 'fp4' in te3.lower(): - modelloader.hf_login() - from installer import install - install('bitsandbytes', quiet=True) - quantization_config = transformers.BitsAndBytesConfig(load_in_4bit=True) - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - quantization_config=quantization_config, - cache_dir=cache_dir, - torch_dtype=pipe.text_encoder.dtype, - ) - setattr(pipe, module, t5) - elif 'fp8' in te3.lower(): - modelloader.hf_login() - from installer import install - install('bitsandbytes', quiet=True) - quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True) - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - quantization_config=quantization_config, - cache_dir=cache_dir, - torch_dtype=pipe.text_encoder.dtype, - ) - setattr(pipe, module, t5) - """ - if hasattr(pipe, 'remove_all_hooks'): - pipe.remove_all_hooks() - nn = getattr(pipe, module) - import accelerate - accelerate.hooks.remove_hook_from_module(nn, recurse=True) - nn.to(device=devices.device) - """ - else: - setattr(pipe, module, None) - if getattr(pipe, 'text_encoder_3', None) is not None and getattr(pipe, 'tokenizer_3', None) is None: # not needed anymore - pipe.tokenizer_3 = transformers.T5TokenizerFast.from_pretrained( - repo_id, - subfolder='tokenizer_3', - cache_dir=cache_dir, - ) - devices.torch_gc() diff --git a/modules/model_t5.py b/modules/model_t5.py new file mode 100644 index 000000000..02764ea57 --- /dev/null +++ b/modules/model_t5.py @@ -0,0 +1,51 @@ +import transformers + + +def load_t5(t5=None, cache_dir=None): + from modules import devices, modelloader + repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers' + if 'fp16' in t5.lower(): + modelloader.hf_login() + t5 = transformers.T5EncoderModel.from_pretrained( + repo_id, + subfolder='text_encoder_3', + # torch_dtype=dtype, + cache_dir=cache_dir, + torch_dtype=devices.dtype, + ) + elif 'fp4' in t5.lower(): + modelloader.hf_login() + from installer import install + install('bitsandbytes', quiet=True) + quantization_config = transformers.BitsAndBytesConfig(load_in_4bit=True) + t5 = transformers.T5EncoderModel.from_pretrained( + repo_id, + subfolder='text_encoder_3', + quantization_config=quantization_config, + cache_dir=cache_dir, + torch_dtype=devices.dtype, + ) + elif 'fp8' in t5.lower(): + modelloader.hf_login() + from installer import install + install('bitsandbytes', quiet=True) + quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True) + t5 = transformers.T5EncoderModel.from_pretrained( + repo_id, + subfolder='text_encoder_3', + quantization_config=quantization_config, + cache_dir=cache_dir, + torch_dtype=devices.dtype, + ) + else: + t5 = None + return t5 + + +def set_t5(pipe, module, t5=None, cache_dir=None): + from modules import devices + if pipe is None or not hasattr(pipe, module): + return pipe + t5 = load_t5(t5=t5, cache_dir=cache_dir) + setattr(pipe, module, t5) + devices.torch_gc() diff --git a/modules/modelloader.py b/modules/modelloader.py index 0045478a3..fd9e41edb 100644 --- a/modules/modelloader.py +++ b/modules/modelloader.py @@ -204,7 +204,6 @@ def download_diffusers_model(hub_id: str, cache_dir: str = None, download_config shared.log.debug(f'Diffusers downloading: id="{hub_id}" args={download_config}') token = token or shared.opts.huggingface_token if token is not None and len(token) > 2: - shared.log.debug(f"Diffusers 
authentication: {token}") hf_login(token) pipeline_dir = None @@ -318,6 +317,10 @@ def get_reference_opts(name: str, quiet=False): if k == name or model_name == name: model_opts = v break + model_name = model_name.replace('huggingface/', '') + if k == name or model_name == name: + model_opts = v + break if not model_opts: # shared.log.error(f'Reference: model="{name}" not found') return {} diff --git a/modules/sd_models.py b/modules/sd_models.py index b06afefe8..ef18ebf5e 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -202,11 +202,17 @@ def get_closet_checkpoint_match(search_string): if checkpoint_info is not None: return checkpoint_info found = sorted([info for info in checkpoints_list.values() if search_string in info.title], key=lambda x: len(x.title)) - if found: + if found and len(found) > 0: return found[0] found = sorted([info for info in checkpoints_list.values() if search_string.split(' ')[0] in info.title], key=lambda x: len(x.title)) - if found: + if found and len(found) > 0: return found[0] + for v in shared.reference_models.values(): + if search_string in v['path'] or os.path.basename(search_string) in v['path']: + model_name = search_string.replace('huggingface/', '') + checkpoint_info = CheckpointInfo(v['path']) # create a virutal model info + checkpoint_info.type = 'huggingface' + return checkpoint_info return None @@ -565,34 +571,20 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): # elif size < 0: # unknown # guess = 'Stable Diffusion 2B' elif size >= 5791 and size <= 5799: # 5795 - if not shared.native: - warn(f'Model detected as SD-XL refiner model, but attempting to load using backend=original: {op}={f} size={size} MB') if op == 'model': warn(f'Model detected as SD-XL refiner model, but attempting to load a base model: {op}={f} size={size} MB') guess = 'Stable Diffusion XL Refiner' elif (size >= 6611 and size <= 7220): # 6617, HassakuXL is 6776, monkrenRealisticINT_v10 is 7217 - if not shared.native: - warn(f'Model detected as SD-XL base model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'Stable Diffusion XL' elif size >= 3361 and size <= 3369: # 3368 - if not shared.native: - warn(f'Model detected as SD upscale model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'Stable Diffusion Upscale' elif size >= 4891 and size <= 4899: # 4897 - if not shared.native: - warn(f'Model detected as SD XL inpaint model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'Stable Diffusion XL Inpaint' elif size >= 9791 and size <= 9799: # 9794 - if not shared.native: - warn(f'Model detected as SD XL instruct pix2pix model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'Stable Diffusion XL Instruct' elif size > 3138 and size < 3142: #3140 - if not shared.native: - warn(f'Model detected as Segmind Vega model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'Stable Diffusion XL' elif size > 5692 and size < 5698 or size > 4134 and size < 4138: - if not shared.native: - warn(f'Model detected as Stable Diffusion 3 model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'Stable Diffusion 3' # guess by name """ @@ -602,34 +594,20 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): guess = 'Latent Consistency Model' """ if 'instaflow' in f.lower(): - if not shared.native: - warn(f'Model detected as InstaFlow model, but 
attempting to load using backend=original: {op}={f} size={size} MB') guess = 'InstaFlow' if 'segmoe' in f.lower(): - if not shared.native: - warn(f'Model detected as SegMoE model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'SegMoE' if 'hunyuandit' in f.lower(): - if not shared.native: - warn(f'Model detected as Tenecent HunyuanDiT model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'HunyuanDiT' if 'pixart-xl' in f.lower(): - if not shared.native: - warn(f'Model detected as PixArt Alpha model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'PixArt-Alpha' if 'stable-diffusion-3' in f.lower(): - if not shared.native: - warn(f'Model detected as Stable Diffusion 3 model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'Stable Diffusion 3' if 'stable-cascade' in f.lower() or 'stablecascade' in f.lower() or 'wuerstchen3' in f.lower(): - if not shared.native: - warn(f'Model detected as Stable Cascade model, but attempting to load using backend=original: {op}={f} size={size} MB') if devices.dtype == torch.float16: warn('Stable Cascade does not support Float16') guess = 'Stable Cascade' if 'pixart-sigma' in f.lower(): - if not shared.native: - warn(f'Model detected as PixArt-Sigma model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'PixArt-Sigma' # switch for specific variant if guess == 'Stable Diffusion' and 'inpaint' in f.lower(): @@ -996,14 +974,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No return elif model_type in ['PixArt-Sigma']: # forced pipeline try: - # shared.opts.data['cuda_dtype'] = 'FP32' # override - # shared.opts.data['diffusers_model_cpu_offload'] = True # override - devices.set_cuda_params() - sd_model = diffusers.PixArtSigmaPipeline.from_pretrained( - checkpoint_info.path, - use_safetensors=True, - cache_dir=shared.opts.diffusers_dir, - **diffusers_load_config) + from modules.model_pixart import load_pixart + sd_model = load_pixart(checkpoint_info, diffusers_load_config) except Exception as e: shared.log.error(f'Diffusers Failed loading {op}: {checkpoint_info.path} {e}') if debug_load: @@ -1533,13 +1505,13 @@ def reload_text_encoder(initial=False): signature = inspect.signature(shared.sd_model.__class__.__init__, follow_wrapped=True, eval_str=True).parameters t5 = [k for k, v in signature.items() if 'T5EncoderModel' in str(v)] if len(t5) > 0: - from modules.model_sd3 import load_t5 + from modules.model_t5 import set_t5 shared.log.debug(f'Load: t5={shared.opts.sd_text_encoder} module="{t5[0]}"') - load_t5(pipe=shared.sd_model, module=t5[0], te3=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir) + set_t5(pipe=shared.sd_model, module=t5[0], t5=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir) elif hasattr(shared.sd_model, 'text_encoder_3'): - from modules.model_sd3 import load_t5 + from modules.model_t5 import set_t5 shared.log.debug(f'Load: t5={shared.opts.sd_text_encoder} module="text_encoder_3"') - load_t5(pipe=shared.sd_model, module='text_encoder_3', te3=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir) + set_t5(pipe=shared.sd_model, module='text_encoder_3', t5=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir) def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', force=False): From d933f9837379d0a6e6d78bf934de4e8ced770403 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: 
Tue, 18 Jun 2024 14:34:35 -0400 Subject: [PATCH 40/81] update changelog --- CHANGELOG.md | 36 +++++++++----------- extensions-builtin/Lora/network_overrides.py | 5 +-- extensions-builtin/Lora/networks.py | 6 ++++ 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1167844ef..398481614 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,44 +2,40 @@ ## Pending -- SC Lora -- HunyuanDiT 1.1 - Diffusers==0.30.0 - https://github.com/huggingface/diffusers/issues/8546 - https://github.com/huggingface/diffusers/pull/8566 - https://github.com/huggingface/diffusers/pull/8584 -## Update for 2024-06-17 +## Update for 2024-06-18 -### Highlights for 2024-06-17 +### Highlights for 2024-06-18 Following zero-day **SD3** release, a week later here's a refresh with more than a few improvements. But there's more than SD3: - support for **PixArt-Sigma** in small/medium/large variants AND using 4/8/16bit quantized T5 text-encoder! +- support for **HunyuanDiT 1.1** - (finally) new release of **Torch-DirectML** -### Models +### Model Improvements -#### Stable Diffusion 3 - -- enable taesd preview and non-full quality mode -- enable base LoRA support -- add support for 4bit quantized t5 text encoder +- **SD3**: enable tiny-VAE (TAESD) preview and non-full quality mode +- SD3: enable base LoRA support +- SD3: add support for 4bit quantized T5 text encoder simply select in *settings -> model -> text encoder* -- simplified loading of model in single-file safetensors format +- SD3: simplified loading of model in single-file safetensors format loading sd3 can now be performed fully offline -- add support for nncf compressed weights, thanks @Disty0! -- add support for sampler shift for Euler FlowMatch +- SD3: add support for nncf compressed weights, thanks @Disty0! +- SD3: add support for sampler shift for Euler FlowMatch see *settings -> samplers*, also available as param in xyz grid higher shift means model will spend more time on structure and less on details -- add support for selecting text encoder in xyz grid - -#### Pixart-Σ - -- Add *small* (512px) and *large* (2k) variations, in addition to existing *medium* (1k) -- Add support for 4/8bit quantized t5 text encoder +- SD3: add support for selecting text encoder in xyz grid +- **Pixart-Σ**: Add *small* (512px) and *large* (2k) variations, in addition to existing *medium* (1k) +- Pixart-Σ: Add support for 4/8bit quantized t5 text encoder *note* by default pixart-Σ uses full fp16 t5 encoder with large memory footprint simply select in *settings -> model -> text encoder* before or after model load +- **HunyuanDiT**: support for model version 1.1 + ### Improvements: General @@ -58,7 +54,7 @@ But there's more than SD3: - fix unsaturated outputs, force apply vae config on model load - fix hidiffusion handling of non-square aspect ratios, thanks @ShenZhang-Shin! 
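(Editor's note on the Euler FlowMatch sampler shift listed in the entries above: the sketch below is illustrative only and is not the project's implementation. It assumes diffusers>=0.29, which ships FlowMatchEulerDiscreteScheduler for SD3; the helper name and default value are made up for the example.)

```python
from diffusers import FlowMatchEulerDiscreteScheduler

def apply_flowmatch_shift(pipe, shift: float = 3.0):
    # rebuild the scheduler from its existing config, overriding only the shift value;
    # a higher shift spends relatively more of the schedule on global structure and less on fine detail
    pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe.scheduler.config, shift=shift)
    return pipe
```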
- fix control second pass resize -- fix **hunyuandit** set attention processor +- fix hunyuandit set attention processor - fix civitai download without name - fix compatibility with latest adetailer - fix invalid sampler warning diff --git a/extensions-builtin/Lora/network_overrides.py b/extensions-builtin/Lora/network_overrides.py index dacecd0bd..14018fb70 100644 --- a/extensions-builtin/Lora/network_overrides.py +++ b/extensions-builtin/Lora/network_overrides.py @@ -26,9 +26,6 @@ force_models = [ # forced always 'sd3', - 'sc', - 'hunyuandit', - 'kandinsky', ] force_classes = [ # forced always @@ -37,7 +34,7 @@ def check_override(shorthash=''): force = False - force = force or (shared.sd_model_type in force_classes) + force = force or (shared.sd_model_type in force_models) force = force or (shared.sd_model.__class__.__name__ in force_classes) if len(shorthash) < 4: return force diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 574b223bf..3ebad5f17 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -49,6 +49,7 @@ def assign_network_names_to_compvis_modules(sd_model): network_layer_mapping = {} if shared.native: if not hasattr(shared.sd_model, 'text_encoder') or not hasattr(shared.sd_model, 'unet'): + sd_model.network_layer_mapping = {} return for name, module in shared.sd_model.text_encoder.named_modules(): prefix = "lora_te1_" if shared.sd_model_type == "sdxl" else "lora_te_" @@ -66,6 +67,7 @@ def assign_network_names_to_compvis_modules(sd_model): module.network_layer_name = network_name else: if not hasattr(shared.sd_model, 'cond_stage_model'): + sd_model.network_layer_mapping = {} return for name, module in shared.sd_model.cond_stage_model.wrapped.named_modules(): network_name = name.replace(".", "_") @@ -87,10 +89,14 @@ def load_diffusers(name, network_on_disk, lora_scale=1.0) -> network.Network: return cached if not shared.native: return None + if not hasattr(shared.sd_model, 'load_lora_weights'): + shared.log.error(f"LoRA load failed: class={shared.sd_model.__class__} does not implement load lora") + return None try: shared.sd_model.load_lora_weights(network_on_disk.filename) except Exception as e: errors.display(e, "LoRA") + return None if shared.opts.lora_fuse_diffusers: shared.sd_model.fuse_lora(lora_scale=lora_scale) net = network.Network(name, network_on_disk) From 527865db2563da8cd9b415f818f2fd595c675c84 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Tue, 18 Jun 2024 18:16:17 -0400 Subject: [PATCH 41/81] fix pixart --- modules/model_pixart.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/model_pixart.py b/modules/model_pixart.py index 4ae7eb21c..d57079e4e 100644 --- a/modules/model_pixart.py +++ b/modules/model_pixart.py @@ -19,6 +19,7 @@ def load_pixart(checkpoint_info, diffusers_load_config={}): kwargs = { 'transformer': transformer } if t5 is not None: kwargs['text_encoder'] = t5 + diffusers_load_config.pop('variant', None) pipe = diffusers.PixArtSigmaPipeline.from_pretrained( 'PixArt-alpha/PixArt-Sigma-XL-2-1024-MS', cache_dir = shared.opts.diffusers_dir, From aa262db6eb71c5ed73926db9fbbd5c272a3d7bbe Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Wed, 19 Jun 2024 08:52:16 -0400 Subject: [PATCH 42/81] cleanup metadata --- CHANGELOG.md | 5 +++-- modules/processing_diffusers.py | 1 - modules/processing_info.py | 20 ++++++++++++++++---- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 398481614..d1bff2e54 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,9 @@ - https://github.com/huggingface/diffusers/pull/8566 - https://github.com/huggingface/diffusers/pull/8584 -## Update for 2024-06-18 +## Update for 2024-06-19 -### Highlights for 2024-06-18 +### Highlights for 2024-06-19 Following zero-day **SD3** release, a week later here's a refresh with more than a few improvements. But there's more than SD3: @@ -65,6 +65,7 @@ But there's more than SD3: - fix sdxl "has been incorrectly initialized" - fix api face-hires - fix api ip-adapter +- cleanup image metadata - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified - remove obsolete training code leftovers diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py index b9448cc38..dca8e0a71 100644 --- a/modules/processing_diffusers.py +++ b/modules/processing_diffusers.py @@ -105,7 +105,6 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): desc='Base', ) shared.state.sampling_steps = base_args.get('prior_num_inference_steps', None) or base_args.get('num_inference_steps', None) or p.steps - p.extra_generation_params['Pipeline'] = shared.sd_model.__class__.__name__ if shared.opts.scheduler_eta is not None and shared.opts.scheduler_eta > 0 and shared.opts.scheduler_eta < 1: p.extra_generation_params["Sampler Eta"] = shared.opts.scheduler_eta output = None diff --git a/modules/processing_info.py b/modules/processing_info.py index c6f572dae..95e573f21 100644 --- a/modules/processing_info.py +++ b/modules/processing_info.py @@ -63,6 +63,10 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No "Comment": comment, "Operations": '; '.join(ops).replace('"', '') if len(p.ops) > 0 else 'none', } + # native + if shared.native: + args['Pipeline'] = shared.sd_model.__class__.__name__ + args['T5'] = None if (not shared.opts.add_model_name_to_info or shared.opts.sd_text_encoder is None or shared.opts.sd_text_encoder == 'None') else shared.opts.sd_text_encoder if 'txt2img' in p.ops: args["Variation seed"] = all_subseeds[index] if p.subseed_strength > 0 else None args["Variation strength"] = p.subseed_strength if p.subseed_strength > 0 else None @@ -143,12 +147,20 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No args['Sampler sigma uncond'] = shared.opts.s_churn if shared.opts.s_churn != shared.opts.data_labels.get('s_churn').default else None args['Sampler sigma noise'] = shared.opts.s_noise if shared.opts.s_noise != shared.opts.data_labels.get('s_noise').default else None args['Sampler sigma tmin'] = shared.opts.s_tmin if shared.opts.s_tmin != shared.opts.data_labels.get('s_tmin').default else None - # tome - args['ToMe'] = shared.opts.tome_ratio if shared.opts.tome_ratio != 0 else None - args['ToDo'] = shared.opts.todo_ratio if shared.opts.todo_ratio != 0 else None + # tome/todo + if shared.opts.token_merging_method == 'ToMe': + args['ToMe'] = shared.opts.tome_ratio if shared.opts.tome_ratio != 0 else None + else: + args['ToDo'] = shared.opts.todo_ratio if shared.opts.todo_ratio != 0 else None args.update(p.extra_generation_params) - params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in args.items() if v is not None]) + for k, v in args.copy().items(): + if v is None: + del args[k] + if isinstance(v, str): + if len(v) == 0 or v == '0x0': + del args[k] + params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in 
args.items()]) negative_prompt_text = f"\nNegative prompt: {all_negative_prompts[index]}" if all_negative_prompts[index] else "" infotext = f"{all_prompts[index]}{negative_prompt_text}\n{params_text}".strip() return infotext From 34c7f28aa5085fa6a980c62c4caf14336b837376 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Wed, 19 Jun 2024 11:44:48 -0400 Subject: [PATCH 43/81] add collab and python 3.12 checks --- installer.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/installer.py b/installer.py index 9c01b90e5..601cda66b 100644 --- a/installer.py +++ b/installer.py @@ -275,9 +275,12 @@ def install(package, friendly: str = None, ignore: bool = False, reinstall: bool # execute git command @lru_cache() -def git(arg: str, folder: str = None, ignore: bool = False): +def git(arg: str, folder: str = None, ignore: bool = False, optional: bool = False): if args.skip_git: return '' + if optional: + if 'google.colab' in sys.modules: + return '' git_cmd = os.environ.get('GIT', "git") if git_cmd != "git": git_cmd = os.path.abspath(git_cmd) @@ -306,7 +309,7 @@ def branch(folder=None): return None branches = [] try: - b = git('branch --show-current', folder) + b = git('branch --show-current', folder, optional=True) if b == '': branches = git('branch', folder).split('\n') if len(branches) > 0: @@ -315,7 +318,7 @@ def branch(folder=None): b = branches[1].strip() log.debug(f'Git detached head detected: folder="{folder}" reattach={b}') except Exception: - b = git('git rev-parse --abbrev-ref HEAD', folder) + b = git('git rev-parse --abbrev-ref HEAD', folder, optional=True) if 'main' in b: b = 'main' elif 'master' in b: @@ -323,7 +326,7 @@ def branch(folder=None): else: b = b.split('\n')[0].replace('*', '').strip() log.debug(f'Submodule: {folder} / {b}') - git(f'checkout {b}', folder, ignore=True) + git(f'checkout {b}', folder, ignore=True, optional=True) return b @@ -396,6 +399,12 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None): if args.quick: return log.info(f'Python version={platform.python_version()} platform={platform.system()} bin="{sys.executable}" venv="{sys.prefix}"') + if int(sys.version_info.major) == 3 and int(sys.version_info.minor) == 12 and int(sys.version_info.minor) > 3: # TODO python 3.12.4 or higher cause a mess with pydantic + log.error(f"Incompatible Python version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro} required 3.12.3 or lower") + if reason is not None: + log.error(reason) + if not args.ignore: + sys.exit(1) if not (int(sys.version_info.major) == 3 and int(sys.version_info.minor) in supported_minors): log.error(f"Incompatible Python version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro} required 3.{supported_minors}") if reason is not None: @@ -1035,16 +1044,21 @@ def get_version(force=False): def check_ui(ver): - if ver is None or 'branch' not in ver or 'ui' not in ver or ver['branch'] == ver['ui']: - return - log.debug(f'Branch mismatch: sdnext={ver["branch"]} ui={ver["ui"]}') + def same(ver): + core = ver['branch'] if ver is not None and 'branch' in ver else 'unknown' + ui = ver['ui'] if ver is not None and 'ui' in ver else 'unknown' + return core == ui or (core == 'master' and ui == 'main') + + if not same(ver): + log.debug(f'Branch mismatch: sdnext={ver["branch"]} ui={ver["ui"]}') cwd = os.getcwd() try: os.chdir('extensions-builtin/sdnext-modernui') - git('checkout ' + ver['branch'], ignore=True) + target = 'dev' if 'dev' in ver['branch'] else 
'main' + git('checkout ' + target, ignore=True, optional=True) os.chdir(cwd) ver = get_version(force=True) - if ver['branch'] == ver['ui']: + if not same(ver): log.debug(f'Branch synchronized: {ver["branch"]}') else: log.debug(f'Branch sync failed: sdnext={ver["branch"]} ui={ver["ui"]}') From bf9565cb467d0df9f33b4c956e57f1b3ab8924d4 Mon Sep 17 00:00:00 2001 From: Disty0 Date: Wed, 19 Jun 2024 21:23:47 +0300 Subject: [PATCH 44/81] NNCF compression support on CPU and add INT8 option for T5 --- modules/model_t5.py | 28 ++++++++++++++++++++++++++- modules/sd_models.py | 31 ++++++++++++++++++++---------- modules/sd_models_compile.py | 37 ++++++++++++++++++++---------------- modules/shared.py | 2 +- 4 files changed, 70 insertions(+), 28 deletions(-) diff --git a/modules/model_t5.py b/modules/model_t5.py index 02764ea57..7b735794c 100644 --- a/modules/model_t5.py +++ b/modules/model_t5.py @@ -37,15 +37,41 @@ def load_t5(t5=None, cache_dir=None): cache_dir=cache_dir, torch_dtype=devices.dtype, ) + elif 'int8' in t5.lower(): + modelloader.hf_login() + from installer import install + install('nncf==2.7.0', quiet=True) + from modules.sd_models_compile import nncf_compress_model + from modules.sd_hijack import NNCF_T5DenseGatedActDense # T5DenseGatedActDense uses fp32 + t5 = transformers.T5EncoderModel.from_pretrained( + repo_id, + subfolder='text_encoder_3', + cache_dir=cache_dir, + torch_dtype=devices.dtype, + ) + for i in range(len(t5.encoder.block)): + t5.encoder.block[i].layer[1].DenseReluDense = NNCF_T5DenseGatedActDense( + t5.encoder.block[i].layer[1].DenseReluDense + ) + t5 = nncf_compress_model(t5) else: t5 = None return t5 def set_t5(pipe, module, t5=None, cache_dir=None): - from modules import devices + from modules import devices, shared if pipe is None or not hasattr(pipe, module): return pipe t5 = load_t5(t5=t5, cache_dir=cache_dir) setattr(pipe, module, t5) + if shared.cmd_opts.lowvram or shared.opts.diffusers_seq_cpu_offload: + from accelerate import cpu_offload + getattr(pipe, module).to("cpu") + cpu_offload(getattr(pipe, module), devices.device, offload_buffers=len(getattr(pipe, module)._parameters) > 0) # pylint: disable=protected-access + elif shared.cmd_opts.medvram or shared.opts.diffusers_model_cpu_offload: + if not hasattr(pipe, "_all_hooks") or len(pipe._all_hooks) == 0: # pylint: disable=protected-access + pipe.enable_model_cpu_offload(device=devices.device) + else: + pipe.maybe_free_model_hooks() devices.torch_gc() diff --git a/modules/sd_models.py b/modules/sd_models.py index ef18ebf5e..be01df7b3 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -653,15 +653,10 @@ def copy_diffuser_options(new_pipe, orig_pipe): new_pipe.is_sd1 = getattr(orig_pipe, 'is_sd1', True) -def set_diffuser_options(sd_model, vae = None, op: str = 'model'): +def set_diffuser_options(sd_model, vae = None, op: str = 'model', offload=True): if sd_model is None: shared.log.warning(f'{op} is not loaded') return - if (shared.opts.diffusers_model_cpu_offload or shared.cmd_opts.medvram) and (shared.opts.diffusers_seq_cpu_offload or shared.cmd_opts.lowvram): - shared.log.warning(f'Setting {op}: Model CPU offload and Sequential CPU offload are not compatible') - shared.log.debug(f'Setting {op}: disabling model CPU offload') - shared.opts.diffusers_model_cpu_offload=False - shared.cmd_opts.medvram=False if hasattr(sd_model, "watermark"): sd_model.watermark = NoWatermark() @@ -717,6 +712,20 @@ def set_diffuser_options(sd_model, vae = None, op: str = 'model'): shared.log.debug(f'Setting {op}: enable 
channels last') sd_model.unet.to(memory_format=torch.channels_last) + if offload: + set_diffuser_offload(sd_model, op) + +def set_diffuser_offload(sd_model, op: str = 'model'): + if sd_model is None: + shared.log.warning(f'{op} is not loaded') + return + if (shared.opts.diffusers_model_cpu_offload or shared.cmd_opts.medvram) and (shared.opts.diffusers_seq_cpu_offload or shared.cmd_opts.lowvram): + shared.log.warning(f'Setting {op}: Model CPU offload and Sequential CPU offload are not compatible') + shared.log.debug(f'Setting {op}: disabling model CPU offload') + shared.opts.diffusers_model_cpu_offload=False + shared.cmd_opts.medvram=False + if not (hasattr(sd_model, "has_accelerate") and sd_model.has_accelerate): + sd_model.has_accelerate = False if hasattr(sd_model, "enable_model_cpu_offload"): if shared.cmd_opts.medvram or shared.opts.diffusers_model_cpu_offload: shared.log.debug(f'Setting {op}: enable model CPU offload') @@ -1130,7 +1139,12 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No sd_model.embedding_db.load_textual_inversion_embeddings(force_reload=True) timer.record("embeddings") - set_diffuser_options(sd_model, vae, op) + set_diffuser_options(sd_model, vae, op, offload=False) + if shared.opts.nncf_compress_weights and not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): + sd_model = sd_models_compile.nncf_compress_weights(sd_model) # run this before move model so it can be compressed in CPU + timer.record("options") + + set_diffuser_offload(sd_model, op) if op == 'model': sd_vae.apply_vae_config(shared.sd_model.sd_checkpoint_info.filename, vae_file, sd_model) if op == 'refiner' and shared.opts.diffusers_move_refiner: @@ -1145,9 +1159,6 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No if shared.opts.ipex_optimize: sd_model = sd_models_compile.ipex_optimize(sd_model) - if shared.opts.nncf_compress_weights and not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): - sd_model = sd_models_compile.nncf_compress_weights(sd_model) - if (shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none'): sd_model = sd_models_compile.compile_diffusers(sd_model) timer.record("compile") diff --git a/modules/sd_models_compile.py b/modules/sd_models_compile.py index a4b4fb8af..6e629788c 100644 --- a/modules/sd_models_compile.py +++ b/modules/sd_models_compile.py @@ -114,27 +114,32 @@ def ipex_optimize_model(model): shared.log.warning(f"IPEX Optimize: error: {e}") return sd_model +def nncf_send_to_device(model): + for child in model.children(): + if child.__class__.__name__ == "WeightsDecompressor": + child.scale = child.scale.to(devices.device) + child.zero_point = child.zero_point.to(devices.device) + nncf_send_to_device(child) + +def nncf_compress_model(model): + import nncf + model.eval() + backup_embeddings = None + if hasattr(model, "get_input_embeddings"): + backup_embeddings = copy.deepcopy(model.get_input_embeddings()) + model = nncf.compress_weights(model) + nncf_send_to_device(model) + if hasattr(model, "set_input_embeddings") and backup_embeddings is not None: + model.set_input_embeddings(backup_embeddings) + devices.torch_gc(force=True) + return model def nncf_compress_weights(sd_model): try: t0 = time.time() - if sd_model.device.type == "meta": - shared.log.warning("Compress Weights is not compatible with Sequential CPU offload") - return sd_model - - def nncf_compress_model(model): - return_device = model.device - model.eval() - 
backup_embeddings = None - if hasattr(model, "get_input_embeddings"): - backup_embeddings = copy.deepcopy(model.get_input_embeddings()) - model = nncf.compress_weights(model.to(devices.device)).to(return_device) - if hasattr(model, "set_input_embeddings") and backup_embeddings is not None: - model.set_input_embeddings(backup_embeddings) - devices.torch_gc(force=True) - return model + from installer import install + install('nncf==2.7.0', quiet=True) - import nncf shared.compiled_model_state = CompiledModelState() shared.compiled_model_state.is_compiled = True diff --git a/modules/shared.py b/modules/shared.py index d65084053..fcdf9bbb8 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -391,7 +391,7 @@ def temp_disable_extensions(): "sd_model_refiner": OptionInfo('None', "Refiner model", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list), "sd_unet": OptionInfo("None", "UNET model", gr.Dropdown, lambda: {"choices": shared_items.sd_unet_items()}, refresh=shared_items.refresh_unet_list), - "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": ['None', 'T5 FP4', 'T5 FP8', 'T5 FP16']}), + "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": ['None', 'T5 FP4', 'T5 FP8', 'T5 INT8', 'T5 FP16']}), "sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"), "sd_model_dict": OptionInfo('None', "Use separate base dict", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": not native }), From 4ccc2d96bc18a383ed9c8de78df508c27e13e324 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Wed, 19 Jun 2024 15:20:05 -0400 Subject: [PATCH 45/81] update changelog --- CHANGELOG.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1bff2e54..3dec59dce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,6 @@ ## Pending - Diffusers==0.30.0 -- https://github.com/huggingface/diffusers/issues/8546 - https://github.com/huggingface/diffusers/pull/8566 - https://github.com/huggingface/diffusers/pull/8584 @@ -13,7 +12,8 @@ Following zero-day **SD3** release, a week later here's a refresh with more than a few improvements. But there's more than SD3: -- support for **PixArt-Sigma** in small/medium/large variants AND using 4/8/16bit quantized T5 text-encoder! +- support for quantized **T5** text encoder in all models that use T5: FP4/FP8/FP16/INT8 (SD3, PixArt-Σ, etc) +- support for **PixArt-Sigma** in small/medium/large variants - support for **HunyuanDiT 1.1** - (finally) new release of **Torch-DirectML** @@ -21,15 +21,17 @@ But there's more than SD3: - **SD3**: enable tiny-VAE (TAESD) preview and non-full quality mode - SD3: enable base LoRA support -- SD3: add support for 4bit quantized T5 text encoder +- SD3: add support for FP4 quantized T5 text encoder simply select in *settings -> model -> text encoder* +- SD3: add support for INT8 quantized T5 text encoder, thanks @Disty0! +- SD3: enable cpu-offloading for T5 text encoder, thanks @Disty0! 
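(Editor's note for the T5 text-encoder entries above: a minimal usage sketch of the set_t5 helper added in modules/model_t5.py earlier in this patch set. It mirrors the call made from reload_text_encoder; the 'T5 INT8' string is one of the settings choices and routes to the NNCF-compressed loader, since load_t5 keys off the 'int8' substring.)

```python
from modules import shared
from modules.model_t5 import set_t5

# swap the SD3 pipeline's third text encoder for an INT8 (NNCF-compressed) T5;
# set_t5 also re-applies model/sequential CPU offload when medvram/lowvram is enabled
set_t5(pipe=shared.sd_model, module='text_encoder_3', t5='T5 INT8', cache_dir=shared.opts.diffusers_dir)
```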
- SD3: simplified loading of model in single-file safetensors format - loading sd3 can now be performed fully offline -- SD3: add support for nncf compressed weights, thanks @Disty0! + model load can now be performed fully offline +- SD3: add support for NNCF compressed weights, thanks @Disty0! - SD3: add support for sampler shift for Euler FlowMatch see *settings -> samplers*, also available as param in xyz grid higher shift means model will spend more time on structure and less on details -- SD3: add support for selecting text encoder in xyz grid +- SD3: add support for selecting T5 text encoder variant in XYZ grid - **Pixart-Σ**: Add *small* (512px) and *large* (2k) variations, in addition to existing *medium* (1k) - Pixart-Σ: Add support for 4/8bit quantized t5 text encoder *note* by default pixart-Σ uses full fp16 t5 encoder with large memory footprint @@ -46,6 +48,7 @@ But there's more than SD3: *note*: new directml is finally based on modern `torch` 2.3.1! - extra networks: info display now contains link to source url if model if its known works for civitai and huggingface models +- improved google.colab support - css tweaks for standardui - css tweaks for modernui From eb6e3c337cc9cb1cf43c40a7ae95baf8f8ccce51 Mon Sep 17 00:00:00 2001 From: Disty0 Date: Wed, 19 Jun 2024 22:42:00 +0300 Subject: [PATCH 46/81] NNCF ControlNet support --- modules/control/units/controlnet.py | 13 +++++++++++-- modules/shared.py | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py index e911bc6ff..b57005f3c 100644 --- a/modules/control/units/controlnet.py +++ b/modules/control/units/controlnet.py @@ -172,10 +172,19 @@ def load(self, model_id: str = None) -> str: self.load_safetensors(model_path) else: self.model = ControlNetModel.from_pretrained(model_path, **self.load_config) - if self.device is not None: - self.model.to(self.device) if self.dtype is not None: self.model.to(self.dtype) + if "ControlNet" in opts.nncf_compress_weights: + try: + log.debug(f'Control {what} model NNCF Compress: id="{model_id}"') + from installer import install + install('nncf==2.7.0', quiet=True) + from modules.sd_models_compile import nncf_compress_model + self.model = nncf_compress_model(self.model) + except Exception as e: + log.error(f'Control {what} model NNCF Compression failed: id="{model_id}" error={e}') + if self.device is not None: + self.model.to(self.device) t1 = time.time() self.model_id = model_id log.debug(f'Control {what} model loaded: id="{model_id}" path="{model_path}" time={t1-t0:.2f}') diff --git a/modules/shared.py b/modules/shared.py index fcdf9bbb8..de2cebf64 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -449,7 +449,7 @@ def temp_disable_extensions(): "deep_cache_interval": OptionInfo(3, "DeepCache cache interval", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}), "nncf_sep": OptionInfo("
<h2>Model Compress</h2>
", "", gr.HTML), - "nncf_compress_weights": OptionInfo([], "Compress Model weights with NNCF", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}), + "nncf_compress_weights": OptionInfo([], "Compress Model weights with NNCF", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}), "ipex_sep": OptionInfo("
<h2>IPEX</h2>
", "", gr.HTML, {"visible": devices.backend == "ipex"}), "ipex_optimize": OptionInfo([], "IPEX Optimize for Intel GPUs", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"], "visible": devices.backend == "ipex"}), From e3009753e31bcd7c132abd96e2ce6b72c85312eb Mon Sep 17 00:00:00 2001 From: AI-Casanova <54461896+AI-Casanova@users.noreply.github.com> Date: Wed, 19 Jun 2024 23:41:11 -0500 Subject: [PATCH 47/81] Finish SD3 Prompt Parsing, reconfigure Compel Hijack --- modules/prompt_parser_diffusers.py | 54 ++++++++++++++++++------------ modules/sd_models.py | 3 ++ 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 0d410af87..29cf86441 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -41,8 +41,26 @@ def compel_hijack(self, token_ids: torch.Tensor, return hidden_state -EmbeddingsProvider._encode_token_ids_to_embeddings = compel_hijack # pylint: disable=protected-access +def sd3_compel_hijack(self, token_ids: torch.Tensor, + attention_mask: typing.Optional[torch.Tensor] = None) -> torch.Tensor: + needs_hidden_states = True + text_encoder_output = self.text_encoder(token_ids, attention_mask, output_hidden_states=needs_hidden_states, return_dict=True) + clip_skip = int(self.returned_embeddings_type) + hidden_state = text_encoder_output.hidden_states[-(clip_skip+1)] + + return hidden_state + +def insert_parser_highjack(pipename): + if "StableDiffusion3" in pipename: + EmbeddingsProvider._encode_token_ids_to_embeddings = sd3_compel_hijack # pylint: disable=protected-access + debug("Loading SD3 Parser hijack") + else: + EmbeddingsProvider._encode_token_ids_to_embeddings = compel_hijack # pylint: disable=protected-access + debug("Loading Standard Parser hijack") + + +insert_parser_highjack("Initialize") # from https://github.com/damian0815/compel/blob/main/src/compel/diffusers_textual_inversion_manager.py class DiffusersTextualInversionManager(BaseTextualInversionManager): @@ -217,7 +235,7 @@ def prepare_embedding_providers(pipe, clip_skip) -> list[EmbeddingsProvider]: embeddings_providers = [] if 'StableCascade' in pipe.__class__.__name__: embedding_type = -(clip_skip) - elif 'XL' in pipe.__class__.__name__ or 'SD3' in pipe.__class__.__name__: + elif 'XL' in pipe.__class__.__name__: embedding_type = -(clip_skip + 1) else: embedding_type = clip_skip @@ -237,7 +255,7 @@ def pad_to_same_length(pipe, embeds): if not hasattr(pipe, 'encode_prompt') and 'StableCascade' not in pipe.__class__.__name__: return embeds device = pipe.device if str(pipe.device) != 'meta' else devices.device - if shared.opts.diffusers_zeros_prompt_pad: + if shared.opts.diffusers_zeros_prompt_pad or 'StableDiffusion3' in pipe.__class__.__name__: empty_embed = [torch.zeros((1, 77, embeds[0].shape[2]), device=device, dtype=embeds[0].dtype)] else: try: @@ -276,14 +294,15 @@ def split_prompts(prompt, SD3 = False): if SD3 and prompt3 != " ": ps, ws = get_prompts_with_weights(prompt3) - prompt3 = ", ".join(ps) + prompt3 = " ".join(ps) return prompt, prompt2, prompt3 + def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", clip_skip: int = None): device = pipe.device if str(pipe.device) != 'meta' else devices.device SD3 = hasattr(pipe, 'text_encoder_3') prompt, prompt_2, prompt_3 = split_prompts(prompt, SD3) - neg_prompt, neg_prompt_2, neg_prompt_3 = split_prompts(prompt, SD3) + neg_prompt, neg_prompt_2, neg_prompt_3 = split_prompts(neg_prompt, SD3) if prompt != 
prompt_2: ps = [get_prompts_with_weights(p) for p in [prompt, prompt_2]] @@ -330,37 +349,28 @@ def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", c debug(f'Prompt: unpadded shape={prompt_embeds[0].shape} TE{i+1} ptokens={torch.count_nonzero(ptokens)} ntokens={torch.count_nonzero(ntokens)} time={(time.time() - t0):.3f}') if SD3: t0 = time.time() - for i in range(len(prompt_embeds)): - pooled_prompt_embeds.append(prompt_embeds[i][ - torch.arange(prompt_embeds[i].shape[0], device=device), - (ptokens.to(dtype=torch.int, device=device) == 49407) - .int() - .argmax(dim=-1), - ]) - negative_pooled_prompt_embeds.append(negative_prompt_embeds[i][ - torch.arange(negative_prompt_embeds[i].shape[0], device=device), - (ntokens.to(dtype=torch.int, device=device) == 49407) - .int() - .argmax(dim=-1), - ]) + pooled_prompt_embeds.append(embedding_providers[0].get_pooled_embeddings(texts=positives[0] if len(positives[0]) == 1 else [" ".join(positives[0])], device=device)) + pooled_prompt_embeds.append(embedding_providers[1].get_pooled_embeddings(texts=positives[-1] if len(positives[-1]) == 1 else [" ".join(positives[-1])], device=device)) + negative_pooled_prompt_embeds.append(embedding_providers[0].get_pooled_embeddings(texts=negatives[0] if len(negatives[0]) == 1 else [" ".join(negatives[0])], device=device)) + negative_pooled_prompt_embeds.append(embedding_providers[1].get_pooled_embeddings(texts=negatives[-1] if len(negatives[-1]) == 1 else [" ".join(negatives[-1])], device=device)) pooled_prompt_embeds = torch.cat(pooled_prompt_embeds, dim=-1) negative_pooled_prompt_embeds = torch.cat(negative_pooled_prompt_embeds, dim=-1) debug(f'Prompt: pooled shape={pooled_prompt_embeds[0].shape} time={(time.time() - t0):.3f}') elif prompt_embeds[-1].shape[-1] > 768: t0 = time.time() if shared.opts.diffusers_pooled == "weighted": - pooled_prompt_embeds = prompt_embeds[-1][ + pooled_prompt_embeds = embedding_providers[-1].text_encoder.text_projection(prompt_embeds[-1][ torch.arange(prompt_embeds[-1].shape[0], device=device), (ptokens.to(dtype=torch.int, device=device) == 49407) .int() .argmax(dim=-1), - ] - negative_pooled_prompt_embeds = negative_prompt_embeds[-1][ + ]) + negative_pooled_prompt_embeds = embedding_providers[-1].text_encoder.text_projection(negative_prompt_embeds[-1][ torch.arange(negative_prompt_embeds[-1].shape[0], device=device), (ntokens.to(dtype=torch.int, device=device) == 49407) .int() .argmax(dim=-1), - ] + ]) else: try: pooled_prompt_embeds = embedding_providers[-1].get_pooled_embeddings(texts=[prompt_2], device=device) if prompt_embeds[-1].shape[-1] > 768 else None diff --git a/modules/sd_models.py b/modules/sd_models.py index 81214bae5..22926447d 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -1158,6 +1158,9 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No sd_model.embedding_db.load_textual_inversion_embeddings(force_reload=True) timer.record("embeddings") + from modules.prompt_parser_diffusers import insert_parser_highjack + insert_parser_highjack(sd_model.__class__.__name__) + set_diffuser_options(sd_model, vae, op) if op == 'model': sd_vae.apply_vae_config(shared.sd_model.sd_checkpoint_info.filename, vae_file, sd_model) From 092a326c09e8ba81e5d913f34a4499ad7bcf92e1 Mon Sep 17 00:00:00 2001 From: Disty0 Date: Thu, 20 Jun 2024 14:47:30 +0300 Subject: [PATCH 48/81] Add torch_gc to state.nextjob, vae and upscale --- modules/devices.py | 9 ++++++--- modules/processing_helpers.py | 1 + modules/processing_vae.py | 2 
++ modules/shared_state.py | 2 ++ 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/modules/devices.py b/modules/devices.py index f3bdbaffe..9f762b544 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -140,13 +140,16 @@ def torch_gc(force=False): used_gpu = round(100 * gpu.get('used', 0) / gpu.get('total', 1)) if gpu.get('total', 1) > 1 else 0 used_ram = round(100 * ram.get('used', 0) / ram.get('total', 1)) if ram.get('total', 1) > 1 else 0 global previous_oom # pylint: disable=global-statement + if force or shared.opts.torch_gc_threshold == 0: + log.debug(f'Forced Torch GC: GPU={used_gpu}% RAM={used_ram}% {mem}') + force = True + elif used_gpu >= shared.opts.torch_gc_threshold or used_ram >= shared.opts.torch_gc_threshold: + log.info(f'High memory utilization: GPU={used_gpu}% RAM={used_ram}% {mem}') + force = True if oom > previous_oom: previous_oom = oom log.warning(f'GPU out-of-memory error: {mem}') force = True - if used_gpu >= shared.opts.torch_gc_threshold or used_ram >= shared.opts.torch_gc_threshold: - log.info(f'High memory utilization: GPU={used_gpu}% RAM={used_ram}% {mem}') - force = True if not force: return diff --git a/modules/processing_helpers.py b/modules/processing_helpers.py index 5baf0193f..486dc09a5 100644 --- a/modules/processing_helpers.py +++ b/modules/processing_helpers.py @@ -400,6 +400,7 @@ def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler else: resized_image = img resized_images.append(resized_image) + devices.torch_gc() return resized_images diff --git a/modules/processing_vae.py b/modules/processing_vae.py index 5db51a176..9b295e39c 100644 --- a/modules/processing_vae.py +++ b/modules/processing_vae.py @@ -140,6 +140,7 @@ def vae_decode(latents, model, output_type='np', full_quality=True): if shared.cmd_opts.profile: t1 = time.time() shared.log.debug(f'Profile: VAE decode: {t1-t0:.2f}') + devices.torch_gc() return imgs @@ -155,4 +156,5 @@ def vae_encode(image, model, full_quality=True): # pylint: disable=unused-variab latents = full_vae_encode(image=tensor, model=shared.sd_model) else: latents = taesd_vae_encode(image=tensor) + devices.torch_gc() return latents diff --git a/modules/shared_state.py b/modules/shared_state.py index 470ee19c7..79ee20f19 100644 --- a/modules/shared_state.py +++ b/modules/shared_state.py @@ -41,10 +41,12 @@ def pause(self): log.debug(f'Requested {"pause" if self.paused else "continue"}') def nextjob(self): + import modules.devices self.do_set_current_image() self.job_no += 1 self.sampling_step = 0 self.current_image_sampling_step = 0 + modules.devices.torch_gc() def dict(self): obj = { From 9be91e2e2d70fd23817e8ea853eacdac9514efe8 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Thu, 20 Jun 2024 09:01:24 -0400 Subject: [PATCH 49/81] update notes --- CHANGELOG.md | 6 +++--- TODO.md | 1 + modules/prompt_parser_diffusers.py | 5 ++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dec59dce..506d2df5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,12 +3,11 @@ ## Pending - Diffusers==0.30.0 -- https://github.com/huggingface/diffusers/pull/8566 - https://github.com/huggingface/diffusers/pull/8584 -## Update for 2024-06-19 +## Update for 2024-06-20 -### Highlights for 2024-06-19 +### Highlights for 2024-06-20 Following zero-day **SD3** release, a week later here's a refresh with more than a few improvements. 
But there's more than SD3: @@ -51,6 +50,7 @@ But there's more than SD3: - improved google.colab support - css tweaks for standardui - css tweaks for modernui +- additional torch gc checks, thanks @Disty0! ### Fixes diff --git a/TODO.md b/TODO.md index fd704b838..0647494dc 100644 --- a/TODO.md +++ b/TODO.md @@ -11,6 +11,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma - diffusers public callbacks - include reference styles - lora: sc lora, dora, etc +- sd3 controlnet: ## Experimental diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 9174426e4..6404b47c0 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -12,10 +12,9 @@ debug = shared.log.trace if os.environ.get('SD_PROMPT_DEBUG', None) is not None else lambda *args, **kwargs: None debug('Trace: PROMPT') orig_encode_token_ids_to_embeddings = EmbeddingsProvider._encode_token_ids_to_embeddings # pylint: disable=protected-access -token_dict = None -token_type = None +token_dict = None # used by helper get_tokens +token_type = None # used by helper get_tokens cache = {} -cache_type = None def compel_hijack(self, token_ids: torch.Tensor, From 50431c0a9273cc568affc11327955330b2aa3a5f Mon Sep 17 00:00:00 2001 From: Disty0 Date: Thu, 20 Jun 2024 16:17:05 +0300 Subject: [PATCH 50/81] ROCm fix memory exceptions --- CHANGELOG.md | 1 + modules/sd_vae_approx.py | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 506d2df5a..4073b1809 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,7 @@ But there's more than SD3: - fix sdxl "has been incorrectly initialized" - fix api face-hires - fix api ip-adapter +- fix memory exceptions with ROCm, thanks @Disty0! 
- cleanup image metadata - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified diff --git a/modules/sd_vae_approx.py b/modules/sd_vae_approx.py index e66b78011..1e5984145 100644 --- a/modules/sd_vae_approx.py +++ b/modules/sd_vae_approx.py @@ -34,19 +34,22 @@ def forward(self, x): def nn_approximation(sample): # Approximate NN global sd_vae_approx_model # pylint: disable=global-statement + # ROCm throws memory exceptions and crashes the GPU with it if we use approx on the GPU + device = devices.device if devices.backend != "rocm" else "cpu" + dtype = devices.dtype_vae if devices.backend != "rocm" else torch.float32 if sd_vae_approx_model is None: model_path = os.path.join(paths.models_path, "VAE-approx", "model.pt") sd_vae_approx_model = VAEApprox() if not os.path.exists(model_path): model_path = os.path.join(paths.script_path, "models", "VAE-approx", "model.pt") - approx_weights = torch.load(model_path, map_location='cpu' if devices.device.type != 'cuda' else None) + approx_weights = torch.load(model_path, map_location='cpu' if devices.device.type != 'cuda' or devices.backend == "rocm" else None) sd_vae_approx_model.load_state_dict(approx_weights) sd_vae_approx_model.eval() - sd_vae_approx_model.to(devices.device, sample.dtype) + sd_vae_approx_model.to(device, dtype) shared.log.debug(f'VAE load: type=approximate model={model_path}') try: - in_sample = sample.to(devices.device).unsqueeze(0) - sd_vae_approx_model.to(devices.device, devices.dtype) + in_sample = sample.to(device, dtype).unsqueeze(0) + sd_vae_approx_model.to(device, dtype) x_sample = sd_vae_approx_model(in_sample) x_sample = x_sample[0].detach().cpu() return x_sample From 4e80e6c40ccdc889d102e3fe1dc4749b7c50cbb4 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Thu, 20 Jun 2024 11:45:49 -0400 Subject: [PATCH 51/81] add option to disable text-encoder cache --- modules/prompt_parser_diffusers.py | 29 ++++++++++++++++------------- modules/shared.py | 7 ++++--- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 6404b47c0..13f50e432 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -132,7 +132,7 @@ def encode_prompts(pipe, p, prompts: list, negative_prompts: list, steps: int, c if 'StableDiffusion' not in pipe.__class__.__name__ and 'DemoFusion' not in pipe.__class__.__name__ and 'StableCascade' not in pipe.__class__.__name__: shared.log.warning(f"Prompt parser not supported: {pipe.__class__.__name__}") return - elif prompts == cache.get('prompts', None) and negative_prompts == cache.get('negative_prompts', None) and clip_skip == cache.get('clip_skip', None) and cache.get('model_type', None) == shared.sd_model_type and steps == cache.get('steps', None): + elif shared.opts.sd_textencoder_cache and prompts == cache.get('prompts', None) and negative_prompts == cache.get('negative_prompts', None) and clip_skip == cache.get('clip_skip', None) and cache.get('model_type', None) == shared.sd_model_type and steps == cache.get('steps', None): p.prompt_embeds = cache.get('prompt_embeds', None) p.positive_pooleds = cache.get('positive_pooleds', None) p.negative_embeds = cache.get('negative_embeds', None) @@ -163,18 +163,21 @@ def encode_prompts(pipe, p, prompts: list, negative_prompts: list, steps: int, c if negative_pooled is not None: p.negative_pooleds.append(torch.cat([negative_pooled] * len(negative_prompts), dim=0)) - cache.update({ - 'prompt_embeds': 
p.prompt_embeds, - 'negative_embeds': p.negative_embeds, - 'positive_pooleds': p.positive_pooleds, - 'negative_pooleds': p.negative_pooleds, - 'scheduled_prompt': p.scheduled_prompt, - 'prompts': prompts, - 'negative_prompts': negative_prompts, - 'clip_skip': clip_skip, - 'steps': steps, - 'model_type': shared.sd_model_type - }) + if shared.opts.sd_textencoder_cache: + cache.update({ + 'prompt_embeds': p.prompt_embeds, + 'negative_embeds': p.negative_embeds, + 'positive_pooleds': p.positive_pooleds, + 'negative_pooleds': p.negative_pooleds, + 'scheduled_prompt': p.scheduled_prompt, + 'prompts': prompts, + 'negative_prompts': negative_prompts, + 'clip_skip': clip_skip, + 'steps': steps, + 'model_type': shared.sd_model_type + }) + else: + cache.clear() if debug_enabled: get_tokens('positive', prompts[0]) get_tokens('negative', negative_prompts[0]) diff --git a/modules/shared.py b/modules/shared.py index de2cebf64..81b32fa8b 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -386,19 +386,20 @@ def temp_disable_extensions(): sdp_options_default = ['Flash attention', 'Memory attention', 'Math attention'] options_templates.update(options_section(('sd', "Execution & Models"), { - "sd_backend": OptionInfo(default_backend, "Execution backend", gr.Radio, {"choices": ["original", "diffusers"] }), + "sd_backend": OptionInfo(default_backend, "Execution backend", gr.Radio, {"choices": ["diffusers", "original"] }), "sd_model_checkpoint": OptionInfo(default_checkpoint, "Base model", gr.Dropdown, lambda: {"choices": list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_model_refiner": OptionInfo('None', "Refiner model", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list), "sd_unet": OptionInfo("None", "UNET model", gr.Dropdown, lambda: {"choices": shared_items.sd_unet_items()}, refresh=shared_items.refresh_unet_list), "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": ['None', 'T5 FP4', 'T5 FP8', 'T5 INT8', 'T5 FP16']}), - "sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"), "sd_model_dict": OptionInfo('None', "Use separate base dict", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), + "sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"), + "sd_textencoder_cache": OptionInfo(True, "Cache text encoder results"), "stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": not native }), "model_reuse_dict": OptionInfo(False, "Reuse loaded model dictionary", gr.Checkbox, {"visible": False}), - "prompt_attention": OptionInfo("Full parser", "Prompt attention parser", gr.Radio, {"choices": ["Full parser", "Compel parser", "A1111 parser", "Fixed attention"] }), "prompt_mean_norm": OptionInfo(False, "Prompt attention normalization", gr.Checkbox), "comma_padding_backtrack": OptionInfo(20, "Prompt padding", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1, "visible": not native }), + "prompt_attention": OptionInfo("Full parser", "Prompt attention parser", gr.Radio, {"choices": ["Full parser", "Compel parser", "A1111 parser", "Fixed attention"] }), "sd_checkpoint_cache": OptionInfo(0, "Cached models", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": not native }), "sd_vae_checkpoint_cache": OptionInfo(0, "Cached VAEs", gr.Slider, 
{"minimum": 0, "maximum": 10, "step": 1, "visible": False}), "sd_disable_ckpt": OptionInfo(False, "Disallow models in ckpt format", gr.Checkbox, {"visible": False}), From 21b73df51afe1c5e852ca1b316aa89d651eb4b44 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Thu, 20 Jun 2024 11:57:26 -0400 Subject: [PATCH 52/81] correct python check --- installer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer.py b/installer.py index 601cda66b..aaaad060b 100644 --- a/installer.py +++ b/installer.py @@ -399,7 +399,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None): if args.quick: return log.info(f'Python version={platform.python_version()} platform={platform.system()} bin="{sys.executable}" venv="{sys.prefix}"') - if int(sys.version_info.major) == 3 and int(sys.version_info.minor) == 12 and int(sys.version_info.minor) > 3: # TODO python 3.12.4 or higher cause a mess with pydantic + if int(sys.version_info.major) == 3 and int(sys.version_info.minor) == 12 and int(sys.version_info.micro) > 3: # TODO python 3.12.4 or higher cause a mess with pydantic log.error(f"Incompatible Python version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro} required 3.12.3 or lower") if reason is not None: log.error(reason) From 1a6d313340501e3ec7e851bfd4c7c075e6f9e175 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Thu, 20 Jun 2024 13:46:31 -0400 Subject: [PATCH 53/81] cleanup --- modules/pag/pipe_sdxl.py | 56 ++++++++++---------- modules/sd_models.py | 2 + modules/sd_samplers_common.py | 7 ++- scripts/{face-details.py => face_details.py} | 0 4 files changed, 34 insertions(+), 31 deletions(-) rename scripts/{face-details.py => face_details.py} (100%) diff --git a/modules/pag/pipe_sdxl.py b/modules/pag/pipe_sdxl.py index 13ba99bd5..28690c577 100644 --- a/modules/pag/pipe_sdxl.py +++ b/modules/pag/pipe_sdxl.py @@ -461,8 +461,10 @@ def __init__( image_encoder=image_encoder, feature_extractor=feature_extractor, ) - self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) - self.register_to_config(requires_aesthetics_score=requires_aesthetics_score) + if 'force_zeros_for_empty_prompt' in self.config: + self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) + if 'requires_aesthetics_score' in self.config: + self.register_to_config(requires_aesthetics_score=requires_aesthetics_score) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.default_sample_size = self.unet.config.sample_size @@ -1500,7 +1502,7 @@ def __call__( else: replace_processor = PAGIdentitySelfAttnProcessor() - if(self.pag_applied_layers_index): + if self.pag_applied_layers_index: drop_layers = self.pag_applied_layers_index for drop_layer in drop_layers: layer_number = int(drop_layer[1:]) @@ -1517,7 +1519,7 @@ def __call__( raise ValueError( f"Invalid layer index: {drop_layer}. Available layers: {len(down_layers)} down layers, {len(mid_layers)} mid layers, {len(up_layers)} up layers." 
) - elif(self.pag_applied_layers): + elif self.pag_applied_layers: drop_full_layers = self.pag_applied_layers for drop_full_layer in drop_full_layers: try: @@ -1621,7 +1623,7 @@ def __call__( if XLA_AVAILABLE: xm.mark_step() - if not output_type == "latent": + if output_type != "latent": # make sure the VAE is in float32 mode, as it overflows in float16 needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast @@ -1656,7 +1658,7 @@ def __call__( else: image = latents - if not output_type == "latent": + if output_type != "latent": # apply watermark if available if self.watermark is not None: image = self.watermark.apply_watermark(image) @@ -1671,7 +1673,7 @@ def __call__( #Change the attention layers back to original ones after PAG was applied if self.do_adversarial_guidance: - if(self.pag_applied_layers_index): + if self.pag_applied_layers_index: drop_layers = self.pag_applied_layers_index for drop_layer in drop_layers: layer_number = int(drop_layer[1:]) @@ -1685,26 +1687,22 @@ def __call__( else: raise ValueError(f"Invalid layer type: {drop_layer[0]}") except IndexError: - raise ValueError( - f"Invalid layer index: {drop_layer}. Available layers: {len(down_layers)} down layers, {len(mid_layers)} mid layers, {len(up_layers)} up layers." - ) - elif(self.pag_applied_layers): - drop_full_layers = self.pag_applied_layers - for drop_full_layer in drop_full_layers: - try: - if drop_full_layer == "down": - for down_layer in down_layers: - down_layer.processor = AttnProcessor2_0() - elif drop_full_layer == "mid": - for mid_layer in mid_layers: - mid_layer.processor = AttnProcessor2_0() - elif drop_full_layer == "up": - for up_layer in up_layers: - up_layer.processor = AttnProcessor2_0() - else: - raise ValueError(f"Invalid layer type: {drop_full_layer}") - except IndexError: - raise ValueError( - f"Invalid layer index: {drop_full_layer}. Available layers are: down, mid and up. If you need to specify each layer index, you can use `pag_applied_layers_index`" - ) + raise ValueError(f"Invalid layer index: {drop_layer}. Available layers: {len(down_layers)} down layers, {len(mid_layers)} mid layers, {len(up_layers)} up layers.") + elif self.pag_applied_layers: + drop_full_layers = self.pag_applied_layers + for drop_full_layer in drop_full_layers: + try: + if drop_full_layer == "down": + for down_layer in down_layers: + down_layer.processor = AttnProcessor2_0() + elif drop_full_layer == "mid": + for mid_layer in mid_layers: + mid_layer.processor = AttnProcessor2_0() + elif drop_full_layer == "up": + for up_layer in up_layers: + up_layer.processor = AttnProcessor2_0() + else: + raise ValueError(f"Invalid layer type: {drop_full_layer}") + except IndexError: + raise ValueError(f"Invalid layer index: {drop_full_layer}. Available layers are: down, mid and up. 
If you need to specify each layer index, you can use `pag_applied_layers_index`") return StableDiffusionXLPipelineOutput(images=image) diff --git a/modules/sd_models.py b/modules/sd_models.py index be01df7b3..9de486987 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -1113,6 +1113,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No if sd_model is None: shared.log.error('Diffuser model not loaded') return + if 'requires_aesthetics_score' in sd_model.config: + sd_model.register_to_config(requires_aesthetics_score=False) sd_model.sd_model_hash = checkpoint_info.calculate_shorthash() # pylint: disable=attribute-defined-outside-init sd_model.sd_checkpoint_info = checkpoint_info # pylint: disable=attribute-defined-outside-init sd_model.sd_model_checkpoint = checkpoint_info.filename # pylint: disable=attribute-defined-outside-init diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index c6ef9131f..8d6694f5c 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -40,8 +40,11 @@ def single_sample_to_image(sample, approximation=None): warn_once('Unknown decode type') approximation = 0 # normal sample is [4,64,64] - if sample.dtype == torch.bfloat16: - sample = sample.to(torch.float16) + try: + if sample.dtype == torch.bfloat16: + sample = sample.to(torch.float16) + except Exception as e: + warn_once(f'live preview: {e}') if len(sample.shape) > 4: # likely unknown video latent (e.g. svd) return Image.new(mode="RGB", size=(512, 512)) if len(sample) == 16: # sd_cascade diff --git a/scripts/face-details.py b/scripts/face_details.py similarity index 100% rename from scripts/face-details.py rename to scripts/face_details.py From 008b6effb538864af276de4294aaa125c97724b3 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Thu, 20 Jun 2024 15:47:01 -0400 Subject: [PATCH 54/81] remove pag extra config --- modules/pag/pipe_sdxl.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/pag/pipe_sdxl.py b/modules/pag/pipe_sdxl.py index 28690c577..89a1caa76 100644 --- a/modules/pag/pipe_sdxl.py +++ b/modules/pag/pipe_sdxl.py @@ -461,10 +461,10 @@ def __init__( image_encoder=image_encoder, feature_extractor=feature_extractor, ) - if 'force_zeros_for_empty_prompt' in self.config: - self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) - if 'requires_aesthetics_score' in self.config: - self.register_to_config(requires_aesthetics_score=requires_aesthetics_score) + # if 'force_zeros_for_empty_prompt' in self.config: + # self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) + # if 'requires_aesthetics_score' in self.config: + # self.register_to_config(requires_aesthetics_score=requires_aesthetics_score) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.default_sample_size = self.unet.config.sample_size From ea61900a4cdc83f91518614af952ba11d824b5da Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Thu, 20 Jun 2024 18:03:15 -0400 Subject: [PATCH 55/81] fix bfloat and pag --- modules/pag/pipe_sdxl.py | 7 +++---- modules/sd_samplers_common.py | 1 - modules/sd_vae_approx.py | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/modules/pag/pipe_sdxl.py b/modules/pag/pipe_sdxl.py index 89a1caa76..429384ea3 100644 --- a/modules/pag/pipe_sdxl.py +++ b/modules/pag/pipe_sdxl.py @@ -461,10 +461,9 @@ def __init__( 
image_encoder=image_encoder, feature_extractor=feature_extractor, ) - # if 'force_zeros_for_empty_prompt' in self.config: - # self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) - # if 'requires_aesthetics_score' in self.config: - # self.register_to_config(requires_aesthetics_score=requires_aesthetics_score) + if 'requires_aesthetics_score' in self.config: + self.register_to_config(requires_aesthetics_score=requires_aesthetics_score) + self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.default_sample_size = self.unet.config.sample_size diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 8d6694f5c..54a38cf55 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -51,7 +51,6 @@ def single_sample_to_image(sample, approximation=None): sd_cascade = True if len(sample.shape) == 4 and sample.shape[0]: # likely animatediff latent sample = sample.permute(1, 0, 2, 3)[0] - if shared.native: # [-x,x] to [-5,5] sample_max = torch.max(sample) if sample_max > 5: diff --git a/modules/sd_vae_approx.py b/modules/sd_vae_approx.py index 1e5984145..2b4399edb 100644 --- a/modules/sd_vae_approx.py +++ b/modules/sd_vae_approx.py @@ -51,7 +51,7 @@ def nn_approximation(sample): # Approximate NN in_sample = sample.to(device, dtype).unsqueeze(0) sd_vae_approx_model.to(device, dtype) x_sample = sd_vae_approx_model(in_sample) - x_sample = x_sample[0].detach().cpu() + x_sample = x_sample[0].to(torch.float32).detach().cpu() return x_sample except Exception as e: shared.log.error(f'VAE decode approximate: {e}') From e51599cc6fee243cc0ba26a1c21df592233fe3bc Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Thu, 20 Jun 2024 18:21:42 -0400 Subject: [PATCH 56/81] disable model load fallback on unknown model --- modules/sd_models.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/sd_models.py b/modules/sd_models.py index 9de486987..11bb5602a 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -255,15 +255,16 @@ def select_checkpoint(op='model'): shared.log.info(" or use --ckpt-dir to specify folder with sd models") shared.log.info(" or use --ckpt to force using specific model") return None - checkpoint_info = next(iter(checkpoints_list.values())) + # checkpoint_info = next(iter(checkpoints_list.values())) if model_checkpoint is not None: if model_checkpoint != 'model.ckpt' and model_checkpoint != 'runwayml/stable-diffusion-v1-5': - shared.log.warning(f"Selected checkpoint not found: {model_checkpoint}") + shared.log.warning(f'Selected: {op}="{model_checkpoint}" not found') else: shared.log.info("Selecting first available checkpoint") # shared.log.warning(f"Loading fallback checkpoint: {checkpoint_info.title}") - shared.opts.data['sd_model_checkpoint'] = checkpoint_info.title - shared.log.info(f'Select: {op}="{checkpoint_info.title if checkpoint_info is not None else None}"') + # shared.opts.data['sd_model_checkpoint'] = checkpoint_info.title + else: + shared.log.info(f'Select: {op}="{checkpoint_info.title if checkpoint_info is not None else None}"') return checkpoint_info @@ -936,6 +937,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No checkpoint_info = checkpoint_info or select_checkpoint(op=op) if checkpoint_info is None: + print('HERE1') 
unload_model_weights(op=op) return @@ -1574,6 +1576,7 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', else: load_diffuser(checkpoint_info, already_loaded_state_dict=state_dict, timer=timer, op=op) if load_dict and next_checkpoint_info is not None: + print('HERE2') model_data.sd_dict = shared.opts.sd_model_dict shared.opts.data["sd_model_checkpoint"] = next_checkpoint_info.title reload_model_weights(reuse_dict=True) # ok we loaded dict now lets redo and load model on top of it @@ -1587,6 +1590,7 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', shared.opts.data["sd_model_refiner"] = checkpoint_info.title return model_data.sd_refiner + print('HERE3') # fallback shared.log.info(f"Loading using fallback: {op} model={checkpoint_info.title}") try: From f38fc1790614150c4d884073a430ae10603dd717 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Fri, 21 Jun 2024 08:17:07 -0400 Subject: [PATCH 57/81] update requirements --- CHANGELOG.md | 9 ++------- requirements.txt | 4 ++-- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4073b1809..219e735b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,8 @@ # Change Log for SD.Next -## Pending +## Update for 2024-06-21 -- Diffusers==0.30.0 -- https://github.com/huggingface/diffusers/pull/8584 - -## Update for 2024-06-20 - -### Highlights for 2024-06-20 +### Highlights for 2024-06-21 Following zero-day **SD3** release, a week later here's a refresh with more than a few improvements. But there's more than SD3: diff --git a/requirements.txt b/requirements.txt index 681f1b4d1..555d99e65 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,7 +27,7 @@ fasteners orjson invisible-watermark pi-heif -diffusers==0.29.0 +diffusers==0.29.1 safetensors==0.4.3 tensordict==0.1.2 peft==0.11.1 @@ -54,7 +54,7 @@ protobuf==4.25.3 pytorch_lightning==1.9.4 tokenizers==0.19.1 transformers==4.41.2 -urllib3==1.26.18 +urllib3==1.26.19 Pillow==10.3.0 timm==0.9.16 pydantic==1.10.15 From 0aaabfc2e6b2e343f9c16d058253e6b5fcc0c5fd Mon Sep 17 00:00:00 2001 From: Disty0 Date: Fri, 21 Jun 2024 15:18:09 +0300 Subject: [PATCH 58/81] NNCF fix Lora support without reloading --- extensions-builtin/Lora/network_lora.py | 3 ++- extensions-builtin/Lora/networks.py | 5 ----- modules/sd_models_compile.py | 3 --- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/extensions-builtin/Lora/network_lora.py b/extensions-builtin/Lora/network_lora.py index 76a8322da..5194222a0 100644 --- a/extensions-builtin/Lora/network_lora.py +++ b/extensions-builtin/Lora/network_lora.py @@ -24,7 +24,8 @@ def create_module(self, weights, key, none_ok=False): weight = weights.get(key) if weight is None and none_ok: return None - is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention, diffusers_lora.LoRACompatibleLinear] + linear_modules = [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention, diffusers_lora.LoRACompatibleLinear] + is_linear = type(self.sd_module) in linear_modules or self.sd_module.__class__.__name__ == "NNCFLinear" is_conv = type(self.sd_module) in [torch.nn.Conv2d, diffusers_lora.LoRACompatibleConv] if is_linear: weight = weight.reshape(weight.shape[0], -1) diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 3ebad5f17..71b5b29dc 100644 --- a/extensions-builtin/Lora/networks.py +++ 
b/extensions-builtin/Lora/networks.py @@ -185,13 +185,10 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No shared.compiled_model_state.lora_model = [] if recompile_model: backup_cuda_compile = shared.opts.cuda_compile - backup_nncf_compress_weights = shared.opts.nncf_compress_weights sd_models.unload_model_weights(op='model') shared.opts.cuda_compile = False - shared.opts.nncf_compress_weights = [] sd_models.reload_model_weights(op='model') shared.opts.cuda_compile = backup_cuda_compile - shared.opts.nncf_compress_weights = backup_nncf_compress_weights loaded_networks.clear() for i, (network_on_disk, name) in enumerate(zip(networks_on_disk, names)): @@ -235,8 +232,6 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No if recompile_model: shared.log.info("LoRA recompiling model") backup_lora_model = shared.compiled_model_state.lora_model - if shared.opts.nncf_compress_weights and not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): - shared.sd_model = sd_models_compile.nncf_compress_weights(shared.sd_model) if shared.opts.cuda_compile: shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model) diff --git a/modules/sd_models_compile.py b/modules/sd_models_compile.py index 6e629788c..ed3109869 100644 --- a/modules/sd_models_compile.py +++ b/modules/sd_models_compile.py @@ -140,9 +140,6 @@ def nncf_compress_weights(sd_model): from installer import install install('nncf==2.7.0', quiet=True) - shared.compiled_model_state = CompiledModelState() - shared.compiled_model_state.is_compiled = True - sd_model = apply_compile_to_model(sd_model, nncf_compress_model, shared.opts.nncf_compress_weights, op="nncf") t1 = time.time() From a1beb777fcde7a75ead74e1e980a35696bf78031 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Fri, 21 Jun 2024 08:35:32 -0400 Subject: [PATCH 59/81] update notes --- CHANGELOG.md | 18 ++++++++++++------ modules/sd_models.py | 16 ++++++++-------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 219e735b7..581940f55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,24 +4,31 @@ ### Highlights for 2024-06-21 -Following zero-day **SD3** release, a week later here's a refresh with more than a few improvements. -But there's more than SD3: -- support for quantized **T5** text encoder in all models that use T5: FP4/FP8/FP16/INT8 (SD3, PixArt-Σ, etc) -- support for **PixArt-Sigma** in small/medium/large variants +Following zero-day **SD3** release, a week later here's a refresh with 10+ improvements +including full prompt attention, support for compressed weights, additional text-encoder quantization modes. + +But there's more than SD3: +- support for quantized **T5** text encoder in all models that use T5: FP4/FP8/FP16/INT8 (SD3, PixArt-Σ, etc) +- support for **PixArt-Sigma** in small/medium/large variants - support for **HunyuanDiT 1.1** - (finally) new release of **Torch-DirectML** +- over 20 overall fixes ### Model Improvements - **SD3**: enable tiny-VAE (TAESD) preview and non-full quality mode - SD3: enable base LoRA support - SD3: add support for FP4 quantized T5 text encoder - simply select in *settings -> model -> text encoder* + simply select in *settings -> model -> text encoder* + *note* for SD3 with T5, set SD.Next to use FP16 precision, not BF16 precision - SD3: add support for INT8 quantized T5 text encoder, thanks @Disty0! - SD3: enable cpu-offloading for T5 text encoder, thanks @Disty0! 
- SD3: simplified loading of model in single-file safetensors format model load can now be performed fully offline - SD3: add support for NNCF compressed weights, thanks @Disty0! +- SD3: full support for prompt parsing and attention, thanks @AI-Casanova! +- SD3: ability to target different prompts to each of text-encoders, thanks @AI-Casanova! + example: `dog TE2: cat TE3: bird` - SD3: add support for sampler shift for Euler FlowMatch see *settings -> samplers*, also available as param in xyz grid higher shift means model will spend more time on structure and less on details @@ -32,7 +39,6 @@ But there's more than SD3: simply select in *settings -> model -> text encoder* before or after model load - **HunyuanDiT**: support for model version 1.1 - ### Improvements: General - support FP4 quantized T5 text encoder, in addtion to existing FP8 and FP16 diff --git a/modules/sd_models.py b/modules/sd_models.py index 80a852b0a..e27372492 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -562,30 +562,30 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): # guess by size if os.path.isfile(f) and f.endswith('.safetensors'): size = round(os.path.getsize(f) / 1024 / 1024) - if size < 128: + if (size < 128): warn(f'Model size smaller than expected: {f} size={size} MB') elif (size >= 316 and size <= 324) or (size >= 156 and size <= 164): # 320 or 160 warn(f'Model detected as VAE model, but attempting to load as model: {op}={f} size={size} MB') guess = 'VAE' - elif size >= 4970 and size <= 4976: # 4973 + elif (size >= 4970 and size <= 4976): # 4973 guess = 'Stable Diffusion 2' # SD v2 but could be eps or v-prediction # elif size < 0: # unknown # guess = 'Stable Diffusion 2B' - elif size >= 5791 and size <= 5799: # 5795 + elif (size >= 5791 and size <= 5799): # 5795 if op == 'model': warn(f'Model detected as SD-XL refiner model, but attempting to load a base model: {op}={f} size={size} MB') guess = 'Stable Diffusion XL Refiner' elif (size >= 6611 and size <= 7220): # 6617, HassakuXL is 6776, monkrenRealisticINT_v10 is 7217 guess = 'Stable Diffusion XL' - elif size >= 3361 and size <= 3369: # 3368 + elif (size >= 3361 and size <= 3369): # 3368 guess = 'Stable Diffusion Upscale' - elif size >= 4891 and size <= 4899: # 4897 + elif (size >= 4891 and size <= 4899): # 4897 guess = 'Stable Diffusion XL Inpaint' - elif size >= 9791 and size <= 9799: # 9794 + elif (size >= 9791 and size <= 9799): # 9794 guess = 'Stable Diffusion XL Instruct' - elif size > 3138 and size < 3142: #3140 + elif (size > 3138 and size < 3142): #3140 guess = 'Stable Diffusion XL' - elif size > 5692 and size < 5698 or size > 4134 and size < 4138: + elif (size > 5692 and size < 5698) or (size > 4134 and size < 4138) or (size > 10362 and size < 10366): guess = 'Stable Diffusion 3' # guess by name """ From 323b20813aa1493302d6d48b46f9bec981a45ea2 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Fri, 21 Jun 2024 08:58:33 -0400 Subject: [PATCH 60/81] fix prompt parsing --- modules/prompt_parser_diffusers.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 3f5f5de56..5a2b38163 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -295,7 +295,7 @@ def split_prompts(prompt, SD3 = False): prompt3 = " " if prompt3.strip() == "" else prompt3.strip() if SD3 and prompt3 != " ": - ps, ws = get_prompts_with_weights(prompt3) + ps, _ws = get_prompts_with_weights(prompt3) 
prompt3 = " ".join(ps) return prompt, prompt2, prompt3 @@ -385,11 +385,15 @@ def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", c prompt_embeds = torch.cat(prompt_embeds, dim=-1) if len(prompt_embeds) > 1 else prompt_embeds[0] negative_prompt_embeds = torch.cat(negative_prompt_embeds, dim=-1) if len(negative_prompt_embeds) > 1 else \ negative_prompt_embeds[0] + if pooled_prompt_embeds == []: + pooled_prompt_embeds = None + if negative_pooled_prompt_embeds == []: + negative_pooled_prompt_embeds = None debug(f'Prompt: positive={prompt_embeds.shape if prompt_embeds is not None else None} pooled={pooled_prompt_embeds.shape if pooled_prompt_embeds is not None else None} negative={negative_prompt_embeds.shape if negative_prompt_embeds is not None else None} pooled={negative_pooled_prompt_embeds.shape if negative_pooled_prompt_embeds is not None else None}') if prompt_embeds.shape[1] != negative_prompt_embeds.shape[1]: [prompt_embeds, negative_prompt_embeds] = pad_to_same_length(pipe, [prompt_embeds, negative_prompt_embeds]) if SD3: - t5_prompt_embed = pipe._get_t5_prompt_embeds( + t5_prompt_embed = pipe._get_t5_prompt_embeds( # pylint: disable=protected-access prompt=prompt_3, num_images_per_prompt=prompt_embeds.shape[0], device=pipe.device, @@ -397,7 +401,7 @@ def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", c prompt_embeds = torch.nn.functional.pad( prompt_embeds, (0, t5_prompt_embed.shape[-1] - prompt_embeds.shape[-1])) prompt_embeds = torch.cat([prompt_embeds, t5_prompt_embed], dim=-2) - t5_negative_prompt_embed = pipe._get_t5_prompt_embeds( + t5_negative_prompt_embed = pipe._get_t5_prompt_embeds( # pylint: disable=protected-access prompt=neg_prompt_3, num_images_per_prompt=prompt_embeds.shape[0], device=pipe.device, From bc0e45d7304912f7632ab8750b0edd50051485ff Mon Sep 17 00:00:00 2001 From: Disty0 Date: Fri, 21 Jun 2024 16:09:39 +0300 Subject: [PATCH 61/81] Update changelog --- CHANGELOG.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 581940f55..4a8f6b344 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,6 @@ But there's more than SD3: - SD3: enable cpu-offloading for T5 text encoder, thanks @Disty0! - SD3: simplified loading of model in single-file safetensors format model load can now be performed fully offline -- SD3: add support for NNCF compressed weights, thanks @Disty0! - SD3: full support for prompt parsing and attention, thanks @AI-Casanova! - SD3: ability to target different prompts to each of text-encoders, thanks @AI-Casanova! example: `dog TE2: cat TE3: bird` @@ -43,7 +42,7 @@ But there's more than SD3: - support FP4 quantized T5 text encoder, in addtion to existing FP8 and FP16 - support for T5 text-encoder loader in **all** models that use T5 - *example*: load FP4 or FP8 quantized T5 text-encoder into PixArt Sigma or Stable Cascade! + *example*: load FP4 or FP8 quantized T5 text-encoder into PixArt Sigma! - support for `torch-directml` **0.2.2**, thanks @lshqqytiger! *note*: new directml is finally based on modern `torch` 2.3.1! - extra networks: info display now contains link to source url if model if its known @@ -53,6 +52,13 @@ But there's more than SD3: - css tweaks for modernui - additional torch gc checks, thanks @Disty0! +**NNCF**, thanks @Disty0! 
+ - SD3 and PixArt support + - moved the first compression step to CPU + - sequential cpu offload (lowvram) support + - Lora support without reloading the model + - ControlNet compression support + ### Fixes - fix unsaturated outputs, force apply vae config on model load From 43a54317ee3a7ee422acd83437968a244bf2c945 Mon Sep 17 00:00:00 2001 From: Disty0 Date: Fri, 21 Jun 2024 18:09:56 +0300 Subject: [PATCH 62/81] Fix face hires with lowvram --- CHANGELOG.md | 1 + modules/sd_models.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a8f6b344..19c778116 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,6 +76,7 @@ But there's more than SD3: - fix api face-hires - fix api ip-adapter - fix memory exceptions with ROCm, thanks @Disty0! +- fix face-hires with lowvram, thanks @Disty0! - cleanup image metadata - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified diff --git a/modules/sd_models.py b/modules/sd_models.py index e27372492..8f04b4d22 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -764,7 +764,7 @@ def move_model(model, device=None, force=False): if model is None or device is None: return if getattr(model, 'vae', None) is not None and get_diffusers_task(model) != DiffusersTaskType.TEXT_2_IMAGE: - if device == devices.device: # force vae back to gpu if not in txt2img mode + if device == devices.device and not model.vae.device.type == "meta": # force vae back to gpu if not in txt2img mode model.vae.to(device) if hasattr(model.vae, '_hf_hook'): debug_move(f'Model move: to={device} class={model.vae.__class__} fn={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access From 8eb26b202c1482a37e696d01e0dd220107c56552 Mon Sep 17 00:00:00 2001 From: Disty0 Date: Fri, 21 Jun 2024 18:21:48 +0300 Subject: [PATCH 63/81] Update changelog --- CHANGELOG.md | 2 +- modules/sd_models.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19c778116..19dec1762 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,7 +52,7 @@ But there's more than SD3: - css tweaks for modernui - additional torch gc checks, thanks @Disty0! -**NNCF**, thanks @Disty0! +**Improvements: NNCF**, thanks @Disty0! 
- SD3 and PixArt support - moved the first compression step to CPU - sequential cpu offload (lowvram) support diff --git a/modules/sd_models.py b/modules/sd_models.py index 8f04b4d22..c4a896de6 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -764,7 +764,7 @@ def move_model(model, device=None, force=False): if model is None or device is None: return if getattr(model, 'vae', None) is not None and get_diffusers_task(model) != DiffusersTaskType.TEXT_2_IMAGE: - if device == devices.device and not model.vae.device.type == "meta": # force vae back to gpu if not in txt2img mode + if device == devices.device and model.vae.device.type != "meta": # force vae back to gpu if not in txt2img mode model.vae.to(device) if hasattr(model.vae, '_hf_hook'): debug_move(f'Model move: to={device} class={model.vae.__class__} fn={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access From 77657611c005ff024af3af02eb21a2c34d544942 Mon Sep 17 00:00:00 2001 From: Disty0 Date: Fri, 21 Jun 2024 19:30:20 +0300 Subject: [PATCH 64/81] Fix Diffusers requires_aesthetics_score --- modules/sd_models.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/modules/sd_models.py b/modules/sd_models.py index c4a896de6..7ab95193b 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -1299,10 +1299,19 @@ def switch_pipe(cls: diffusers.DiffusionPipeline, pipeline: diffusers.DiffusionP def set_diffuser_pipe(pipe, new_pipe_type): + n = getattr(pipe.__class__, '__name__', '') + + if new_pipe_type == DiffusersTaskType.TEXT_2_IMAGE and 'StableDiffusionXL' in n and 'requires_aesthetics_score' in pipe.config: + # Diffusers adds requires_aesthetics_score with img2img and complains if requires_aesthetics_score exist in txt2img + internal_dict = dict(pipe._internal_dict) + internal_dict.pop('requires_aesthetics_score', None) + del pipe._internal_dict + pipe.register_to_config(**internal_dict) + if get_diffusers_task(pipe) == new_pipe_type: return pipe + # skip specific pipelines - n = getattr(pipe.__class__, '__name__', '') if n in ['StableDiffusionReferencePipeline', 'StableDiffusionAdapterPipeline', 'AnimateDiffPipeline', 'AnimateDiffSDXLPipeline']: return pipe if 'Onnx' in pipe.__class__.__name__: From b036c2fc3b73b861147475e076aaed7bb1d657f8 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Fri, 21 Jun 2024 12:57:15 -0400 Subject: [PATCH 65/81] improve gc threshold --- CHANGELOG.md | 2 ++ modules/control/processors.py | 2 +- modules/devices.py | 15 +++++++++------ 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19dec1762..76dac5867 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ But there's more than SD3: - support for **PixArt-Sigma** in small/medium/large variants - support for **HunyuanDiT 1.1** - (finally) new release of **Torch-DirectML** +- additional efficiencies for users with low vram gpus - over 20 overall fixes ### Model Improvements @@ -47,6 +48,7 @@ But there's more than SD3: *note*: new directml is finally based on modern `torch` 2.3.1! 
- extra networks: info display now contains link to source url if model if its known works for civitai and huggingface models +- force gc for lowvram users and improve gc logging - improved google.colab support - css tweaks for standardui - css tweaks for modernui diff --git a/modules/control/processors.py b/modules/control/processors.py index eccaf1e30..19683f6ea 100644 --- a/modules/control/processors.py +++ b/modules/control/processors.py @@ -139,7 +139,7 @@ def reset(self, processor_id: str = None): self.model = None self.processor_id = processor_id # self.override = None - devices.torch_gc() + # devices.torch_gc() self.load_config = { 'cache_dir': cache_dir } def config(self, processor_id = None): diff --git a/modules/devices.py b/modules/devices.py index 9f762b544..d72555192 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -140,11 +140,8 @@ def torch_gc(force=False): used_gpu = round(100 * gpu.get('used', 0) / gpu.get('total', 1)) if gpu.get('total', 1) > 1 else 0 used_ram = round(100 * ram.get('used', 0) / ram.get('total', 1)) if ram.get('total', 1) > 1 else 0 global previous_oom # pylint: disable=global-statement - if force or shared.opts.torch_gc_threshold == 0: - log.debug(f'Forced Torch GC: GPU={used_gpu}% RAM={used_ram}% {mem}') - force = True - elif used_gpu >= shared.opts.torch_gc_threshold or used_ram >= shared.opts.torch_gc_threshold: - log.info(f'High memory utilization: GPU={used_gpu}% RAM={used_ram}% {mem}') + threshold = 0 if (shared.cmd_opts.lowvram and not shared.cmd_opts.use_zluda) else shared.opts.torch_gc_threshold + if force or threshold == 0 or used_gpu >= threshold or used_ram >= threshold: force = True if oom > previous_oom: previous_oom = oom @@ -163,7 +160,13 @@ def torch_gc(force=False): except Exception: pass t1 = time.time() - log.debug(f'GC: collected={collected} device={torch.device(get_optimal_device_name())} {memstats.memory_stats()} time={round(t1 - t0, 2)}') + mem = memstats.memory_stats() + saved = round(gpu.get('used', 0) - mem.get('gpu', {}).get('used', 0), 2) + before = { 'gpu': gpu.get('used', 0), 'ram': ram.get('used', 0) } + after = { 'gpu': mem.get('gpu', {}).get('used', 0), 'ram': mem.get('ram', {}).get('used', 0), 'retries': mem.get('retries', 0), 'oom': mem.get('oom', 0) } + utilization = { 'gpu': used_gpu, 'ram': used_ram, 'threshold': threshold } + results = { 'collected': collected, 'saved': saved } + log.debug(f'GC: utilization={utilization} gc={results} beofre={before} after={after} device={torch.device(get_optimal_device_name())} fn={sys._getframe(1).f_code.co_name} time={round(t1 - t0, 2)}') # pylint: disable=protected-access def set_cuda_sync_mode(mode): From 0f459411e46fb1d484313992de28c6b9c6f653c5 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Fri, 21 Jun 2024 12:59:35 -0400 Subject: [PATCH 66/81] extra check --- modules/sd_models.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/modules/sd_models.py b/modules/sd_models.py index 7ab95193b..4aa561437 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -1299,18 +1299,17 @@ def switch_pipe(cls: diffusers.DiffusionPipeline, pipeline: diffusers.DiffusionP def set_diffuser_pipe(pipe, new_pipe_type): - n = getattr(pipe.__class__, '__name__', '') + if get_diffusers_task(pipe) == new_pipe_type: + return pipe - if new_pipe_type == DiffusersTaskType.TEXT_2_IMAGE and 'StableDiffusionXL' in n and 'requires_aesthetics_score' in pipe.config: - # Diffusers adds requires_aesthetics_score with img2img and complains if 
requires_aesthetics_score exist in txt2img - internal_dict = dict(pipe._internal_dict) + n = getattr(pipe.__class__, '__name__', '') + if new_pipe_type == DiffusersTaskType.TEXT_2_IMAGE and 'StableDiffusionXL' in n and 'requires_aesthetics_score' in pipe.config and hasattr(pipe, '_internal_dict'): + # diffusers adds requires_aesthetics_score with img2img and complains if requires_aesthetics_score exist in txt2img + internal_dict = dict(pipe._internal_dict) # pylint: disable=protected-access internal_dict.pop('requires_aesthetics_score', None) del pipe._internal_dict pipe.register_to_config(**internal_dict) - if get_diffusers_task(pipe) == new_pipe_type: - return pipe - # skip specific pipelines if n in ['StableDiffusionReferencePipeline', 'StableDiffusionAdapterPipeline', 'AnimateDiffPipeline', 'AnimateDiffSDXLPipeline']: return pipe From bbaec297207b74a96d6a3bc1d5741e22f444bf6e Mon Sep 17 00:00:00 2001 From: Disty0 Date: Fri, 21 Jun 2024 21:07:15 +0300 Subject: [PATCH 67/81] Cleanup --- modules/intel/ipex/diffusers.py | 19 ++++++++----------- modules/sd_hijack_dynamic_atten.py | 27 ++++++++++++--------------- modules/sd_models.py | 6 +++--- 3 files changed, 23 insertions(+), 29 deletions(-) diff --git a/modules/intel/ipex/diffusers.py b/modules/intel/ipex/diffusers.py index 732a18568..4f294ce6b 100644 --- a/modules/intel/ipex/diffusers.py +++ b/modules/intel/ipex/diffusers.py @@ -70,8 +70,8 @@ class SlicedAttnProcessor: # pylint: disable=too-few-public-methods def __init__(self, slice_size): self.slice_size = slice_size - def __call__(self, attn: Attention, hidden_states: torch.FloatTensor, - encoder_hidden_states=None, attention_mask=None) -> torch.FloatTensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches + def __call__(self, attn: Attention, hidden_states: torch.Tensor, + encoder_hidden_states=None, attention_mask=None) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches residual = hidden_states @@ -188,14 +188,11 @@ class AttnProcessor: Default processor for performing attention-related computations. 
""" - def __call__(self, attn: Attention, hidden_states: torch.FloatTensor, - encoder_hidden_states=None, attention_mask=None, - temb=None, scale: float = 1.0) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches + def __call__(self, attn, hidden_states: torch.Tensor, encoder_hidden_states=None, attention_mask=None, + temb=None, *args, **kwargs) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches residual = hidden_states - args = () if USE_PEFT_BACKEND else (scale,) - if attn.spatial_norm is not None: hidden_states = attn.spatial_norm(hidden_states, temb) @@ -213,15 +210,15 @@ def __call__(self, attn: Attention, hidden_states: torch.FloatTensor, if attn.group_norm is not None: hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) - query = attn.to_q(hidden_states, *args) + query = attn.to_q(hidden_states) if encoder_hidden_states is None: encoder_hidden_states = hidden_states elif attn.norm_cross: encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) - key = attn.to_k(encoder_hidden_states, *args) - value = attn.to_v(encoder_hidden_states, *args) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) query = attn.head_to_batch_dim(query) key = attn.head_to_batch_dim(key) @@ -292,7 +289,7 @@ def __call__(self, attn: Attention, hidden_states: torch.FloatTensor, hidden_states = attn.batch_to_head_dim(hidden_states) # linear proj - hidden_states = attn.to_out[0](hidden_states, *args) + hidden_states = attn.to_out[0](hidden_states) # dropout hidden_states = attn.to_out[1](hidden_states) diff --git a/modules/sd_hijack_dynamic_atten.py b/modules/sd_hijack_dynamic_atten.py index 5ba9dc6e4..b2d6fdc42 100644 --- a/modules/sd_hijack_dynamic_atten.py +++ b/modules/sd_hijack_dynamic_atten.py @@ -110,8 +110,8 @@ def __init__(self): if not hasattr(F, "scaled_dot_product_attention"): raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") - def __call__( - self, attn, hidden_states: torch.FloatTensor, encoder_hidden_states=None, attention_mask=None, temb=None, scale: float = 1.0) -> torch.FloatTensor: + def __call__(self, attn, hidden_states: torch.Tensor, encoder_hidden_states=None, attention_mask=None, temb=None, *args, **kwargs) -> torch.Tensor: + residual = hidden_states if attn.spatial_norm is not None: hidden_states = attn.spatial_norm(hidden_states, temb) @@ -135,16 +135,15 @@ def __call__( if attn.group_norm is not None: hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) - args = () if USE_PEFT_BACKEND else (scale,) - query = attn.to_q(hidden_states, *args) + query = attn.to_q(hidden_states) if encoder_hidden_states is None: encoder_hidden_states = hidden_states elif attn.norm_cross: encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) - key = attn.to_k(encoder_hidden_states, *args) - value = attn.to_v(encoder_hidden_states, *args) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) inner_dim = key.shape[-1] head_dim = inner_dim // attn.heads @@ -167,7 +166,7 @@ def __call__( hidden_states = hidden_states.to(query.dtype) # linear proj - hidden_states = attn.to_out[0](hidden_states, *args) + hidden_states = attn.to_out[0](hidden_states) # dropout hidden_states = attn.to_out[1](hidden_states) @@ -190,13 +189,11 @@ class DynamicAttnProcessorBMM: based on AttnProcessor V1 """ - def __call__(self, attn, hidden_states: 
torch.FloatTensor, encoder_hidden_states=None, attention_mask=None, - temb=None, scale: float = 1.0) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches + def __call__(self, attn, hidden_states: torch.Tensor, encoder_hidden_states=None, attention_mask=None, + temb=None, *args, **kwargs) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches residual = hidden_states - args = () if USE_PEFT_BACKEND else (scale,) - if attn.spatial_norm is not None: hidden_states = attn.spatial_norm(hidden_states, temb) @@ -214,15 +211,15 @@ def __call__(self, attn, hidden_states: torch.FloatTensor, encoder_hidden_states if attn.group_norm is not None: hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) - query = attn.to_q(hidden_states, *args) + query = attn.to_q(hidden_states) if encoder_hidden_states is None: encoder_hidden_states = hidden_states elif attn.norm_cross: encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) - key = attn.to_k(encoder_hidden_states, *args) - value = attn.to_v(encoder_hidden_states, *args) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) query = attn.head_to_batch_dim(query) key = attn.head_to_batch_dim(key) @@ -294,7 +291,7 @@ def __call__(self, attn, hidden_states: torch.FloatTensor, encoder_hidden_states hidden_states = attn.batch_to_head_dim(hidden_states) # linear proj - hidden_states = attn.to_out[0](hidden_states, *args) + hidden_states = attn.to_out[0](hidden_states) # dropout hidden_states = attn.to_out[1](hidden_states) diff --git a/modules/sd_models.py b/modules/sd_models.py index 4aa561437..6efd56762 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -1299,9 +1299,6 @@ def switch_pipe(cls: diffusers.DiffusionPipeline, pipeline: diffusers.DiffusionP def set_diffuser_pipe(pipe, new_pipe_type): - if get_diffusers_task(pipe) == new_pipe_type: - return pipe - n = getattr(pipe.__class__, '__name__', '') if new_pipe_type == DiffusersTaskType.TEXT_2_IMAGE and 'StableDiffusionXL' in n and 'requires_aesthetics_score' in pipe.config and hasattr(pipe, '_internal_dict'): # diffusers adds requires_aesthetics_score with img2img and complains if requires_aesthetics_score exist in txt2img @@ -1310,6 +1307,9 @@ def set_diffuser_pipe(pipe, new_pipe_type): del pipe._internal_dict pipe.register_to_config(**internal_dict) + if get_diffusers_task(pipe) == new_pipe_type: + return pipe + # skip specific pipelines if n in ['StableDiffusionReferencePipeline', 'StableDiffusionAdapterPipeline', 'AnimateDiffPipeline', 'AnimateDiffSDXLPipeline']: return pipe From e4a8919ca196a495e32d8d02968a4d816ae17fed Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 22 Jun 2024 08:35:18 -0400 Subject: [PATCH 68/81] one more fix for requires_aesthetics_score --- modules/processing_class.py | 2 ++ modules/sd_models.py | 17 +++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/modules/processing_class.py b/modules/processing_class.py index bf37c8e2d..e61cbe401 100644 --- a/modules/processing_class.py +++ b/modules/processing_class.py @@ -516,6 +516,8 @@ def switch_class(p: StableDiffusionProcessing, new_class: type, dct: dict = None for k, v in dct.items(): if k in possible: kwargs[k] = v + if new_class == StableDiffusionProcessingTxt2Img: + sd_models.clean_diffuser_pipe(shared.sd_model) debug(f"Switching class: {p.__class__.__name__} -> {new_class.__name__} fn={sys._getframe(1).f_code.co_name}") # pylint: 
disable=protected-access p.__class__ = new_class p.__init__(**kwargs) diff --git a/modules/sd_models.py b/modules/sd_models.py index 6efd56762..bfcd7a0e2 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -562,7 +562,7 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): # guess by size if os.path.isfile(f) and f.endswith('.safetensors'): size = round(os.path.getsize(f) / 1024 / 1024) - if (size < 128): + if size < 128: warn(f'Model size smaller than expected: {f} size={size} MB') elif (size >= 316 and size <= 324) or (size >= 156 and size <= 164): # 320 or 160 warn(f'Model detected as VAE model, but attempting to load as model: {op}={f} size={size} MB') @@ -937,7 +937,6 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No checkpoint_info = checkpoint_info or select_checkpoint(op=op) if checkpoint_info is None: - print('HERE1') unload_model_weights(op=op) return @@ -1115,8 +1114,6 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No if sd_model is None: shared.log.error('Diffuser model not loaded') return - if 'requires_aesthetics_score' in sd_model.config: - sd_model.register_to_config(requires_aesthetics_score=False) sd_model.sd_model_hash = checkpoint_info.calculate_shorthash() # pylint: disable=attribute-defined-outside-init sd_model.sd_checkpoint_info = checkpoint_info # pylint: disable=attribute-defined-outside-init sd_model.sd_model_checkpoint = checkpoint_info.filename # pylint: disable=attribute-defined-outside-init @@ -1298,15 +1295,21 @@ def switch_pipe(cls: diffusers.DiffusionPipeline, pipeline: diffusers.DiffusionP return pipeline -def set_diffuser_pipe(pipe, new_pipe_type): +def clean_diffuser_pipe(pipe): n = getattr(pipe.__class__, '__name__', '') - if new_pipe_type == DiffusersTaskType.TEXT_2_IMAGE and 'StableDiffusionXL' in n and 'requires_aesthetics_score' in pipe.config and hasattr(pipe, '_internal_dict'): + if 'StableDiffusionXL' in n and 'requires_aesthetics_score' in pipe.config and hasattr(pipe, '_internal_dict'): # diffusers adds requires_aesthetics_score with img2img and complains if requires_aesthetics_score exist in txt2img internal_dict = dict(pipe._internal_dict) # pylint: disable=protected-access internal_dict.pop('requires_aesthetics_score', None) del pipe._internal_dict pipe.register_to_config(**internal_dict) + +def set_diffuser_pipe(pipe, new_pipe_type): + n = getattr(pipe.__class__, '__name__', '') + if new_pipe_type == DiffusersTaskType.TEXT_2_IMAGE: + clean_diffuser_pipe(pipe) + if get_diffusers_task(pipe) == new_pipe_type: return pipe @@ -1587,7 +1590,6 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', else: load_diffuser(checkpoint_info, already_loaded_state_dict=state_dict, timer=timer, op=op) if load_dict and next_checkpoint_info is not None: - print('HERE2') model_data.sd_dict = shared.opts.sd_model_dict shared.opts.data["sd_model_checkpoint"] = next_checkpoint_info.title reload_model_weights(reuse_dict=True) # ok we loaded dict now lets redo and load model on top of it @@ -1601,7 +1603,6 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', shared.opts.data["sd_model_refiner"] = checkpoint_info.title return model_data.sd_refiner - print('HERE3') # fallback shared.log.info(f"Loading using fallback: {op} model={checkpoint_info.title}") try: From 01ca2d40fb95015f939f05b35077730a02fed519 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 22 Jun 2024 08:43:06 -0400 Subject: [PATCH 
69/81] cleanup --- modules/processing_helpers.py | 3 +-- modules/sd_models.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/modules/processing_helpers.py b/modules/processing_helpers.py index 486dc09a5..110b32b93 100644 --- a/modules/processing_helpers.py +++ b/modules/processing_helpers.py @@ -351,11 +351,10 @@ def validate_sample(tensor): cast = sample.astype(np.uint8) if len(w) > 0: nans = np.isnan(sample).sum() - shared.log.error(f'Failed to validate samples: sample={sample.shape} invalid={nans}') cast = np.nan_to_num(sample) minimum, maximum, mean = np.min(cast), np.max(cast), np.mean(cast) cast = cast.astype(np.uint8) - shared.log.warning(f'Attempted to correct samples: min={minimum:.2f} max={maximum:.2f} mean={mean:.2f}') + shared.log.error(f'Failed to validate samples: sample={sample.shape} min={minimum:.2f} max={maximum:.2f} mean={mean:.2f} invalid={nans}') return cast diff --git a/modules/sd_models.py b/modules/sd_models.py index bfcd7a0e2..3146b4c2a 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -1296,8 +1296,7 @@ def switch_pipe(cls: diffusers.DiffusionPipeline, pipeline: diffusers.DiffusionP def clean_diffuser_pipe(pipe): - n = getattr(pipe.__class__, '__name__', '') - if 'StableDiffusionXL' in n and 'requires_aesthetics_score' in pipe.config and hasattr(pipe, '_internal_dict'): + if pipe is not None and shared.sd_model_type == 'sdxl' and 'requires_aesthetics_score' in pipe.config and hasattr(pipe, '_internal_dict'): # diffusers adds requires_aesthetics_score with img2img and complains if requires_aesthetics_score exist in txt2img internal_dict = dict(pipe._internal_dict) # pylint: disable=protected-access internal_dict.pop('requires_aesthetics_score', None) From 3856cb213aee391680af0d3a433f8682fcd8a713 Mon Sep 17 00:00:00 2001 From: Disty0 Date: Sat, 22 Jun 2024 15:43:40 +0300 Subject: [PATCH 70/81] Fix SD3 prompt parser with offload --- modules/prompt_parser_diffusers.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 5a2b38163..18dadc408 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -393,20 +393,23 @@ def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", c if prompt_embeds.shape[1] != negative_prompt_embeds.shape[1]: [prompt_embeds, negative_prompt_embeds] = pad_to_same_length(pipe, [prompt_embeds, negative_prompt_embeds]) if SD3: + device = pipe.device if str(pipe.device) != 'meta' else devices.device t5_prompt_embed = pipe._get_t5_prompt_embeds( # pylint: disable=protected-access - prompt=prompt_3, - num_images_per_prompt=prompt_embeds.shape[0], - device=pipe.device, - ) + prompt=prompt_3, + num_images_per_prompt=prompt_embeds.shape[0], + device=device, + ) prompt_embeds = torch.nn.functional.pad( - prompt_embeds, (0, t5_prompt_embed.shape[-1] - prompt_embeds.shape[-1])) + prompt_embeds, (0, t5_prompt_embed.shape[-1] - prompt_embeds.shape[-1]) + ).to(device) prompt_embeds = torch.cat([prompt_embeds, t5_prompt_embed], dim=-2) t5_negative_prompt_embed = pipe._get_t5_prompt_embeds( # pylint: disable=protected-access prompt=neg_prompt_3, num_images_per_prompt=prompt_embeds.shape[0], - device=pipe.device, + device=device, ) negative_prompt_embeds = torch.nn.functional.pad( - negative_prompt_embeds, (0, t5_negative_prompt_embed.shape[-1] - negative_prompt_embeds.shape[-1])) + negative_prompt_embeds, (0, t5_negative_prompt_embed.shape[-1] - 
negative_prompt_embeds.shape[-1]) + ).to(device) negative_prompt_embeds = torch.cat([negative_prompt_embeds, t5_negative_prompt_embed], dim=-2) return prompt_embeds, pooled_prompt_embeds, negative_prompt_embeds, negative_pooled_prompt_embeds From 2f9f2c5c52fbba10bfdb326a6bd9b2b2ae05c6a8 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 22 Jun 2024 18:00:01 -0400 Subject: [PATCH 71/81] fix inpaint pipeline missing requires_aesthetics_score --- modules/processing_args.py | 2 ++ modules/sd_models.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/modules/processing_args.py b/modules/processing_args.py index 163c51efd..7f47b6352 100644 --- a/modules/processing_args.py +++ b/modules/processing_args.py @@ -27,6 +27,7 @@ def task_specific_kwargs(p, model): 'height': 8 * math.ceil(p.height / 8), } elif (sd_models.get_diffusers_task(model) == sd_models.DiffusersTaskType.IMAGE_2_IMAGE or is_img2img_model) and len(getattr(p, 'init_images', [])) > 0: + model.register_to_config(requires_aesthetics_score = False) p.ops.append('img2img') task_args = { 'image': p.init_images, @@ -41,6 +42,7 @@ def task_specific_kwargs(p, model): 'strength': p.denoising_strength, } elif (sd_models.get_diffusers_task(model) == sd_models.DiffusersTaskType.INPAINTING or is_img2img_model) and len(getattr(p, 'init_images', [])) > 0: + model.register_to_config(requires_aesthetics_score = False) p.ops.append('inpaint') width, height = processing_helpers.resize_init_images(p) task_args = { diff --git a/modules/sd_models.py b/modules/sd_models.py index 3146b4c2a..f039ea316 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -38,6 +38,7 @@ sd_metadata_timer = 0 debug_move = shared.log.trace if os.environ.get('SD_MOVE_DEBUG', None) is not None else lambda *args, **kwargs: None debug_load = os.environ.get('SD_LOAD_DEBUG', None) +debug_process = shared.log.trace if os.environ.get('SD_PROCESS_DEBUG', None) is not None else lambda *args, **kwargs: None diffusers_version = int(diffusers.__version__.split('.')[1]) @@ -1297,6 +1298,7 @@ def switch_pipe(cls: diffusers.DiffusionPipeline, pipeline: diffusers.DiffusionP def clean_diffuser_pipe(pipe): if pipe is not None and shared.sd_model_type == 'sdxl' and 'requires_aesthetics_score' in pipe.config and hasattr(pipe, '_internal_dict'): + debug_process(f'Pipeline clean: {pipe.__class__.__name__}') # diffusers adds requires_aesthetics_score with img2img and complains if requires_aesthetics_score exist in txt2img internal_dict = dict(pipe._internal_dict) # pylint: disable=protected-access internal_dict.pop('requires_aesthetics_score', None) From a09fc2d32e9b1b428202fdb3eaa6214dc2dddb8f Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 22 Jun 2024 18:03:12 -0400 Subject: [PATCH 72/81] fix apply pag with batch count --- modules/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/processing.py b/modules/processing.py index 4782c8b4d..d453e0b15 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -158,7 +158,6 @@ def process_images(p: StableDiffusionProcessing) -> Processed: shared.prompt_styles.apply_styles_to_extra(p) shared.prompt_styles.extract_comments(p) - pag.apply(p) if shared.opts.cuda_compile_backend == 'none': sd_models.apply_token_merging(p.sd_model) sd_hijack_freeu.apply_freeu(p, not shared.native) @@ -273,6 +272,7 @@ def infotext(_inxex=0): # dummy function overriden if there are iterations extra_network_data = None debug(f'Processing inner: args={vars(p)}') for n in range(p.n_iter): + pag.apply(p) 
debug(f'Processing inner: iteration={n+1}/{p.n_iter}') p.iteration = n if shared.state.skipped: From e8165c3d638cd9f237f4bd122eb9a50f95d5a841 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 22 Jun 2024 18:40:06 -0400 Subject: [PATCH 73/81] xyz grid add lora selector --- CHANGELOG.md | 1 + modules/shared.py | 2 +- scripts/xyz_grid.py | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76dac5867..3f4c804e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ But there's more than SD3: *example*: load FP4 or FP8 quantized T5 text-encoder into PixArt Sigma! - support for `torch-directml` **0.2.2**, thanks @lshqqytiger! *note*: new directml is finally based on modern `torch` 2.3.1! +- xyz grid: add support for LoRA selector - extra networks: info display now contains link to source url if model if its known works for civitai and huggingface models - force gc for lowvram users and improve gc logging diff --git a/modules/shared.py b/modules/shared.py index 81b32fa8b..ea551ea3b 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -821,7 +821,7 @@ def temp_disable_extensions(): "extra_networks_sep2": OptionInfo("
<h2>Extra networks general</h2>
", "", gr.HTML), "extra_network_reference": OptionInfo(False, "Use reference values when available", gr.Checkbox), "extra_network_skip_indexing": OptionInfo(False, "Build info on first access", gr.Checkbox), - "extra_networks_default_multiplier": OptionInfo(1.0, "Default multiplier for extra networks", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), + "extra_networks_default_multiplier": OptionInfo(1.0, "Default strength for extra networks", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), "diffusers_convert_embed": OptionInfo(False, "Auto-convert SD 1.5 embeddings to SDXL ", gr.Checkbox, {"visible": native}), "extra_networks_sep3": OptionInfo("
<h2>Extra networks settings</h2>
", "", gr.HTML), "extra_networks_styles": OptionInfo(True, "Show built-in styles"), diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index c4b44282d..7de4b138f 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -65,6 +65,7 @@ def apply_sampler(p, x, xs): else: p.sampler_name = sampler_name + def apply_hr_sampler_name(p, x, xs): hr_sampler_name = sd_samplers.samplers_map.get(x.lower(), None) if hr_sampler_name is None: @@ -72,6 +73,7 @@ def apply_hr_sampler_name(p, x, xs): else: p.hr_sampler_name = hr_sampler_name + def confirm_samplers(p, xs): for x in xs: if x.lower() not in sd_samplers.samplers_map: @@ -138,6 +140,19 @@ def apply_vae(p, x, xs): sd_vae.reload_vae_weights(shared.sd_model, vae_file=find_vae(x)) +def list_lora(): + import sys + lora = [v for k, v in sys.modules.items() if k == 'networks'][0] + loras = [v.name for v in lora.available_networks.values()] + return ['None'] + loras + + +def apply_lora(p, x, xs): + if x == 'None': + return + p.prompt = p.prompt + f" " + + def apply_te(p, x, xs): shared.opts.data["sd_text_encoder"] = x sd_models.reload_text_encoder() @@ -235,6 +250,7 @@ def __init__(self, *args, **kwargs): AxisOption("Prompt S/R", str, apply_prompt, fmt=format_value), AxisOption("Model", str, apply_checkpoint, fmt=format_value, cost=1.0, choices=lambda: sorted(sd_models.checkpoints_list)), AxisOption("VAE", str, apply_vae, cost=0.7, choices=lambda: ['None'] + list(sd_vae.vae_dict)), + AxisOption("LoRA", str, apply_lora, cost=0.5, choices=list_lora), AxisOption("Text encoder", str, apply_te, cost=0.7, choices=lambda: ['None', 'T5 FP4', 'T5 FP8', 'T5 FP16']), AxisOption("Styles", str, apply_styles, choices=lambda: [s.name for s in shared.prompt_styles.styles.values()]), AxisOption("Seed", int, apply_field("seed")), From 1aab44cb496b1eb996308dcae56d439d3228d9df Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 22 Jun 2024 19:39:17 -0400 Subject: [PATCH 74/81] fix --- modules/processing_args.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/processing_args.py b/modules/processing_args.py index 7f47b6352..91f10c80d 100644 --- a/modules/processing_args.py +++ b/modules/processing_args.py @@ -27,7 +27,8 @@ def task_specific_kwargs(p, model): 'height': 8 * math.ceil(p.height / 8), } elif (sd_models.get_diffusers_task(model) == sd_models.DiffusersTaskType.IMAGE_2_IMAGE or is_img2img_model) and len(getattr(p, 'init_images', [])) > 0: - model.register_to_config(requires_aesthetics_score = False) + if shared.sd_model_type == 'sdxl': + model.register_to_config(requires_aesthetics_score = False) p.ops.append('img2img') task_args = { 'image': p.init_images, @@ -42,7 +43,8 @@ def task_specific_kwargs(p, model): 'strength': p.denoising_strength, } elif (sd_models.get_diffusers_task(model) == sd_models.DiffusersTaskType.INPAINTING or is_img2img_model) and len(getattr(p, 'init_images', [])) > 0: - model.register_to_config(requires_aesthetics_score = False) + if shared.sd_model_type == 'sdxl': + model.register_to_config(requires_aesthetics_score = False) p.ops.append('inpaint') width, height = processing_helpers.resize_init_images(p) task_args = { From 0a9cfc8621e5dc3c9b6ff4dc1ef30ccb9e008ba9 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 22 Jun 2024 19:50:17 -0400 Subject: [PATCH 75/81] backup vae on load and restore when set to none --- CHANGELOG.md | 12 +++++++----- modules/sd_vae.py | 14 +++++++++++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
3f4c804e4..4d44a2f09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ But there's more than SD3: - support for quantized **T5** text encoder in all models that use T5: FP4/FP8/FP16/INT8 (SD3, PixArt-Σ, etc) - support for **PixArt-Sigma** in small/medium/large variants - support for **HunyuanDiT 1.1** +- additional **NNCF weights compression** support: SD3, PixArt, ControlNet, Lora - (finally) new release of **Torch-DirectML** - additional efficiencies for users with low vram gpus - over 20 overall fixes @@ -47,6 +48,7 @@ But there's more than SD3: - support for `torch-directml` **0.2.2**, thanks @lshqqytiger! *note*: new directml is finally based on modern `torch` 2.3.1! - xyz grid: add support for LoRA selector +- vae load: store original vae so it can be restored when set to none - extra networks: info display now contains link to source url if model if its known works for civitai and huggingface models - force gc for lowvram users and improve gc logging @@ -56,11 +58,11 @@ But there's more than SD3: - additional torch gc checks, thanks @Disty0! **Improvements: NNCF**, thanks @Disty0! - - SD3 and PixArt support - - moved the first compression step to CPU - - sequential cpu offload (lowvram) support - - Lora support without reloading the model - - ControlNet compression support +- SD3 and PixArt support +- moved the first compression step to CPU +- sequential cpu offload (lowvram) support +- Lora support without reloading the model +- ControlNet compression support ### Fixes diff --git a/modules/sd_vae.py b/modules/sd_vae.py index 94a6c6b49..53b89161f 100644 --- a/modules/sd_vae.py +++ b/modules/sd_vae.py @@ -259,6 +259,11 @@ def reload_vae_weights(sd_model=None, vae_file=unspecified): vae_file, vae_source = resolve_vae(checkpoint_file) else: vae_source = "function-argument" + if vae_file is None or vae_file == 'None': + if hasattr(sd_model, 'original_vae'): + sd_models.set_diffuser_options(sd_model, vae=sd_model.original_vae, op='vae') + shared.log.info("VAE restored") + return None if loaded_vae_file == vae_file: return None if not shared.native and (shared.cmd_opts.lowvram or shared.cmd_opts.medvram): @@ -276,11 +281,14 @@ def reload_vae_weights(sd_model=None, vae_file=unspecified): if vae_file is not None: shared.log.info(f"VAE weights loaded: {vae_file}") else: - if hasattr(shared.sd_model, "vae") and hasattr(shared.sd_model, "sd_checkpoint_info"): - vae = load_vae_diffusers(shared.sd_model.sd_checkpoint_info.filename, vae_file, vae_source) + if hasattr(sd_model, "vae") and hasattr(sd_model, "sd_checkpoint_info"): + vae = load_vae_diffusers(sd_model.sd_checkpoint_info.filename, vae_file, vae_source) if vae is not None: + if not hasattr(sd_model, 'original_vae'): + sd_model.original_vae = sd_model.vae + sd_models.move_model(sd_model.original_vae, devices.cpu) sd_models.set_diffuser_options(sd_model, vae=vae, op='vae') - apply_vae_config(shared.sd_model.sd_checkpoint_info.filename, vae_file, sd_model) + apply_vae_config(sd_model.sd_checkpoint_info.filename, vae_file, sd_model) if not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram: sd_models.move_model(sd_model, devices.device) From 6ad519e51f38b8b7e123b157deb50ce5ed437e5d Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sat, 22 Jun 2024 20:16:32 -0400 Subject: [PATCH 76/81] css --- javascript/extraNetworks.js | 2 +- modules/shared.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js index ad224759a..e0294b4f4 100644 --- 
a/javascript/extraNetworks.js +++ b/javascript/extraNetworks.js @@ -461,7 +461,7 @@ function setupExtraNetworksForTab(tabname) { en.style.position = 'absolute'; en.style.right = '0'; en.style.top = '13em'; - en.style.height = '-webkit-fill-available'; + en.style.height = 'auto'; en.style.transition = 'width 0.3s ease'; en.style.width = `${window.opts.extra_networks_sidebar_width}vw`; gradioApp().getElementById(`${tabname}_settings`).parentNode.style.width = `${100 - 2 - window.opts.extra_networks_sidebar_width}vw`; diff --git a/modules/shared.py b/modules/shared.py index ea551ea3b..1cf31c491 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -813,7 +813,7 @@ def temp_disable_extensions(): "extra_networks_sort": OptionInfo("Default", "Sort order", gr.Dropdown, {"choices": ['Default', 'Name [A-Z]', 'Name [Z-A]', 'Date [Newest]', 'Date [Oldest]', 'Size [Largest]', 'Size [Smallest]']}), "extra_networks_view": OptionInfo("gallery", "UI view", gr.Radio, {"choices": ["gallery", "list"]}), "extra_networks_card_cover": OptionInfo("sidebar", "UI position", gr.Radio, {"choices": ["cover", "inline", "sidebar"]}), - "extra_networks_height": OptionInfo(53, "UI height (%)", gr.Slider, {"minimum": 10, "maximum": 100, "step": 1}), + "extra_networks_height": OptionInfo(55, "UI height (%)", gr.Slider, {"minimum": 10, "maximum": 100, "step": 1}), "extra_networks_sidebar_width": OptionInfo(35, "UI sidebar width (%)", gr.Slider, {"minimum": 10, "maximum": 80, "step": 1}), "extra_networks_card_size": OptionInfo(160, "UI card size (px)", gr.Slider, {"minimum": 20, "maximum": 2000, "step": 1}), "extra_networks_card_square": OptionInfo(True, "UI disable variable aspect ratio"), From 4d0ba8692a0e829cb0a80aab9211b467be7d7490 Mon Sep 17 00:00:00 2001 From: Disty0 Date: Sun, 23 Jun 2024 04:50:30 +0300 Subject: [PATCH 77/81] Add gc to prompt parser --- modules/prompt_parser_diffusers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 18dadc408..3d1ef1f5b 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -199,6 +199,7 @@ def encode_prompts(pipe, p, prompts: list, negative_prompts: list, steps: int, c if debug_enabled: get_tokens('positive', prompts[0]) get_tokens('negative', negative_prompts[0]) + devices.torch_gc() debug(f"Prompt encode: time={(time.time() - t0):.3f}") return From 61d4acda68a69500b08ff81766c9f2f2d6b693dc Mon Sep 17 00:00:00 2001 From: Disty0 Date: Sun, 23 Jun 2024 15:22:34 +0300 Subject: [PATCH 78/81] SD3 send TE back to CPU when using cpu offload --- modules/prompt_parser_diffusers.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 3d1ef1f5b..b212d08a6 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -168,6 +168,11 @@ def encode_prompts(pipe, p, prompts: list, negative_prompts: list, steps: int, c p.negative_embeds = [] p.negative_pooleds = [] + if (shared.cmd_opts.medvram or shared.opts.diffusers_model_cpu_offload) and hasattr(pipe, "_all_hooks") and hasattr(pipe, "maybe_free_model_hooks"): + # if the last job is interrupted, model will stay in the vram and cause oom, send everything back to cpu before continuing + pipe.maybe_free_model_hooks() + devices.torch_gc() + for i in range(max(len(positive_schedule), len(negative_schedule))): positive_prompt = positive_schedule[i % len(positive_schedule)] negative_prompt = 
negative_schedule[i % len(negative_schedule)] @@ -199,8 +204,11 @@ def encode_prompts(pipe, p, prompts: list, negative_prompts: list, steps: int, c if debug_enabled: get_tokens('positive', prompts[0]) get_tokens('negative', negative_prompts[0]) - devices.torch_gc() + if (shared.cmd_opts.medvram or shared.opts.diffusers_model_cpu_offload) and hasattr(pipe, "_all_hooks") and hasattr(pipe, "maybe_free_model_hooks"): + # text encoder will stay in the vram and cause oom, send everything back to cpu before continuing + pipe.maybe_free_model_hooks() debug(f"Prompt encode: time={(time.time() - t0):.3f}") + devices.torch_gc() return From 72f6cf6a1f639512effa3b7749a169cde91e73df Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 23 Jun 2024 08:50:06 -0400 Subject: [PATCH 79/81] add ms florence --- CHANGELOG.md | 7 ++++-- installer.py | 1 + modules/api/models.py | 2 +- modules/ui_postprocessing.py | 2 +- modules/vqa.py | 41 ++++++++++++++++++++++++++++++++++++ 5 files changed, 49 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d44a2f09..19c962a9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ # Change Log for SD.Next -## Update for 2024-06-21 +## Update for 2024-06-23 -### Highlights for 2024-06-21 +### Highlights for 2024-06-23 Following zero-day **SD3** release, a week later here's a refresh with 10+ improvements including full prompt attention, support for compressed weights, additional text-encoder quantization modes. @@ -12,6 +12,7 @@ But there's more than SD3: - support for **PixArt-Sigma** in small/medium/large variants - support for **HunyuanDiT 1.1** - additional **NNCF weights compression** support: SD3, PixArt, ControlNet, Lora +- integration of **MS Florence** VLM/VQA *Base* and *Large* models - (finally) new release of **Torch-DirectML** - additional efficiencies for users with low vram gpus - over 20 overall fixes @@ -39,6 +40,8 @@ But there's more than SD3: *note* by default pixart-Σ uses full fp16 t5 encoder with large memory footprint simply select in *settings -> model -> text encoder* before or after model load - **HunyuanDiT**: support for model version 1.1 +- **MS Florence**: integration of Microsoft Florence VLM/VQA Base and Large models + simply select in *process -> visual query*! 
### Improvements: General diff --git a/installer.py b/installer.py index aaaad060b..2bffe8276 100644 --- a/installer.py +++ b/installer.py @@ -894,6 +894,7 @@ def install_submodules(force=True): branch(name) except Exception: log.error(f'Error updating submodule: {submodule}') + setup_logging() if args.profile: print_profile(pr, 'Submodule') return '\n'.join(res) diff --git a/modules/api/models.py b/modules/api/models.py index 8437e91ba..5813fdcc6 100644 --- a/modules/api/models.py +++ b/modules/api/models.py @@ -313,7 +313,7 @@ class ResInterrogate(BaseModel): class ReqVQA(BaseModel): image: str = Field(default="", title="Image", description="Image to work on, must be a Base64 string containing the image's data.") - model: str = Field(default="Moondream 2", title="Model", description="The interrogate model used.") + model: str = Field(default="MS Florence 2 Base", title="Model", description="The interrogate model used.") question: str = Field(default="describe the image", title="Question", description="Question to ask the model.") class ResVQA(BaseModel): diff --git a/modules/ui_postprocessing.py b/modules/ui_postprocessing.py index 42d1f9847..b1948a27b 100644 --- a/modules/ui_postprocessing.py +++ b/modules/ui_postprocessing.py @@ -80,7 +80,7 @@ def create_ui(): with gr.Row(): vqa_answer = gr.Textbox(label="Answer", lines=3) with gr.Row(elem_id='interrogate_buttons_query'): - vqa_model = gr.Dropdown(list(vqa.MODELS), value='Moondream 2', label='VQA Model') + vqa_model = gr.Dropdown(list(vqa.MODELS), value='MS Florence 2 Base', label='VQA Model') vqa_submit = gr.Button("Interrogate", elem_id="interrogate_btn_interrogate", variant='primary') vqa_submit.click(vqa.interrogate, inputs=[vqa_question, vqa_image, vqa_model], outputs=[vqa_answer]) diff --git a/modules/vqa.py b/modules/vqa.py index 8344b15bf..357a604d9 100644 --- a/modules/vqa.py +++ b/modules/vqa.py @@ -8,6 +8,8 @@ model = None loaded: str = None MODELS = { + "MS Florence 2 Base": "microsoft/Florence-2-base", # 0.5GB + "MS Florence 2 Large": "microsoft/Florence-2-large", # 1.5GB "Moondream 2": "vikhyatk/moondream2", # 3.7GB "GIT TextCaps Base": "microsoft/git-base-textcaps", # 0.7GB "GIT VQA Base": "microsoft/git-base-vqav2", # 0.7GB @@ -124,7 +126,44 @@ def moondream(question: str, image: Image.Image, repo: str = None): return response +def florence(question: str, image: Image.Image, repo: str = None): + global processor, model, loaded # pylint: disable=global-statement + if model is None or loaded != repo: + model = transformers.AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True) + processor = transformers.AutoProcessor.from_pretrained(repo, trust_remote_code=True) + loaded = repo + model.eval() + model.to(devices.device, devices.dtype) + shared.log.debug(f'VQA: class={model.__class__.__name__} processor={processor.__class__} model={repo}') + + if question.startswith('<'): + task = question.split('>', 1)[0] + '>' + else: + task = '' + question = task + question + inputs = processor(text=question, images=image, return_tensors="pt") + input_ids = inputs['input_ids'].to(devices.device) + pixel_values = inputs['pixel_values'].to(devices.device, devices.dtype) + with devices.inference_context(): + generated_ids = model.generate( + input_ids=input_ids, + pixel_values=pixel_values, + max_new_tokens=1024, + num_beams=3, + do_sample=False + ) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0] + response = processor.post_process_generation(generated_text, task="task", 
image_size=(image.width, image.height)) + + if 'task' in response: + response = response['task'] + shared.log.debug(f'VQA: task={task} response="{response}"') + return response + + def interrogate(vqa_question, vqa_image, vqa_model_req): + from installer import install + install('flash_attn', quiet=True) vqa_model = MODELS.get(vqa_model_req, None) shared.log.debug(f'VQA: model="{vqa_model}" question="{vqa_question}" image={vqa_image}') if vqa_image is None: @@ -146,6 +185,8 @@ def interrogate(vqa_question, vqa_image, vqa_model_req): answer = pix(vqa_question, vqa_image, vqa_model) if 'moondream2' in vqa_model.lower(): answer = moondream(vqa_question, vqa_image, vqa_model) + if 'florence' in vqa_model.lower(): + answer = florence(vqa_question, vqa_image, vqa_model) else: answer = 'unknown model' if model is not None: From 5f249f3aefa58b1aad282d587c464d25c396d74d Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 23 Jun 2024 10:53:43 -0400 Subject: [PATCH 80/81] do not apply pag for img2img --- modules/pag/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/pag/__init__.py b/modules/pag/__init__.py index 484fccc13..83eba5f0b 100644 --- a/modules/pag/__init__.py +++ b/modules/pag/__init__.py @@ -18,6 +18,9 @@ def apply(p: processing.StableDiffusionProcessing): # pylint: disable=arguments- return None if p.pag_scale == 0: return + if sd_models.get_diffusers_task(shared.sd_model) != sd_models.DiffusersTaskType.TEXT_2_IMAGE: + shared.log.warning(f'PAG: pipeline={c} not implemented') + return None if detect.is_sd15(c): orig_pipeline = shared.sd_model shared.sd_model = sd_models.switch_pipe(StableDiffusionPAGPipeline, shared.sd_model) From 168e10437f9f6cbe1fce65c6063b21118cf46df0 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 23 Jun 2024 10:58:55 -0400 Subject: [PATCH 81/81] update changelog --- CHANGELOG.md | 11 ++++++----- modules/pag/__init__.py | 1 - 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19c962a9b..b2b10a416 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,17 +4,17 @@ ### Highlights for 2024-06-23 -Following zero-day **SD3** release, a week later here's a refresh with 10+ improvements +Following zero-day **SD3** release, a 10 days later here's a refresh with 10+ improvements including full prompt attention, support for compressed weights, additional text-encoder quantization modes. But there's more than SD3: -- support for quantized **T5** text encoder in all models that use T5: FP4/FP8/FP16/INT8 (SD3, PixArt-Σ, etc) +- support for quantized **T5** text encoder *FP16/FP8/FP4/INT8* in all models that use T5: SD3, PixArt-Σ, etc. - support for **PixArt-Sigma** in small/medium/large variants - support for **HunyuanDiT 1.1** -- additional **NNCF weights compression** support: SD3, PixArt, ControlNet, Lora +- additional **NNCF weights compression** support: SD3, PixArt, ControlNet, Lora - integration of **MS Florence** VLM/VQA *Base* and *Large* models - (finally) new release of **Torch-DirectML** -- additional efficiencies for users with low vram gpus +- additional efficiencies for users with low VRAM GPUs - over 20 overall fixes ### Model Improvements @@ -43,7 +43,7 @@ But there's more than SD3: - **MS Florence**: integration of Microsoft Florence VLM/VQA Base and Large models simply select in *process -> visual query*! 
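As a usage reference for the Florence integration above, here is a short self-contained sketch of the same call pattern. The repo id, task token and image path are examples only; the model is loaded with `trust_remote_code` as Florence-2 requires, and the sketch assumes the `flash_attn` dependency that the patch pre-installs is available.

```python
# Sketch of the Florence-2 call pattern wrapped by the new florence() helper.
import torch
import transformers
from PIL import Image

repo = "microsoft/Florence-2-base"  # example; "microsoft/Florence-2-large" also works
device, dtype = ("cuda", torch.float16) if torch.cuda.is_available() else ("cpu", torch.float32)

processor = transformers.AutoProcessor.from_pretrained(repo, trust_remote_code=True)
model = transformers.AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True)
model.eval().to(device, dtype)

image = Image.open("example.jpg").convert("RGB")  # example input
task = "<MORE_DETAILED_CAPTION>"  # Florence-2 task tokens: <CAPTION>, <OD>, <OCR>, ...

inputs = processor(text=task, images=image, return_tensors="pt")
with torch.inference_mode():
    generated_ids = model.generate(
        input_ids=inputs["input_ids"].to(device),
        pixel_values=inputs["pixel_values"].to(device, dtype),
        max_new_tokens=1024,
        num_beams=3,
        do_sample=False,
    )
text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
# post_process_generation parses the raw output according to the task token
answer = processor.post_process_generation(text, task=task, image_size=(image.width, image.height))
print(answer.get(task, answer))
```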
-### Improvements: General +### General Improvements - support FP4 quantized T5 text encoder, in addtion to existing FP8 and FP16 - support for T5 text-encoder loader in **all** models that use T5 @@ -85,6 +85,7 @@ But there's more than SD3: - fix api ip-adapter - fix memory exceptions with ROCm, thanks @Disty0! - fix face-hires with lowvram, thanks @Disty0! +- fix pag incorrectly resetting pipeline - cleanup image metadata - restructure api examples: `cli/api-*` - handle theme fallback when invalid theme is specified diff --git a/modules/pag/__init__.py b/modules/pag/__init__.py index 83eba5f0b..29cdee8ca 100644 --- a/modules/pag/__init__.py +++ b/modules/pag/__init__.py @@ -15,7 +15,6 @@ def apply(p: processing.StableDiffusionProcessing): # pylint: disable=arguments- c = shared.sd_model.__class__ if shared.sd_loaded else None if c == StableDiffusionPAGPipeline or c == StableDiffusionXLPAGPipeline: unapply() - return None if p.pag_scale == 0: return if sd_models.get_diffusers_task(shared.sd_model) != sd_models.DiffusersTaskType.TEXT_2_IMAGE:
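PATCH 80 gates PAG on the pipeline task because the PAG pipelines wired in here handle text-to-image only. A rough standalone equivalent of that guard, using diffusers' auto-pipeline registry rather than SD.Next's `sd_models.get_diffusers_task()` helper (the function below is illustrative, not the project's implementation):

```python
# Illustrative guard: treat only registered text-to-image pipeline classes as
# eligible for PAG; img2img/inpaint pipelines are skipped.
import diffusers
from diffusers.pipelines.auto_pipeline import AUTO_TEXT2IMAGE_PIPELINES_MAPPING

def is_text2image(pipe: diffusers.DiffusionPipeline) -> bool:
    t2i_classes = tuple(AUTO_TEXT2IMAGE_PIPELINES_MAPPING.values())
    return isinstance(pipe, t2i_classes)

# usage sketch inside apply(): bail out early for non-text2img tasks
# if not is_text2image(shared.sd_model):
#     shared.log.warning('PAG: not implemented for this pipeline task')
#     return None
```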