diff --git a/CHANGELOG.md b/CHANGELOG.md
index e47aeda32..b2b10a416 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,96 @@
 # Change Log for SD.Next
 
+## Update for 2024-06-23
+
+### Highlights for 2024-06-23
+
+Following the zero-day **SD3** release, here's a refresh 10 days later with 10+ improvements,
+including full prompt attention, support for compressed weights, and additional text-encoder quantization modes.
+
+But there's more than SD3:
+- support for quantized **T5** text encoder *FP16/FP8/FP4/INT8* in all models that use T5: SD3, PixArt-Σ, etc.
+- support for **PixArt-Sigma** in small/medium/large variants
+- support for **HunyuanDiT 1.1**
+- additional **NNCF weights compression** support: SD3, PixArt, ControlNet, LoRA
+- integration of **MS Florence** VLM/VQA *Base* and *Large* models
+- (finally) new release of **Torch-DirectML**
+- additional efficiencies for users with low VRAM GPUs
+- over 20 overall fixes
+
+### Model Improvements
+
+- **SD3**: enable tiny-VAE (TAESD) preview and non-full quality mode
+- SD3: enable base LoRA support
+- SD3: add support for FP4 quantized T5 text encoder
+  simply select in *settings -> model -> text encoder*
+  *note*: for SD3 with T5, set SD.Next to use FP16 precision, not BF16 precision
+- SD3: add support for INT8 quantized T5 text encoder, thanks @Disty0!
+- SD3: enable cpu-offloading for T5 text encoder, thanks @Disty0!
+- SD3: simplified loading of model in single-file safetensors format
+  model load can now be performed fully offline
+- SD3: full support for prompt parsing and attention, thanks @AI-Casanova!
+- SD3: ability to target different prompts to each of the text encoders, thanks @AI-Casanova!
+  example: `dog TE2: cat TE3: bird`
+- SD3: add support for sampler shift for Euler FlowMatch
+  see *settings -> samplers*, also available as param in xyz grid
+  higher shift means the model will spend more time on structure and less on details
+- SD3: add support for selecting T5 text encoder variant in XYZ grid
+- **Pixart-Σ**: add *small* (512px) and *large* (2k) variations, in addition to existing *medium* (1k)
+- Pixart-Σ: add support for 4/8bit quantized T5 text encoder
+  *note*: by default Pixart-Σ uses the full FP16 T5 encoder with a large memory footprint
+  simply select in *settings -> model -> text encoder* before or after model load
+- **HunyuanDiT**: support for model version 1.1
+- **MS Florence**: integration of Microsoft Florence VLM/VQA Base and Large models
+  simply select in *process -> visual query*!
+
+### General Improvements
+
+- support FP4 quantized T5 text encoder, in addition to existing FP8 and FP16
+- support for T5 text-encoder loader in **all** models that use T5
+  *example*: load FP4 or FP8 quantized T5 text-encoder into PixArt Sigma!
+- support for `torch-directml` **0.2.2**, thanks @lshqqytiger!
+  *note*: new directml is finally based on modern `torch` 2.3.1!
+- xyz grid: add support for LoRA selector
+- vae load: store original vae so it can be restored when set to none
+- extra networks: info display now contains a link to the source url if the model source is known
+  works for civitai and huggingface models
+- force gc for lowvram users and improve gc logging
+- improved google.colab support
+- css tweaks for standardui
+- css tweaks for modernui
+- additional torch gc checks, thanks @Disty0!
+
+**Improvements: NNCF**, thanks @Disty0!
+- SD3 and PixArt support +- moved the first compression step to CPU +- sequential cpu offload (lowvram) support +- Lora support without reloading the model +- ControlNet compression support + +### Fixes + +- fix unsaturated outputs, force apply vae config on model load +- fix hidiffusion handling of non-square aspect ratios, thanks @ShenZhang-Shin! +- fix control second pass resize +- fix hunyuandit set attention processor +- fix civitai download without name +- fix compatibility with latest adetailer +- fix invalid sampler warning +- fix starting from non git repo +- fix control api negative prompt handling +- fix saving style without name provided +- fix t2i-color adapter +- fix sdxl "has been incorrectly initialized" +- fix api face-hires +- fix api ip-adapter +- fix memory exceptions with ROCm, thanks @Disty0! +- fix face-hires with lowvram, thanks @Disty0! +- fix pag incorrectly resetting pipeline +- cleanup image metadata +- restructure api examples: `cli/api-*` +- handle theme fallback when invalid theme is specified +- remove obsolete training code leftovers + ## Update for 2024-06-13 ### Highlights for 2024-06-13 diff --git a/TODO.md b/TODO.md index fd704b838..0647494dc 100644 --- a/TODO.md +++ b/TODO.md @@ -11,6 +11,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma - diffusers public callbacks - include reference styles - lora: sc lora, dora, etc +- sd3 controlnet: ## Experimental diff --git a/cli/simple-control.py b/cli/api-control.py similarity index 98% rename from cli/simple-control.py rename to cli/api-control.py index a0246fdcc..a735bce4f 100755 --- a/cli/simple-control.py +++ b/cli/api-control.py @@ -132,7 +132,7 @@ def get_image(encoded, output): if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-img2img') + parser = argparse.ArgumentParser(description = 'api-img2img') parser.add_argument('--init', required=False, default=None, help='init image') parser.add_argument('--input', required=False, default=None, help='input image') parser.add_argument('--mask', required=False, help='mask image') diff --git a/cli/api-faceid.py b/cli/api-faceid.py new file mode 100755 index 000000000..dd9645cea --- /dev/null +++ b/cli/api-faceid.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +import os +import io +import time +import base64 +import logging +import argparse +import requests +import urllib3 +from PIL import Image + +sd_url = os.environ.get('SDAPI_URL', "http://127.0.0.1:7860") +sd_username = os.environ.get('SDAPI_USR', None) +sd_password = os.environ.get('SDAPI_PWD', None) + +logging.basicConfig(level = logging.INFO, format = '%(asctime)s %(levelname)s: %(message)s') +log = logging.getLogger(__name__) +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + +options = { + "save_images": False, + "send_images": True, +} + + +def auth(): + if sd_username is not None and sd_password is not None: + return requests.auth.HTTPBasicAuth(sd_username, sd_password) + return None + + +def post(endpoint: str, dct: dict = None): + req = requests.post(f'{sd_url}{endpoint}', json = dct, timeout=300, verify=False, auth=auth()) + if req.status_code != 200: + return { 'error': req.status_code, 'reason': req.reason, 'url': req.url } + else: + return req.json() + + +def encode(f): + image = Image.open(f) + if image.mode == 'RGBA': + image = image.convert('RGB') + with io.BytesIO() as stream: + image.save(stream, 'JPEG') + image.close() + values = stream.getvalue() + encoded = base64.b64encode(values).decode() + return 
encoded + + +def generate(args): # pylint: disable=redefined-outer-name + t0 = time.time() + if args.model is not None: + post('/sdapi/v1/options', { 'sd_model_checkpoint': args.model }) + post('/sdapi/v1/reload-checkpoint') # needed if running in api-only to trigger new model load + options['prompt'] = args.prompt + options['negative_prompt'] = args.negative + options['steps'] = int(args.steps) + options['seed'] = int(args.seed) + options['sampler_name'] = args.sampler + options['width'] = args.width + options['height'] = args.height + options['face'] = { + 'mode': 'FaceID', + 'ip_model': 'FaceID Base', + 'source_images': [encode(args.face)], + } + data = post('/sdapi/v1/txt2img', options) + t1 = time.time() + if 'images' in data: + for i in range(len(data['images'])): + b64 = data['images'][i].split(',',1)[0] + info = data['info'] + image = Image.open(io.BytesIO(base64.b64decode(b64))) + log.info(f'received image: size={image.size} time={t1-t0:.2f} info="{info}"') + if args.output: + image.save(args.output) + log.info(f'image saved: size={image.size} filename={args.output}') + + else: + log.warning(f'no images received: {data}') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = 'api-faceid') + parser.add_argument('--width', required=False, default=512, help='image width') + parser.add_argument('--height', required=False, default=512, help='image height') + parser.add_argument('--face', required=False, help='face image') + parser.add_argument('--prompt', required=False, default='', help='prompt text') + parser.add_argument('--negative', required=False, default='', help='negative prompt text') + parser.add_argument('--steps', required=False, default=20, help='number of steps') + parser.add_argument('--seed', required=False, default=-1, help='initial seed') + parser.add_argument('--sampler', required=False, default='Euler a', help='sampler name') + parser.add_argument('--output', required=False, default=None, help='output image file') + parser.add_argument('--model', required=False, help='model name') + args = parser.parse_args() + log.info(f'img2img: {args}') + generate(args) + +""" +request.face.mode, +request.face.source_images, +request.face.ip_model, +request.face.ip_override_sampler, +request.face.ip_cache_model, +request.face.ip_strength, +request.face.ip_structure, +request.face.id_strength, +request.face.id_conditioning, +request.face.id_cache, +request.face.pm_trigger, +request.face.pm_strength, +request.face.pm_start, +request.face.fs_cache +""" diff --git a/cli/simple-img2img.py b/cli/api-img2img.py similarity index 98% rename from cli/simple-img2img.py rename to cli/api-img2img.py index 7cbfa14b7..3a2961e5b 100755 --- a/cli/simple-img2img.py +++ b/cli/api-img2img.py @@ -83,7 +83,7 @@ def generate(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-img2img') + parser = argparse.ArgumentParser(description = 'api-img2img') parser.add_argument('--init', required=True, help='init image') parser.add_argument('--mask', required=False, help='mask image') parser.add_argument('--prompt', required=False, default='', help='prompt text') diff --git a/cli/simple-info.py b/cli/api-info.py similarity index 96% rename from cli/simple-info.py rename to cli/api-info.py index 4d1fd6d75..83e4dfe2e 100755 --- a/cli/simple-info.py +++ b/cli/api-info.py @@ -50,7 +50,7 @@ def info(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = 
argparse.ArgumentParser(description = 'simple-info') + parser = argparse.ArgumentParser(description = 'api-info') parser.add_argument('--input', required=True, help='input image') args = parser.parse_args() log.info(f'info: {args}') diff --git a/cli/api-json.py b/cli/api-json.py new file mode 100755 index 000000000..e8c5270fb --- /dev/null +++ b/cli/api-json.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python + +# curl -vX POST http://localhost:7860/sdapi/v1/txt2img --header "Content-Type: application/json" -d @3261.json +import os +import json +import logging +import argparse +import requests +import urllib3 + + +sd_url = os.environ.get('SDAPI_URL', "http://127.0.0.1:7860") +sd_username = os.environ.get('SDAPI_USR', None) +sd_password = os.environ.get('SDAPI_PWD', None) +options = { + "save_images": True, + "send_images": True, +} + +logging.basicConfig(level = logging.INFO, format = '%(asctime)s %(levelname)s: %(message)s') +log = logging.getLogger(__name__) +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + +def auth(): + if sd_username is not None and sd_password is not None: + return requests.auth.HTTPBasicAuth(sd_username, sd_password) + return None + + +def post(endpoint: str, payload: dict = None): + if 'sdapi' not in endpoint: + endpoint = f'sdapi/v1/{endpoint}' + if 'http' not in endpoint: + endpoint = f'{sd_url}/{endpoint}' + req = requests.post(endpoint, json = payload, timeout=300, verify=False, auth=auth()) + return { 'error': req.status_code, 'reason': req.reason, 'url': req.url } if req.status_code != 200 else req.json() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = 'api-txt2img') + parser.add_argument('endpoint', nargs=1, help='endpoint') + parser.add_argument('json', nargs=1, help='json data or file') + args = parser.parse_args() + log.info(f'api-json: {args}') + if os.path.isfile(args.json[0]): + with open(args.json[0], 'r', encoding='ascii') as f: + dct = json.load(f) # TODO fails with b64 encoded images inside json due to string encoding + else: + dct = json.loads(args.json[0]) + res = post(endpoint=args.endpoint[0], payload=dct) + print(res) diff --git a/cli/simple-mask.py b/cli/api-mask.py similarity index 97% rename from cli/simple-mask.py rename to cli/api-mask.py index 2ea12234e..0a1372138 100755 --- a/cli/simple-mask.py +++ b/cli/api-mask.py @@ -73,7 +73,7 @@ def info(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-info') + parser = argparse.ArgumentParser(description = 'api-mask') parser.add_argument('--input', required=True, help='input image') parser.add_argument('--mask', required=False, help='input mask') parser.add_argument('--type', required=False, help='output mask type') diff --git a/cli/simple-preprocess.py b/cli/api-preprocess.py similarity index 97% rename from cli/simple-preprocess.py rename to cli/api-preprocess.py index 2b96750bf..084f6a0b4 100755 --- a/cli/simple-preprocess.py +++ b/cli/api-preprocess.py @@ -67,7 +67,7 @@ def info(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-info') + parser = argparse.ArgumentParser(description = 'api-preprocess') parser.add_argument('--input', required=True, help='input image') parser.add_argument('--model', required=True, help='preprocessing model') parser.add_argument('--output', required=False, help='output image') diff --git a/cli/idle.py b/cli/api-progress.py similarity index 100% rename from 
cli/idle.py rename to cli/api-progress.py diff --git a/cli/simple-txt2img.js b/cli/api-txt2img.js similarity index 100% rename from cli/simple-txt2img.js rename to cli/api-txt2img.js diff --git a/cli/simple-txt2img.py b/cli/api-txt2img.py similarity index 93% rename from cli/simple-txt2img.py rename to cli/api-txt2img.py index d3287ee46..a00515fe5 100755 --- a/cli/simple-txt2img.py +++ b/cli/api-txt2img.py @@ -48,7 +48,10 @@ def generate(args): # pylint: disable=redefined-outer-name options['sampler_name'] = args.sampler options['width'] = int(args.width) options['height'] = int(args.height) - options['restore_faces'] = args.faces + if args.faces: + options['restore_faces'] = args.faces + options['denoising_strength'] = 0.5 + options['hr_sampler_name'] = args.sampler data = post('/sdapi/v1/txt2img', options) t1 = time.time() if 'images' in data: @@ -65,7 +68,7 @@ def generate(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-txt2img') + parser = argparse.ArgumentParser(description = 'api-txt2img') parser.add_argument('--prompt', required=False, default='', help='prompt text') parser.add_argument('--negative', required=False, default='', help='negative prompt text') parser.add_argument('--width', required=False, default=512, help='image width') diff --git a/cli/simple-upscale.py b/cli/api-upscale.py similarity index 97% rename from cli/simple-upscale.py rename to cli/api-upscale.py index b5a2bb5dd..082e008a8 100755 --- a/cli/simple-upscale.py +++ b/cli/api-upscale.py @@ -80,7 +80,7 @@ def upscale(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-upscale') + parser = argparse.ArgumentParser(description = 'api-upscale') parser.add_argument('--input', required=True, help='input image') parser.add_argument('--output', required=True, help='output image') parser.add_argument('--upscaler', required=False, default='Nearest', help='upscaler name') diff --git a/cli/simple-vqa.py b/cli/api-vqa.py similarity index 96% rename from cli/simple-vqa.py rename to cli/api-vqa.py index 0ac181b7c..73de8dbc8 100755 --- a/cli/simple-vqa.py +++ b/cli/api-vqa.py @@ -55,7 +55,7 @@ def info(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - parser = argparse.ArgumentParser(description = 'simple-info') + parser = argparse.ArgumentParser(description = 'api-vqa') parser.add_argument('--input', required=True, help='input image') parser.add_argument('--model', required=False, help='vqa model') parser.add_argument('--question', required=False, help='question') diff --git a/cli/image-encode.py b/cli/image-encode.py new file mode 100755 index 000000000..0769c2544 --- /dev/null +++ b/cli/image-encode.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +import io +import os +import sys +import base64 +from PIL import Image +from rich import print # pylint: disable=redefined-builtin + + +def encode(file: str): + image = Image.open(file) if os.path.exists(file) else None + print(f'Input: file={file} image={image}') + if image is None: + return None + if image.mode != 'RGB': + image = image.convert('RGB') + with io.BytesIO() as stream: + image.save(stream, 'JPEG') + image.close() + values = stream.getvalue() + encoded = base64.b64encode(values).decode() + return encoded + + +if __name__ == "__main__": + sys.argv.pop(0) + fn = sys.argv[0] if len(sys.argv) > 0 else '' + b64 = encode(fn) + print('=== BEGIN ===') + print(f'{b64}') + print('=== END ===') + diff 
--git a/cli/latents.py b/cli/latents.py deleted file mode 100755 index 717f17352..000000000 --- a/cli/latents.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import json -import pathlib -import argparse -import warnings - -import cv2 -import numpy as np -import torch -from PIL import Image -from torchvision import transforms -from tqdm import tqdm -from util import Map - -from rich.pretty import install as pretty_install -from rich.traceback import install as traceback_install -from rich.console import Console - -console = Console(log_time=True, log_time_format='%H:%M:%S-%f') -pretty_install(console=console) -traceback_install(console=console, extra_lines=1, width=console.width, word_wrap=False, indent_guides=False) - -sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'modules', 'lora')) -import library.model_util as model_util -import library.train_util as train_util - -warnings.filterwarnings('ignore') -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -options = Map({ - 'batch': 1, - 'input': '', - 'json': '', - 'max': 1024, - 'min': 256, - 'noupscale': False, - 'precision': 'fp32', - 'resolution': '512,512', - 'steps': 64, - 'vae': 'stabilityai/sd-vae-ft-mse' -}) -vae = None - - -def get_latents(local_vae, images, weight_dtype): - image_transforms = transforms.Compose([ transforms.ToTensor(), transforms.Normalize([0.5], [0.5]) ]) - img_tensors = [image_transforms(image) for image in images] - img_tensors = torch.stack(img_tensors) - img_tensors = img_tensors.to(device, weight_dtype) - with torch.no_grad(): - latents = local_vae.encode(img_tensors).latent_dist.sample().float().to('cpu').numpy() - return latents, [images[0].shape[0], images[0].shape[1]] - - -def get_npz_filename_wo_ext(data_dir, image_key): - return os.path.join(data_dir, os.path.splitext(os.path.basename(image_key))[0]) - - -def create_vae_latents(local_params): - args = Map({**options, **local_params}) - console.log(f'create vae latents args: {args}') - image_paths = train_util.glob_images(args.input) - if os.path.exists(args.json): - with open(args.json, 'rt', encoding='utf-8') as f: - metadata = json.load(f) - else: - return - if args.precision == 'fp16': - weight_dtype = torch.float16 - elif args.precision == 'bf16': - weight_dtype = torch.bfloat16 - else: - weight_dtype = torch.float32 - global vae # pylint: disable=global-statement - if vae is None: - vae = model_util.load_vae(args.vae, weight_dtype) - vae.eval() - vae.to(device, dtype=weight_dtype) - max_reso = tuple([int(t) for t in args.resolution.split(',')]) - assert len(max_reso) == 2, f'illegal resolution: {args.resolution}' - bucket_manager = train_util.BucketManager(args.noupscale, max_reso, args.min, args.max, args.steps) - if not args.noupscale: - bucket_manager.make_buckets() - img_ar_errors = [] - def process_batch(is_last): - for bucket in bucket_manager.buckets: - if (is_last and len(bucket) > 0) or len(bucket) >= args.batch: - latents, original_size = get_latents(vae, [img for _, img in bucket], weight_dtype) - assert latents.shape[2] == bucket[0][1].shape[0] // 8 and latents.shape[3] == bucket[0][1].shape[1] // 8, f'latent shape {latents.shape}, {bucket[0][1].shape}' - for (image_key, _), latent in zip(bucket, latents): - npz_file_name = get_npz_filename_wo_ext(args.input, image_key) - # np.savez(npz_file_name, latent) - kwargs = {} - np.savez( - npz_file_name, - latents=latent, - original_size=np.array(original_size), - crop_ltrb=np.array([0, 0]), - **kwargs, - ) - bucket.clear() 
- data = [[(None, ip)] for ip in image_paths] - bucket_counts = {} - for data_entry in tqdm(data, smoothing=0.0): - if data_entry[0] is None: - continue - img_tensor, image_path = data_entry[0] - if img_tensor is not None: - image = transforms.functional.to_pil_image(img_tensor) - else: - image = Image.open(image_path) - image_key = os.path.basename(image_path) - image_key = os.path.join(os.path.basename(pathlib.Path(image_path).parent), pathlib.Path(image_path).stem) - if image_key not in metadata: - metadata[image_key] = {} - reso, resized_size, ar_error = bucket_manager.select_bucket(image.width, image.height) - img_ar_errors.append(abs(ar_error)) - bucket_counts[reso] = bucket_counts.get(reso, 0) + 1 - metadata[image_key]['train_resolution'] = (reso[0] - reso[0] % 8, reso[1] - reso[1] % 8) - if not args.noupscale: - assert resized_size[0] == reso[0] or resized_size[1] == reso[1], f'internal error, resized size not match: {reso}, {resized_size}, {image.width}, {image.height}' - assert resized_size[0] >= reso[0] and resized_size[1] >= reso[1], f'internal error, resized size too small: {reso}, {resized_size}, {image.width}, {image.height}' - assert resized_size[0] >= reso[0] and resized_size[1] >= reso[1], f'internal error resized size is small: {resized_size}, {reso}' - image = np.array(image) - if resized_size[0] != image.shape[1] or resized_size[1] != image.shape[0]: - image = cv2.resize(image, resized_size, interpolation=cv2.INTER_AREA) - if resized_size[0] > reso[0]: - trim_size = resized_size[0] - reso[0] - image = image[:, trim_size//2:trim_size//2 + reso[0]] - if resized_size[1] > reso[1]: - trim_size = resized_size[1] - reso[1] - image = image[trim_size//2:trim_size//2 + reso[1]] - assert image.shape[0] == reso[1] and image.shape[1] == reso[0], f'internal error, illegal trimmed size: {image.shape}, {reso}' - bucket_manager.add_image(reso, (image_key, image)) - process_batch(False) - - process_batch(True) - vae.to('cpu') - - bucket_manager.sort() - img_ar_errors = np.array(img_ar_errors) - for i, reso in enumerate(bucket_manager.resos): - count = bucket_counts.get(reso, 0) - if count > 0: - console.log(f'vae latents bucket: {i+1}/{len(bucket_manager.resos)} resolution: {reso} images: {count} mean-ar-error: {np.mean(img_ar_errors)}') - with open(args.json, 'wt', encoding='utf-8') as f: - json.dump(metadata, f, indent=2) - - -def unload_vae(): - global vae # pylint: disable=global-statement - vae = None - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('input', type=str, help='directory for train images') - parser.add_argument('--json', type=str, required=True, help='metadata file to input') - parser.add_argument('--vae', type=str, required=True, help='model name or path to encode latents') - parser.add_argument('--batch', type=int, default=1, help='batch size in inference') - parser.add_argument('--resolution', type=str, default='512,512', help='max resolution in fine tuning (width,height)') - parser.add_argument('--min', type=int, default=256, help='minimum resolution for buckets') - parser.add_argument('--max', type=int, default=1024, help='maximum resolution for buckets') - parser.add_argument('--steps', type=int, default=64, help='steps of resolution for buckets, divisible by 8') - parser.add_argument('--noupscale', action='store_true', help='make bucket for each image without upscaling') - parser.add_argument('--precision', type=str, default='fp32', choices=['fp32', 'fp16', 'bf16'], help='use precision') - params = parser.parse_args() - 
create_vae_latents(vars(params)) diff --git a/cli/model-jit.py b/cli/model-jit.py deleted file mode 100755 index e4af79e95..000000000 --- a/cli/model-jit.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python -import os -import time -import functools -import argparse -import logging -import warnings -from dataclasses import dataclass - -logging.getLogger("DeepSpeed").disabled = True -warnings.filterwarnings(action="ignore", category=FutureWarning) -warnings.filterwarnings(action="ignore", category=DeprecationWarning) - -import torch -import diffusers - -n_warmup = 5 -n_traces = 10 -n_runs = 100 -args = {} -pipe = None -log = logging.getLogger("sd") - - -def setup_logging(): - from rich.theme import Theme - from rich.logging import RichHandler - from rich.console import Console - from rich.traceback import install - log.setLevel(logging.DEBUG) - console = Console(log_time=True, log_time_format='%H:%M:%S-%f', theme=Theme({ "traceback.border": "black", "traceback.border.syntax_error": "black", "inspect.value.border": "black" })) - logging.basicConfig(level=logging.ERROR, format='%(asctime)s | %(name)s | %(levelname)s | %(module)s | %(message)s', handlers=[logging.NullHandler()]) # redirect default logger to null - rh = RichHandler(show_time=True, omit_repeated_times=False, show_level=True, show_path=False, markup=False, rich_tracebacks=True, log_time_format='%H:%M:%S-%f', level=logging.DEBUG, console=console) - rh.setLevel(logging.DEBUG) - log.addHandler(rh) - logging.getLogger("diffusers").setLevel(logging.ERROR) - logging.getLogger("torch").setLevel(logging.ERROR) - warnings.filterwarnings(action="ignore", category=torch.jit.TracerWarning) - install(console=console, extra_lines=1, max_frames=10, width=console.width, word_wrap=False, indent_guides=False, suppress=[]) - - -def generate_inputs(): - if args.type == 'sd15': - sample = torch.randn(2, 4, 64, 64).half().cuda() - timestep = torch.rand(1).half().cuda() * 999 - encoder_hidden_states = torch.randn(2, 77, 768).half().cuda() - return sample, timestep, encoder_hidden_states - if args.type == 'sdxl': - sample = torch.randn(2, 4, 64, 64).half().cuda() - timestep = torch.rand(1).half().cuda() * 999 - encoder_hidden_states = torch.randn(2, 77, 768).half().cuda() - text_embeds = torch.randn(1, 77, 2048).half().cuda() - return sample, timestep, encoder_hidden_states, text_embeds - - -def load_model(): - log.info(f'versions: torch={torch.__version__} diffusers={diffusers.__version__}') - diffusers_load_config = { - "low_cpu_mem_usage": True, - "torch_dtype": torch.float16, - "safety_checker": None, - "requires_safety_checker": False, - "load_connected_pipeline": True, - "use_safetensors": True, - } - pipeline = diffusers.StableDiffusionPipeline if args.type == 'sd15' else diffusers.StableDiffusionXLPipeline - global pipe # pylint: disable=global-statement - t0 = time.time() - pipe = pipeline.from_single_file(args.model, **diffusers_load_config).to('cuda') - size = os.path.getsize(args.model) - log.info(f'load: model={args.model} type={args.type} time={time.time() - t0:.3f}s size={size / 1024 / 1024:.3f}mb') - - -def load_trace(fn: str): - - @dataclass - class UNet2DConditionOutput: - sample: torch.FloatTensor - - class TracedUNet(torch.nn.Module): - def __init__(self): - super().__init__() - self.in_channels = pipe.unet.in_channels - self.device = pipe.unet.device - - def forward(self, latent_model_input, t, encoder_hidden_states): - sample = unet_traced(latent_model_input, t, encoder_hidden_states)[0] - return 
UNet2DConditionOutput(sample=sample) - - t0 = time.time() - unet_traced = torch.jit.load(fn) - pipe.unet = TracedUNet() - size = os.path.getsize(fn) - log.info(f'load: optimized={fn} time={time.time() - t0:.3f}s size={size / 1024 / 1024:.3f}mb') - - -def trace_model(): - log.info(f'tracing model: {args.model}') - torch.set_grad_enabled(False) - unet = pipe.unet - unet.eval() - # unet.to(memory_format=torch.channels_last) # use channels_last memory format - unet.forward = functools.partial(unet.forward, return_dict=False) # set return_dict=False as default - - # warmup - t0 = time.time() - for _ in range(n_warmup): - with torch.inference_mode(): - inputs = generate_inputs() - _output = unet(*inputs) - log.info(f'warmup: time={time.time() - t0:.3f}s passes={n_warmup}') - - # trace - t0 = time.time() - unet_traced = torch.jit.trace(unet, inputs, check_trace=True) - unet_traced.eval() - log.info(f'trace: time={time.time() - t0:.3f}s') - - # optimize graph - t0 = time.time() - for _ in range(n_traces): - with torch.inference_mode(): - inputs = generate_inputs() - _output = unet_traced(*inputs) - log.info(f'optimize: time={time.time() - t0:.3f}s passes={n_traces}') - - # save the model - if args.save: - t0 = time.time() - basename, _ext = os.path.splitext(args.model) - fn = f"{basename}.pt" - unet_traced.save(fn) - size = os.path.getsize(fn) - log.info(f'save: optimized={fn} time={time.time() - t0:.3f}s size={size / 1024 / 1024:.3f}mb') - return fn - - pipe.unet = unet_traced - return None - - -def benchmark_model(msg: str): - with torch.inference_mode(): - inputs = generate_inputs() - torch.cuda.synchronize() - for n in range(n_runs): - if n > n_runs / 10: - t0 = time.time() - _output = pipe.unet(*inputs) - torch.cuda.synchronize() - t1 = time.time() - log.info(f"benchmark unet: {t1 - t0:.3f}s passes={n_runs} type={msg}") - return t1 - t0 - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description = 'SD.Next') - parser.add_argument('--model', type=str, default='', required=True, help='model path') - parser.add_argument('--type', type=str, default='sd15', choices=['sd15', 'sdxl'], required=False, help='model type, default: %(default)s') - parser.add_argument('--benchmark', default = False, action='store_true', help = "run benchmarks, default: %(default)s") - parser.add_argument('--trace', default = True, action='store_true', help = "run jit tracing, default: %(default)s") - parser.add_argument('--save', default = False, action='store_true', help = "save optimized unet, default: %(default)s") - args = parser.parse_args() - setup_logging() - log.info('sdnext model jit tracing') - if not os.path.isfile(args.model): - log.error(f"invalid model path: {args.model}") - exit(1) - load_model() - if args.benchmark: - time0 = benchmark_model('original') - unet_saved = trace_model() - if unet_saved is not None: - load_trace(unet_saved) - if args.benchmark: - time1 = benchmark_model('traced') - log.info(f'benchmark speedup: {100 * (time0 - time1) / time0:.3f}%') diff --git a/cli/torch-compile.py b/cli/torch-compile.py deleted file mode 100755 index 891f27dc5..000000000 --- a/cli/torch-compile.py +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# pylint: disable=cell-var-from-loop -""" -Test Torch Dynamo functionality and backends -""" -import json -import warnings - -import numpy as np -import torch -from torchvision.models import resnet18 - - -print('torch:', torch.__version__) -try: - # must be imported explicitly or namespace is not found - import torch._dynamo as dynamo # pylint: 
disable=ungrouped-imports -except Exception as err: - print('torch without dynamo support', err) - - -N_ITERS = 20 -torch._dynamo.config.verbose=True # pylint: disable=protected-access -warnings.filterwarnings('ignore', category=UserWarning) # disable those for now as many backends reports tons -# torch.set_float32_matmul_precision('high') # enable to test in fp32 - - -def timed(fn): # returns the result of running `fn()` and the time it took for `fn()` to run in ms using CUDA events - start = torch.cuda.Event(enable_timing=True) - end = torch.cuda.Event(enable_timing=True) - start.record() - result = fn() - end.record() - torch.cuda.synchronize() - return result, start.elapsed_time(end) - - -def generate_data(b): - return ( - torch.randn(b, 3, 128, 128).to(torch.float32).cuda(), - torch.randint(1000, (b,)).cuda(), - ) - - -def init_model(): - return resnet18().to(torch.float32).cuda() - - -def evaluate(mod, val): - return mod(val) - - -if __name__ == '__main__': - # first pass, dynamo is going to be slower as it compiles - model = init_model() - inp = generate_data(16)[0] - - # repeat test - results = {} - times = [] - print('eager initial eval:', timed(lambda: evaluate(model, inp))[1]) - for _i in range(N_ITERS): - inp = generate_data(16)[0] - _res, time = timed(lambda: evaluate(model, inp)) # noqa: B023 - times.append(time) - results['default'] = np.median(times) - - print('dynamo available backends:', dynamo.list_backends()) - for backend in dynamo.list_backends(): - try: - # required before changing backends - torch._dynamo.reset() # pylint: disable=protected-access - eval_dyn = dynamo.optimize(backend)(evaluate) - print('dynamo initial eval:', backend, timed(lambda: eval_dyn(model, inp))[1]) # noqa: B023 - times = [] - for _i in range(N_ITERS): - inp = generate_data(16)[0] - _res, time = timed(lambda: eval_dyn(model, inp)) # noqa: B023 - times.append(time) - results[backend] = np.median(times) - except Exception as err: - lines = str(err).split('\n') - print('dyanmo backend failed:', backend, lines[0]) # print just first error line as backtraces can be quite long - results[backend] = 'error' - - # print stats - print(json.dumps(results, indent = 4)) - -""" -Reference: -Training & Inference backends: - dynamo.optimize("inductor") - Uses TorchInductor backend with AotAutograd and cudagraphs by leveraging codegened Triton kernels - dynamo.optimize("aot_nvfuser") - nvFuser with AotAutograd - dynamo.optimize("aot_cudagraphs") - cudagraphs with AotAutograd -Inference-only backends: - dynamo.optimize("ofi") - Uses Torchscript optimize_for_inference - dynamo.optimize("fx2trt") - Uses Nvidia TensorRT for inference optimizations - dynamo.optimize("onnxrt") - Uses ONNXRT for inference on CPU/GPU -""" diff --git a/cli/train.py b/cli/train.py deleted file mode 100755 index 9e551ddf5..000000000 --- a/cli/train.py +++ /dev/null @@ -1,443 +0,0 @@ -#!/usr/bin/env python - -""" -Examples: -- sd15: train.py --type lora --tag girl --comments sdnext --input ~/generative/Input/mia --process original,interrogate,resize --name mia -- sdxl: train.py --type lora --tag girl --comments sdnext --input ~/generative/Input/mia --process original,interrogate,resize --precision fp32 --optimizer Adafactor --sdxl --name miaxl -- offline: train.py --type lora --tag girl --comments sdnext --input ~/generative/Input/mia --model /home/vlado/dev/sdnext/models/Stable-diffusion/sdxl/miaanimeSFWNSFWSDXL_v40.safetensors --dir /home/vlado/dev/sdnext/models/Lora/ --precision fp32 --optimizer Adafactor --sdxl --name miaxl -""" - 
-# system imports -import os -import re -import gc -import sys -import json -import shutil -import pathlib -import asyncio -import logging -import tempfile -import argparse - -# local imports -import util -import sdapi -import options - - -# globals -args = None -log = logging.getLogger('train') -valid_steps = ['original', 'face', 'body', 'blur', 'range', 'upscale', 'restore', 'interrogate', 'resize', 'square', 'segment'] -log_file = os.path.join(os.path.dirname(__file__), 'train.log') -server_ok = False - -# methods - -def setup_logging(): - from rich.theme import Theme - from rich.logging import RichHandler - from rich.console import Console - from rich.pretty import install as pretty_install - from rich.traceback import install as traceback_install - console = Console(log_time=True, log_time_format='%H:%M:%S-%f', theme=Theme({ - "traceback.border": "black", - "traceback.border.syntax_error": "black", - "inspect.value.border": "black", - })) - # logging.getLogger("urllib3").setLevel(logging.ERROR) - # logging.getLogger("httpx").setLevel(logging.ERROR) - level = logging.DEBUG if args.debug else logging.INFO - logging.basicConfig(level=logging.ERROR, format='%(asctime)s | %(name)s | %(levelname)s | %(module)s | %(message)s', filename=log_file, filemode='a', encoding='utf-8', force=True) - log.setLevel(logging.DEBUG) # log to file is always at level debug for facility `sd` - pretty_install(console=console) - traceback_install(console=console, extra_lines=1, width=console.width, word_wrap=False, indent_guides=False, suppress=[]) - rh = RichHandler(show_time=True, omit_repeated_times=False, show_level=True, show_path=False, markup=False, rich_tracebacks=True, log_time_format='%H:%M:%S-%f', level=level, console=console) - rh.set_name(level) - while log.hasHandlers() and len(log.handlers) > 0: - log.removeHandler(log.handlers[0]) - log.addHandler(rh) - - -def mem_stats(): - gc.collect() - import torch - if torch.cuda.is_available(): - with torch.no_grad(): - torch.cuda.empty_cache() - with torch.cuda.device('cuda'): - torch.cuda.empty_cache() - torch.cuda.ipc_collect() - mem = util.get_memory() - peak = { 'active': mem['gpu-active']['peak'], 'allocated': mem['gpu-allocated']['peak'], 'reserved': mem['gpu-reserved']['peak'] } - log.debug(f"memory cpu: {mem.ram} gpu current: {mem.gpu} gpu peak: {peak}") - - -def parse_args(): - global args # pylint: disable=global-statement - parser = argparse.ArgumentParser(description = 'SD.Next Train') - - group_server = parser.add_argument_group('Server') - group_server.add_argument('--server', type=str, default='http://127.0.0.1:7860', required=False, help='server url, default: %(default)s') - group_server.add_argument('--user', type=str, default=None, required=False, help='server url, default: %(default)s') - group_server.add_argument('--password', type=str, default=None, required=False, help='server url, default: %(default)s') - group_server.add_argument('--dir', type=str, default=None, required=False, help='folder with trained networks, default: use server setting') - - group_main = parser.add_argument_group('Main') - group_main.add_argument('--type', type=str, choices=['embedding', 'ti', 'lora', 'lyco', 'dreambooth', 'hypernetwork'], default=None, required=True, help='training type') - group_main.add_argument('--model', type=str, default='', required=False, help='base model to use for training, default: current loaded model') - group_main.add_argument('--name', type=str, default=None, required=True, help='output filename') - 
group_main.add_argument('--tag', type=str, default='person', required=False, help='primary tags, default: %(default)s') - group_main.add_argument('--comments', type=str, default='', required=False, help='comments to be added to trained model metadata, default: %(default)s') - - group_data = parser.add_argument_group('Dataset') - group_data.add_argument('--input', type=str, default=None, required=True, help='input folder with training images') - group_data.add_argument('--interim', type=str, default='', required=False, help='where to store processed images, default is system temp/train') - group_data.add_argument('--process', type=str, default='original,interrogate,resize,square', required=False, help=f'list of possible processing steps: {valid_steps}, default: %(default)s') - - group_train = parser.add_argument_group('Train') - group_train.add_argument('--gradient', type=int, default=1, required=False, help='gradient accumulation steps, default: %(default)s') - group_train.add_argument('--steps', type=int, default=2500, required=False, help='training steps, default: %(default)s') - group_train.add_argument('--batch', type=int, default=1, required=False, help='batch size, default: %(default)s') - group_train.add_argument('--lr', type=float, default=1e-04, required=False, help='model learning rate, default: %(default)s') - group_train.add_argument('--dim', type=int, default=32, required=False, help='network dimension or number of vectors, default: %(default)s') - - # lora params - group_train.add_argument('--repeats', type=int, default=1, required=False, help='number of repeats per image, default: %(default)s') - group_train.add_argument('--alpha', type=float, default=0, required=False, help='lora/lyco alpha for weights scaling, default: dim/2') - group_train.add_argument('--algo', type=str, default=None, choices=['locon', 'loha', 'lokr', 'ia3'], required=False, help='alternative lyco algoritm, default: %(default)s') - group_train.add_argument('--args', type=str, default=None, required=False, help='lora/lyco additional network arguments, default: %(default)s') - group_train.add_argument('--optimizer', type=str, default='AdamW', required=False, help='optimizer type, default: %(default)s') - group_train.add_argument('--precision', type=str, choices=['fp16', 'fp32'], default='fp16', required=False, help='training precision, default: %(default)s') - group_train.add_argument('--sdxl', default = False, action='store_true', help = "run sdxl training, default: %(default)s") - # AdamW (default), AdamW8bit, PagedAdamW8bit, Lion8bit, PagedLion8bit, Lion, SGDNesterov, SGDNesterov8bit, DAdaptation(DAdaptAdamPreprint), DAdaptAdaGrad, DAdaptAdam, DAdaptAdan, DAdaptAdanIP, DAdaptLion, DAdaptSGD, AdaFactor - - group_other = parser.add_argument_group('Other') - group_other.add_argument('--overwrite', default = False, action='store_true', help = "overwrite existing training, default: %(default)s") - group_other.add_argument('--experimental', default = False, action='store_true', help = "enable experimental options, default: %(default)s") - group_other.add_argument('--debug', default = False, action='store_true', help = "enable debug level logging, default: %(default)s") - - args = parser.parse_args() - - -def prepare_server(): - global server_ok # pylint: disable=global-statement - try: - server_status = util.Map(sdapi.progresssync()) - server_state = server_status['state'] - server_ok = True - except Exception: - log.warning(f'sdnext server error: {server_status}') - server_ok = False - if server_ok and 
server_state['job_count'] > 0: - log.error(f'sdnext server not idle: {server_state}') - exit(1) - if server_ok: - server_options = util.Map(sdapi.options()) - server_options.options.save_training_settings_to_txt = False - server_options.options.training_enable_tensorboard = False - server_options.options.training_tensorboard_save_images = False - server_options.options.pin_memory = True - server_options.options.save_optimizer_state = False - server_options.options.training_image_repeats_per_epoch = args.repeats - server_options.options.training_write_csv_every = 0 - sdapi.postsync('/sdapi/v1/options', server_options.options) - log.info('updated server options') - - -def verify_args(): - server_options = util.Map(sdapi.options()) - if args.model != '': - if not os.path.isfile(args.model): - log.error(f'cannot find loaded model: {args.model}') - exit(1) - if server_ok: - server_options.options.sd_model_checkpoint = args.model - sdapi.postsync('/sdapi/v1/options', server_options.options) - elif server_ok: - args.model = server_options.options.sd_model_checkpoint.split(' [')[0] - if args.sdxl and (server_options.sd_backend != 'diffusers' or server_options.diffusers_pipeline != 'Stable Diffusion XL'): - log.warning('server checkpoint is not sdxl') - else: - log.error('no model specified') - exit(1) - base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - if args.type == 'lora' and not server_ok and not args.dir: - log.error('offline lora training requires --dir ') - exit(1) - if args.type == 'lora': - import transformers - if transformers.__version__ != '4.30.2': - log.error(f'lora training requires specific transformers version: current {transformers.__version__} required transformers==4.30.2') - exit(1) - args.lora_dir = server_options.options.lora_dir or args.dir - if not os.path.isabs(args.lora_dir): - args.lora_dir = os.path.join(base_dir, args.lora_dir) - args.lyco_dir = server_options.options.lyco_dir or args.dir - if not os.path.isabs(args.lyco_dir): - args.lyco_dir = os.path.join(base_dir, args.lyco_dir) - args.embeddings_dir = server_options.options.embeddings_dir or args.dir - if not os.path.isfile(args.model): - args.ckpt_dir = server_options.options.ckpt_dir - if not os.path.isabs(args.ckpt_dir): - args.ckpt_dir = os.path.join(base_dir, args.ckpt_dir) - attempt = os.path.abspath(os.path.join(args.ckpt_dir, args.model)) - args.model = attempt if os.path.isfile(attempt) else args.model - if not os.path.isfile(args.model): - attempt = os.path.abspath(os.path.join(args.ckpt_dir, args.model + '.safetensors')) - args.model = attempt if os.path.isfile(attempt) else args.model - if not os.path.isfile(args.model): - log.error(f'cannot find loaded model: {args.model}') - exit(1) - if not os.path.exists(args.input) or not os.path.isdir(args.input): - log.error(f'cannot find training folder: {args.input}') - exit(1) - if not os.path.exists(args.lora_dir) or not os.path.isdir(args.lora_dir): - log.error(f'cannot find lora folder: {args.lora_dir}') - exit(1) - if not os.path.exists(args.lyco_dir) or not os.path.isdir(args.lyco_dir): - log.error(f'cannot find lyco folder: {args.lyco_dir}') - exit(1) - if args.interim != '': - args.process_dir = args.interim - else: - args.process_dir = os.path.join(tempfile.gettempdir(), 'train', args.name) - log.debug(f'args: {vars(args)}') - log.debug(f'server flags: {server_options.flags}') - log.debug(f'server options: {server_options.options}') - - -async def training_loop(): - async def async_train(): - res = await 
sdapi.post('/sdapi/v1/train/embedding', options.embedding) - log.info(f'train embedding result: {res}') - - async def async_monitor(): - from tqdm.rich import tqdm - await asyncio.sleep(3) - res = util.Map(sdapi.progress()) - with tqdm(desc='train embedding', total=res.state.job_count) as pbar: - while res.state.job_no < res.state.job_count and not res.state.interrupted and not res.state.skipped: - await asyncio.sleep(2) - prev_job = res.state.job_no - res = util.Map(sdapi.progress()) - loss = re.search(r"Loss: (.*?)(?=\<)", res.textinfo) - if loss: - pbar.set_postfix({ 'loss': loss.group(0) }) - pbar.update(res.state.job_no - prev_job) - - a = asyncio.create_task(async_train()) - b = asyncio.create_task(async_monitor()) - await asyncio.gather(a, b) # wait for both pipeline and monitor to finish - - -def train_embedding(): - log.info(f'{args.type} options: {options.embedding}') - create_options = util.Map({ - "name": args.name, - "num_vectors_per_token": args.dim, - "overwrite_old": False, - "init_text": args.tag, - }) - fn = os.path.join(args.embeddings_dir, args.name) + '.pt' - if os.path.exists(fn) and args.overwrite: - log.warning(f'delete existing embedding {fn}') - os.remove(fn) - else: - log.error(f'embedding exists {fn}') - return - log.info(f'create embedding {create_options}') - res = sdapi.postsync('/sdapi/v1/create/embedding', create_options) - if 'info' in res and 'error' in res['info']: # formatted error - log.error(res.info) - elif 'info' in res: # no error - asyncio.run(training_loop()) - else: # unknown error - log.error(f'create embedding error {res}') - - -def train_lora(): - fn = os.path.join(options.lora.output_dir, args.name) - for ext in ['.ckpt', '.pt', '.safetensors']: - if os.path.exists(fn + ext): - if args.overwrite: - log.warning(f'delete existing lora: {fn + ext}') - os.remove(fn + ext) - else: - log.error(f'lora exists: {fn + ext}') - return - log.info(f'{args.type} options: {options.lora}') - # lora imports - lora_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'modules', 'lora')) - lycoris_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'modules', 'lycoris')) - sys.path.append(lora_path) - if args.type == 'lyco': - sys.path.append(lycoris_path) - log.debug('importing lora lib') - if not args.sdxl: - import train_network - trainer = train_network.NetworkTrainer() - trainer.train(options.lora) - else: - import sdxl_train_network - trainer = sdxl_train_network.SdxlNetworkTrainer() - trainer.train(options.lora) - if args.type == 'lyco': - log.debug('importing lycoris lib') - import importlib - _network_module = importlib.import_module(options.lora.network_module) - - -def prepare_options(): - if args.type == 'embedding': - log.info('train embedding') - options.lora.in_json = None - if args.type == 'dreambooth': - log.info('train using dreambooth style training') - options.lora.vae_batch_size = args.batch - options.lora.in_json = None - if args.type == 'lora': - log.info('train using lora style training') - options.lora.output_dir = args.lora_dir - options.lora.in_json = os.path.join(args.process_dir, args.name + '.json') - if args.type == 'lyco': - log.info('train using lycoris network') - options.lora.output_dir = args.lora_dir - options.lora.network_module = 'lycoris.kohya' - options.lora.in_json = os.path.join(args.process_dir, args.name + '.json') - # lora specific - options.lora.save_model_as = 'safetensors' - options.lora.pretrained_model_name_or_path = args.model - options.lora.output_name = args.name - 
options.lora.max_train_steps = args.steps - options.lora.network_dim = args.dim - options.lora.network_alpha = args.dim // 2 if args.alpha == 0 else args.alpha - options.lora.network_args = [] - options.lora.training_comment = args.comments - options.lora.sdpa = True - options.lora.optimizer_type = args.optimizer - if args.algo is not None: - options.lora.network_args.append(f'algo={args.algo}') - if args.args is not None: - for net_arg in args.args: - options.lora.network_args.append(net_arg) - options.lora.gradient_accumulation_steps = args.gradient - options.lora.learning_rate = args.lr - options.lora.train_batch_size = args.batch - options.lora.train_data_dir = args.process_dir - options.lora.no_half_vae = args.precision == 'fp16' - # embedding specific - options.embedding.embedding_name = args.name - options.embedding.learn_rate = str(args.lr) - options.embedding.batch_size = args.batch - options.embedding.steps = args.steps - options.embedding.data_root = args.process_dir - options.embedding.log_directory = os.path.join(args.process_dir, 'log') - options.embedding.gradient_step = args.gradient - - -def process_inputs(): - import process - import filetype - pathlib.Path(args.process_dir).mkdir(parents=True, exist_ok=True) - processing_options = args.process.split(',') if isinstance(args.process, str) else args.process - processing_options = [opt.strip() for opt in re.split(',| ', args.process)] - log.info(f'processing steps: {processing_options}') - for step in processing_options: - if step not in valid_steps: - log.error(f'invalid processing step: {[step]}') - exit(1) - for root, _sub_dirs, folder in os.walk(args.input): - files = [os.path.join(root, f) for f in folder if filetype.is_image(os.path.join(root, f))] - log.info(f'processing input images: {len(files)}') - if os.path.exists(args.process_dir): - if args.overwrite: - log.warning(f'removing existing processed folder: {args.process_dir}') - shutil.rmtree(args.process_dir, ignore_errors=True) - else: - log.info(f'processed folder exists: {args.process_dir}') - steps = [step for step in processing_options if step in ['face', 'body', 'original']] - process.reset() - options.process.target_size = 1024 if args.sdxl else 512 - metadata = {} - for step in steps: - if step == 'face': - opts = [step for step in processing_options if step not in ['body', 'original']] - if step == 'body': - opts = [step for step in processing_options if step not in ['face', 'original', 'upscale', 'restore']] # body does not perform upscale or restore - if step == 'original': - opts = [step for step in processing_options if step not in ['face', 'body', 'upscale', 'restore', 'blur', 'range', 'segment']] # original does not perform most steps - log.info(f'processing current step: {opts}') - tag = step - if tag == 'original' and args.tag is not None: - concept = args.tag.split(',')[0].strip() - else: - concept = step - if args.type in ['lora', 'lyco', 'dreambooth']: - folder = os.path.join(args.process_dir, str(args.repeats) + '_' + concept) # separate concepts per folder - if args.type in ['embedding']: - folder = os.path.join(args.process_dir) # everything into same folder - log.info(f'processing concept: {concept}') - log.info(f'processing output folder: {folder}') - pathlib.Path(folder).mkdir(parents=True, exist_ok=True) - results = {} - if server_ok: - for f in files: - res = process.file(filename = f, folder = folder, tag = args.tag, requested = opts) - if res.image: # valid result - results[res.type] = results.get(res.type, 0) + 1 - results['total'] = 
results.get('total', 0) + 1 - rel_path = res.basename.replace(os.path.commonpath([res.basename, args.process_dir]), '') - if rel_path.startswith(os.path.sep): - rel_path = rel_path[1:] - metadata[rel_path] = { 'caption': res.caption, 'tags': ','.join(res.tags) } - if options.lora.in_json is None: - with open(res.output.replace(options.process.format, '.txt'), "w", encoding='utf-8') as outfile: - outfile.write(res.caption) - log.info(f"processing {'saved' if res.image is not None else 'skipped'}: {f} => {res.output} {res.ops} {res.message}") - else: - log.info('processing skipped: offline') - folders = [os.path.join(args.process_dir, folder) for folder in os.listdir(args.process_dir) if os.path.isdir(os.path.join(args.process_dir, folder))] - log.info(f'input datasets {folders}') - if options.lora.in_json is not None: - with open(options.lora.in_json, "w", encoding='utf-8') as outfile: # write json at the end only - outfile.write(json.dumps(metadata, indent=2)) - for folder in folders: # create latents - import latents - latents.create_vae_latents(util.Map({ 'input': folder, 'json': options.lora.in_json })) - latents.unload_vae() - r = { 'inputs': len(files), 'outputs': results, 'metadata': options.lora.in_json } - log.info(f'processing steps result: {r}') - if args.gradient < 0: - log.info(f"setting gradient accumulation to number of images: {results['total']}") - options.lora.gradient_accumulation_steps = results['total'] - options.embedding.gradient_step = results['total'] - process.unload() - - -if __name__ == '__main__': - parse_args() - setup_logging() - log.info('SD.Next Train') - sdapi.sd_url = args.server - if args.user is not None: - sdapi.sd_username = args.user - if args.password is not None: - sdapi.sd_password = args.password - prepare_server() - verify_args() - prepare_options() - mem_stats() - process_inputs() - mem_stats() - try: - if args.type == 'embedding': - train_embedding() - if args.type == 'lora' or args.type == 'lyco' or args.type == 'dreambooth': - train_lora() - except KeyboardInterrupt: - log.error('interrupt requested') - sdapi.interrupt() - mem_stats() - log.info('done') diff --git a/cli/zluda-python.py b/cli/zluda-python.py index 31ee96362..e0399d096 100644 --- a/cli/zluda-python.py +++ b/cli/zluda-python.py @@ -13,7 +13,7 @@ def __init__(self, env_globals, env_locals): def execute(self, s: str): try: - exec(s, self.env_globals, self.env_locals) + exec(s, self.env_globals, self.env_locals) # pylint: disable=exec-used except Exception as e: print(f'{e.__class__.__name__}: {e}') diff --git a/extensions-builtin/Lora/network_lora.py b/extensions-builtin/Lora/network_lora.py index 76a8322da..5194222a0 100644 --- a/extensions-builtin/Lora/network_lora.py +++ b/extensions-builtin/Lora/network_lora.py @@ -24,7 +24,8 @@ def create_module(self, weights, key, none_ok=False): weight = weights.get(key) if weight is None and none_ok: return None - is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention, diffusers_lora.LoRACompatibleLinear] + linear_modules = [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention, diffusers_lora.LoRACompatibleLinear] + is_linear = type(self.sd_module) in linear_modules or self.sd_module.__class__.__name__ == "NNCFLinear" is_conv = type(self.sd_module) in [torch.nn.Conv2d, diffusers_lora.LoRACompatibleConv] if is_linear: weight = weight.reshape(weight.shape[0], -1) diff --git 
a/extensions-builtin/Lora/network_overrides.py b/extensions-builtin/Lora/network_overrides.py index 724e47c70..14018fb70 100644 --- a/extensions-builtin/Lora/network_overrides.py +++ b/extensions-builtin/Lora/network_overrides.py @@ -1,7 +1,7 @@ from modules import shared -maybe_diffusers = [ +maybe_diffusers = [ # forced if lora_maybe_diffusers is enabled 'aaebf6360f7d', # sd15-lcm '3d18b05e4f56', # sdxl-lcm 'b71dcb732467', # sdxl-tcd @@ -19,15 +19,26 @@ '8cca3706050b', # hyper-sdxl-1step ] -force_diffusers = [ +force_diffusers = [ # forced always '816d0eed49fd', # flash-sdxl 'c2ec22757b46', # flash-sd15 ] -def check_override(shorthash): +force_models = [ # forced always + 'sd3', +] + +force_classes = [ # forced always +] + + +def check_override(shorthash=''): + force = False + force = force or (shared.sd_model_type in force_models) + force = force or (shared.sd_model.__class__.__name__ in force_classes) if len(shorthash) < 4: - return False - force = any(x.startswith(shorthash) for x in maybe_diffusers) if shared.opts.lora_maybe_diffusers else False + return force + force = force or (any(x.startswith(shorthash) for x in maybe_diffusers) if shared.opts.lora_maybe_diffusers else False) force = force or any(x.startswith(shorthash) for x in force_diffusers) if force and shared.opts.lora_maybe_diffusers: shared.log.debug('LoRA override: force diffusers') diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index e564e2b67..71b5b29dc 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -49,6 +49,7 @@ def assign_network_names_to_compvis_modules(sd_model): network_layer_mapping = {} if shared.native: if not hasattr(shared.sd_model, 'text_encoder') or not hasattr(shared.sd_model, 'unet'): + sd_model.network_layer_mapping = {} return for name, module in shared.sd_model.text_encoder.named_modules(): prefix = "lora_te1_" if shared.sd_model_type == "sdxl" else "lora_te_" @@ -66,6 +67,7 @@ def assign_network_names_to_compvis_modules(sd_model): module.network_layer_name = network_name else: if not hasattr(shared.sd_model, 'cond_stage_model'): + sd_model.network_layer_mapping = {} return for name, module in shared.sd_model.cond_stage_model.wrapped.named_modules(): network_name = name.replace(".", "_") @@ -87,7 +89,14 @@ def load_diffusers(name, network_on_disk, lora_scale=1.0) -> network.Network: return cached if not shared.native: return None - shared.sd_model.load_lora_weights(network_on_disk.filename) + if not hasattr(shared.sd_model, 'load_lora_weights'): + shared.log.error(f"LoRA load failed: class={shared.sd_model.__class__} does not implement load lora") + return None + try: + shared.sd_model.load_lora_weights(network_on_disk.filename) + except Exception as e: + errors.display(e, "LoRA") + return None if shared.opts.lora_fuse_diffusers: shared.sd_model.fuse_lora(lora_scale=lora_scale) net = network.Network(name, network_on_disk) @@ -159,7 +168,6 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No list_available_networks() networks_on_disk = [available_network_aliases.get(name, None) for name in names] failed_to_load_networks = [] - recompile_model = False if shared.compiled_model_state is not None and shared.compiled_model_state.is_compiled: if len(names) == len(shared.compiled_model_state.lora_model): @@ -177,13 +185,10 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No shared.compiled_model_state.lora_model = [] if recompile_model: backup_cuda_compile = 
shared.opts.cuda_compile - backup_nncf_compress_weights = shared.opts.nncf_compress_weights sd_models.unload_model_weights(op='model') shared.opts.cuda_compile = False - shared.opts.nncf_compress_weights = [] sd_models.reload_model_weights(op='model') shared.opts.cuda_compile = backup_cuda_compile - shared.opts.nncf_compress_weights = backup_nncf_compress_weights loaded_networks.clear() for i, (network_on_disk, name) in enumerate(zip(networks_on_disk, names)): @@ -227,8 +232,6 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No if recompile_model: shared.log.info("LoRA recompiling model") backup_lora_model = shared.compiled_model_state.lora_model - if shared.opts.nncf_compress_weights and not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): - shared.sd_model = sd_models_compile.nncf_compress_weights(shared.sd_model) if shared.opts.cuda_compile: shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model) diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index d224f4c67..3a612a635 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -102,7 +102,7 @@ def find_version(): return item except Exception as e: - shared.log.debug(f"Extra networks error: type=lora file={name} {e}") + shared.log.debug(f"Networks error: type=lora file={name} {e}") from modules import errors errors.display('e', 'Lora') return None diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui index 285743a83..dae2c67d8 160000 --- a/extensions-builtin/sdnext-modernui +++ b/extensions-builtin/sdnext-modernui @@ -1 +1 @@ -Subproject commit 285743a83f251ae23e3a4120d15badcead4eab33 +Subproject commit dae2c67d826b631dcc343c028c60f478b0437877 diff --git a/html/locale_en.json b/html/locale_en.json index 4b773c730..800ee6e58 100644 --- a/html/locale_en.json +++ b/html/locale_en.json @@ -230,7 +230,7 @@ {"id":"","label":"Control Options","localized":"","hint":"Settings related the Control tab"}, {"id":"","label":"Training","localized":"","hint":"Settings related to model training configuration and directories"}, {"id":"","label":"Interrogate","localized":"","hint":"Settings related to interrogation configuration"}, - {"id":"","label":"Extra Networks","localized":"","hint":"Settings related to extra networks user interface, extra networks multiplier defaults, and configuration"}, + {"id":"","label":"Networks","localized":"","hint":"Settings related to networks user interface, networks multiplier defaults, and configuration"}, {"id":"","label":"Licenses","localized":"","hint":"View licenses of all additional included libraries"}, {"id":"","label":"Show all pages","localized":"","hint":"Show all settings pages"} ], diff --git a/html/locale_ko.json b/html/locale_ko.json index c333d934c..0ddf62afb 100644 --- a/html/locale_ko.json +++ b/html/locale_ko.json @@ -48,7 +48,7 @@ {"id":"","label":"Interrogate\nDeepBooru","localized":"DeepBooru 모델 사용","hint":"DeepBooru 모델을 사용해 이미지에서 설명을 추출한다."} ], "extra networks": [ - {"id":"","label":"Extra networks tab order","localized":"엑스트라 네트워크 탭 순서","hint":"Comma-separated list of tab names; tabs listed here will appear in the extra networks UI first and in order lsited"}, + {"id":"","label":"Networks tab order","localized":"엑스트라 네트워크 탭 순서","hint":"Comma-separated list of tab names; tabs listed here will appear in the extra networks UI first and in order lsited"}, 
{"id":"","label":"UI position","localized":"UI 위치","hint":""}, {"id":"","label":"UI height (%)","localized":"UI 높이 (%)","hint":""}, {"id":"","label":"UI sidebar width (%)","localized":"UI 사이드바 너비 (%)","hint":""}, diff --git a/html/reference.json b/html/reference.json index 2ffdba6a4..a3f62e7e1 100644 --- a/html/reference.json +++ b/html/reference.json @@ -160,15 +160,30 @@ "preview": "PixArt-alpha--PixArt-XL-2-1024-MS.jpg", "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0" }, - "Pixart-Σ": { - "path": "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS", + "Pixart-Σ Small": { + "path": "huggingface/PixArt-alpha/PixArt-Sigma-XL-2-512-MS", "desc": "PixArt-Σ, a Diffusion Transformer model (DiT) capable of directly generating images at 4K resolution. PixArt-Σ represents a significant advancement over its predecessor, PixArt-α, offering images of markedly higher fidelity and improved alignment with text prompts.", "preview": "PixArt-alpha--pixart_sigma_sdxlvae_T5_diffusers.jpg", + "skip": true, + "extras": "width: 512, height: 512, sampler: Default, cfg_scale: 2.0" + }, + "Pixart-Σ Medium": { + "path": "huggingface/PixArt-alpha/PixArt-Sigma-XL-2-1024-MS", + "desc": "PixArt-Σ, a Diffusion Transformer model (DiT) capable of directly generating images at 4K resolution. PixArt-Σ represents a significant advancement over its predecessor, PixArt-α, offering images of markedly higher fidelity and improved alignment with text prompts.", + "preview": "PixArt-alpha--pixart_sigma_sdxlvae_T5_diffusers.jpg", + "skip": true, + "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0" + }, + "Pixart-Σ Large": { + "path": "huggingface/PixArt-alpha/PixArt-Sigma-XL-2-2K-MS", + "desc": "PixArt-Σ, a Diffusion Transformer model (DiT) capable of directly generating images at 4K resolution. 
PixArt-Σ represents a significant advancement over its predecessor, PixArt-α, offering images of markedly higher fidelity and improved alignment with text prompts.", + "preview": "PixArt-alpha--pixart_sigma_sdxlvae_T5_diffusers.jpg", + "skip": true, "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0" }, - "Tencent HunyuanDiT": { - "path": "Tencent-Hunyuan/HunyuanDiT-Diffusers", + "Tencent HunyuanDiT 1.1": { + "path": "Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers", "desc": "Hunyuan-DiT : A Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding.", "preview": "Tencent-Hunyuan-HunyuanDiT.jpg", "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0" diff --git a/installer.py b/installer.py index 686dd6c02..2bffe8276 100644 --- a/installer.py +++ b/installer.py @@ -275,9 +275,12 @@ def install(package, friendly: str = None, ignore: bool = False, reinstall: bool # execute git command @lru_cache() -def git(arg: str, folder: str = None, ignore: bool = False): +def git(arg: str, folder: str = None, ignore: bool = False, optional: bool = False): if args.skip_git: return '' + if optional: + if 'google.colab' in sys.modules: + return '' git_cmd = os.environ.get('GIT', "git") if git_cmd != "git": git_cmd = os.path.abspath(git_cmd) @@ -306,7 +309,7 @@ def branch(folder=None): return None branches = [] try: - b = git('branch --show-current', folder) + b = git('branch --show-current', folder, optional=True) if b == '': branches = git('branch', folder).split('\n') if len(branches) > 0: @@ -315,7 +318,7 @@ def branch(folder=None): b = branches[1].strip() log.debug(f'Git detached head detected: folder="{folder}" reattach={b}') except Exception: - b = git('git rev-parse --abbrev-ref HEAD', folder) + b = git('git rev-parse --abbrev-ref HEAD', folder, optional=True) if 'main' in b: b = 'main' elif 'master' in b: @@ -323,7 +326,7 @@ def branch(folder=None): else: b = b.split('\n')[0].replace('*', '').strip() log.debug(f'Submodule: {folder} / {b}') - git(f'checkout {b}', folder, ignore=True) + git(f'checkout {b}', folder, ignore=True, optional=True) return b @@ -396,6 +399,12 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None): if args.quick: return log.info(f'Python version={platform.python_version()} platform={platform.system()} bin="{sys.executable}" venv="{sys.prefix}"') + if int(sys.version_info.major) == 3 and int(sys.version_info.minor) == 12 and int(sys.version_info.micro) > 3: # TODO python 3.12.4 or higher cause a mess with pydantic + log.error(f"Incompatible Python version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro} required 3.12.3 or lower") + if reason is not None: + log.error(reason) + if not args.ignore: + sys.exit(1) if not (int(sys.version_info.major) == 3 and int(sys.version_info.minor) in supported_minors): log.error(f"Incompatible Python version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro} required 3.{supported_minors}") if reason is not None: @@ -434,7 +443,7 @@ def check_onnx(): def install_rocm_zluda(torch_command): - check_python(supported_minors=[10,11], reason='RocM or Zluda backends require Python 3.10 or 3.11') + check_python(supported_minors=[10, 11], reason='ROCm or ZLUDA backends require Python 3.10 or 3.11') is_windows = platform.system() == 'Windows' log.info('AMD ROCm toolkit detected') os.environ.setdefault('PYTORCH_HIP_ALLOC_CONF', 'garbage_collection_threshold:0.8,max_split_size_mb:512') @@ -515,14 +524,7 @@ def 
install_rocm_zluda(torch_command): torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision') # conceal ROCm installed - os.environ.pop("ROCM_HOME", None) - os.environ.pop("ROCM_PATH", None) - paths = os.environ["PATH"].split(";") - paths_no_rocm = [] - for path in paths: - if "ROCm" not in path: - paths_no_rocm.append(path) - os.environ["PATH"] = ";".join(paths_no_rocm) + conceal_rocm() else: if rocm_ver is None: # assume the latest if version check fails torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision --index-url https://download.pytorch.org/whl/rocm6.0') @@ -541,6 +543,17 @@ def install_rocm_zluda(torch_command): return torch_command +def conceal_rocm(): + os.environ.pop("ROCM_HOME", None) + os.environ.pop("ROCM_PATH", None) + paths = os.environ["PATH"].split(";") + paths_no_rocm = [] + for path in paths: + if "ROCm" not in path: + paths_no_rocm.append(path) + os.environ["PATH"] = ";".join(paths_no_rocm) + + def install_ipex(torch_command): check_python(supported_minors=[10,11], reason='IPEX backend requires Python 3.10 or 3.11') args.use_ipex = True # pylint: disable=attribute-defined-outside-init @@ -677,11 +690,11 @@ def check_torch(): torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision') elif allow_directml and args.use_directml and ('arm' not in machine and 'aarch' not in machine): log.info('Using DirectML Backend') - check_python(supported_minors=[10], reason='DirectML backend requires Python 3.10') - torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.0.0 torchvision torch-directml') + torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.3.1 torchvision torch-directml') if 'torch' in torch_command and not args.version: install(torch_command, 'torch torchvision') install('onnxruntime-directml', 'onnxruntime-directml', ignore=True) + conceal_rocm() else: if args.use_zluda: log.warning("ZLUDA failed to initialize: no HIP SDK found") @@ -881,6 +894,7 @@ def install_submodules(force=True): branch(name) except Exception: log.error(f'Error updating submodule: {submodule}') + setup_logging() if args.profile: print_profile(pr, 'Submodule') return '\n'.join(res) @@ -1015,7 +1029,7 @@ def get_version(force=False): 'url': origin.replace('\n', '') + '/tree/' + branch_name.replace('\n', '') } except Exception: - version = { 'app': 'sd.next', 'version': 'unknown' } + version = { 'app': 'sd.next', 'version': 'unknown', 'branch': 'unknown' } try: cwd = os.getcwd() os.chdir('extensions-builtin/sdnext-modernui') @@ -1031,21 +1045,24 @@ def get_version(force=False): def check_ui(ver): - if ver is None: - return - if ver['branch'] == ver['ui']: - return - log.debug(f'Branch mismatch: sdnext={ver["branch"]} ui={ver["ui"]}') + def same(ver): + core = ver['branch'] if ver is not None and 'branch' in ver else 'unknown' + ui = ver['ui'] if ver is not None and 'ui' in ver else 'unknown' + return core == ui or (core == 'master' and ui == 'main') + + if not same(ver): + log.debug(f'Branch mismatch: sdnext={ver["branch"]} ui={ver["ui"]}') cwd = os.getcwd() try: os.chdir('extensions-builtin/sdnext-modernui') - git('checkout ' + ver['branch'], ignore=True) + target = 'dev' if 'dev' in ver['branch'] else 'main' + git('checkout ' + target, ignore=True, optional=True) os.chdir(cwd) ver = get_version(force=True) - if ver['branch'] == ver['ui']: + if same(ver): log.debug(f'Branch synchronized: {ver["branch"]}') else: - log.debug(f'Branch synch failed: sdnext={ver["branch"]} ui={ver["ui"]}') + log.debug(f'Branch sync failed: sdnext={ver["branch"]} 
ui={ver["ui"]}') except Exception as e: log.debug(f'Branch switch: {e}') os.chdir(cwd) diff --git a/javascript/base.css b/javascript/base.css index 799f06613..7daa8b2bd 100644 --- a/javascript/base.css +++ b/javascript/base.css @@ -17,6 +17,7 @@ .tooltip-show { opacity: 0.9; } .tooltip-left { right: unset; left: 1em; } .toolbutton-selected { background: var(--background-fill-primary) !important; } +.input-accordion-checkbox { display: none; } /* live preview */ .progressDiv { position: relative; height: 20px; background: #b4c0cc; margin-bottom: -3px; } diff --git a/javascript/black-teal.css b/javascript/black-teal.css index 63f6fb5c4..c6f266c54 100644 --- a/javascript/black-teal.css +++ b/javascript/black-teal.css @@ -30,7 +30,7 @@ --inactive-color: var(--primary--800); --body-text-color: var(--neutral-100); --body-text-color-subdued: var(--neutral-300); - --background-color: black; + --background-color: var(--neutral-950); --background-fill-primary: var(--neutral-700); --input-padding: 4px; --input-background-fill: var(--neutral-800); diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js index ad224759a..e0294b4f4 100644 --- a/javascript/extraNetworks.js +++ b/javascript/extraNetworks.js @@ -461,7 +461,7 @@ function setupExtraNetworksForTab(tabname) { en.style.position = 'absolute'; en.style.right = '0'; en.style.top = '13em'; - en.style.height = '-webkit-fill-available'; + en.style.height = 'auto'; en.style.transition = 'width 0.3s ease'; en.style.width = `${window.opts.extra_networks_sidebar_width}vw`; gradioApp().getElementById(`${tabname}_settings`).parentNode.style.width = `${100 - 2 - window.opts.extra_networks_sidebar_width}vw`; diff --git a/javascript/inputAccordion.js b/javascript/inputAccordion.js new file mode 100644 index 000000000..f5f49bef1 --- /dev/null +++ b/javascript/inputAccordion.js @@ -0,0 +1,55 @@ +function inputAccordionChecked(id, checked) { + const accordion = gradioApp().getElementById(id); + accordion.visibleCheckbox.checked = checked; + accordion.onVisibleCheckboxChange(); +} + +function setupAccordion(accordion) { + const labelWrap = accordion.querySelector('.label-wrap'); + const gradioCheckbox = gradioApp().querySelector(`#${accordion.id}-checkbox input`); + const extra = gradioApp().querySelector(`#${accordion.id}-extra`); + const span = labelWrap.querySelector('span'); + let linked = true; + const isOpen = () => labelWrap.classList.contains('open'); + const observerAccordionOpen = new MutationObserver((mutations) => { + mutations.forEach((mutationRecord) => { + accordion.classList.toggle('input-accordion-open', isOpen()); + if (linked) { + accordion.visibleCheckbox.checked = isOpen(); + accordion.onVisibleCheckboxChange(); + } + }); + }); + observerAccordionOpen.observe(labelWrap, { attributes: true, attributeFilter: ['class'] }); + if (extra) labelWrap.insertBefore(extra, labelWrap.lastElementChild); + accordion.onChecked = (checked) => { + if (isOpen() !== checked) labelWrap.click(); + }; + + const visibleCheckbox = document.createElement('INPUT'); + visibleCheckbox.type = 'checkbox'; + visibleCheckbox.checked = isOpen(); + visibleCheckbox.id = `${accordion.id}-visible-checkbox`; + visibleCheckbox.className = `${gradioCheckbox.className} input-accordion-checkbox`; + span.insertBefore(visibleCheckbox, span.firstChild); + accordion.visibleCheckbox = visibleCheckbox; + accordion.onVisibleCheckboxChange = () => { + if (linked && isOpen() !== visibleCheckbox.checked) labelWrap.click(); + gradioCheckbox.checked = visibleCheckbox.checked; + 
updateInput(gradioCheckbox); + }; + + visibleCheckbox.addEventListener('click', (event) => { + linked = false; + event.stopPropagation(); + }); + visibleCheckbox.addEventListener('input', accordion.onVisibleCheckboxChange); +} + +// onUiLoaded(() => { +// for (const accordion of gradioApp().querySelectorAll('.input-accordion')) setupAccordion(accordion); +// }); + +function initAccordions() { + for (const accordion of gradioApp().querySelectorAll('.input-accordion')) setupAccordion(accordion); +} diff --git a/javascript/sdnext.css b/javascript/sdnext.css index e33a11f7a..f2d553106 100644 --- a/javascript/sdnext.css +++ b/javascript/sdnext.css @@ -220,7 +220,7 @@ table.settings-value-table td { padding: 0.4em; border: 1px solid #ccc; max-widt .extra-network-cards .card .preview { box-shadow: var(--button-shadow); min-height: 30px; } .extra-network-cards .card:hover .overlay { background: rgba(0, 0, 0, 0.70); } .extra-network-cards .card:hover .preview { box-shadow: none; filter: grayscale(100%); } -.extra-network-cards .card .overlay .tags { display: none; overflow-wrap: anywhere; position: absolute; top: 100%; z-index: 20; background: var(--body-background-fill); } +.extra-network-cards .card .overlay .tags { display: none; overflow-wrap: anywhere; position: absolute; top: 100%; z-index: 20; background: var(--body-background-fill); overflow-x: hidden; overflow-y: auto; max-height: 333px; } .extra-network-cards .card .overlay .tag { padding: 2px; margin: 2px; background: rgba(70, 70, 70, 0.60); font-size: var(--text-md); cursor: pointer; display: inline-block; } .extra-network-cards .card .actions>span { padding: 4px; font-size: 34px !important; } .extra-network-cards .card .actions>span:hover { color: var(--highlight-color); } @@ -240,7 +240,7 @@ table.settings-value-table td { padding: 0.4em; border: 1px solid #ccc; max-widt .extra-details > div { overflow-y: auto; min-height: 40vh; max-height: 80vh; align-self: flex-start; } .extra-details td:first-child { font-weight: bold; vertical-align: top; } .extra-details .gradio-image { max-height: 50vh; } - +.input-accordion-checkbox { display: none !important; } /* specific elements */ #modelmerger_interp_description { margin-top: 1em; margin-bottom: 1em; } diff --git a/javascript/startup.js b/javascript/startup.js index ab930645f..f1a44faf5 100644 --- a/javascript/startup.js +++ b/javascript/startup.js @@ -12,6 +12,7 @@ async function initStartup() { initLogMonitor(); initContextMenu(); initDragDrop(); + initAccordions(); initSettings(); initImageViewer(); initGallery(); diff --git a/javascript/ui.js b/javascript/ui.js index a3ff22542..c7ecd6680 100644 --- a/javascript/ui.js +++ b/javascript/ui.js @@ -424,6 +424,7 @@ function selectVAE(name) { } function selectReference(name) { + log(`Select reference: ${name}`); desiredCheckpointName = name; gradioApp().getElementById('change_reference').click(); } @@ -471,19 +472,23 @@ function toggleCompact(val, old) { function previewTheme() { let name = gradioApp().getElementById('setting_gradio_theme').querySelectorAll('input')?.[0].value || ''; - fetch('/file=html/themes.json').then((res) => { - res.json().then((themes) => { - const theme = themes.find((t) => t.id === name); - if (theme) { - window.open(theme.subdomain, '_blank'); - } else { - const el = document.getElementById('theme-preview') || createThemeElement(); - el.style.display = el.style.display === 'block' ? 
'none' : 'block'; - name = name.replace('/', '-'); - el.src = `/file=html/${name}.jpg`; - } - }); - }); + fetch('/file=html/themes.json') + .then((res) => { + res.json() + .then((themes) => { + const theme = Array.isArray(themes) ? themes.find((t) => t.id === name) : null; + if (theme) { + window.open(theme.subdomain, '_blank'); + } else { + const el = document.getElementById('theme-preview') || createThemeElement(); + el.style.display = el.style.display === 'block' ? 'none' : 'block'; + name = name.replace('/', '-'); + el.src = `/file=html/${name}.jpg`; + } + }) + .catch((e) => console.error('previewTheme:', e)); + }) + .catch((e) => console.error('previewTheme:', e)); } async function browseFolder() { diff --git a/modules/api/models.py b/modules/api/models.py index a0cb2562d..5813fdcc6 100644 --- a/modules/api/models.py +++ b/modules/api/models.py @@ -152,8 +152,8 @@ class ItemIPAdapter(BaseModel): adapter: str = Field(title="Adapter", default="Base", description="") images: List[str] = Field(title="Image", default=[], description="") masks: Optional[List[str]] = Field(title="Mask", default=[], description="") - scale: float = Field(title="Scale", default=0.5, gt=0, le=1, description="") - start: float = Field(title="Start", default=0.0, gt=0, le=1, description="") + scale: float = Field(title="Scale", default=0.5, ge=0, le=1, description="") + start: float = Field(title="Start", default=0.0, ge=0, le=1, description="") end: float = Field(title="End", default=1.0, gt=0, le=1, description="") class ItemFace(BaseModel): @@ -313,7 +313,7 @@ class ResInterrogate(BaseModel): class ReqVQA(BaseModel): image: str = Field(default="", title="Image", description="Image to work on, must be a Base64 string containing the image's data.") - model: str = Field(default="Moondream 2", title="Model", description="The interrogate model used.") + model: str = Field(default="MS Florence 2 Base", title="Model", description="The interrogate model used.") question: str = Field(default="describe the image", title="Question", description="Question to ask the model.") class ResVQA(BaseModel): diff --git a/modules/control/processors.py b/modules/control/processors.py index eccaf1e30..19683f6ea 100644 --- a/modules/control/processors.py +++ b/modules/control/processors.py @@ -139,7 +139,7 @@ def reset(self, processor_id: str = None): self.model = None self.processor_id = processor_id # self.override = None - devices.torch_gc() + # devices.torch_gc() self.load_config = { 'cache_dir': cache_dir } def config(self, processor_id = None): diff --git a/modules/control/run.py b/modules/control/run.py index 5a41b87e4..5e0749d81 100644 --- a/modules/control/run.py +++ b/modules/control/run.py @@ -55,7 +55,7 @@ def control_set(kwargs): def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], inits: List[Image.Image] = [], mask: Image.Image = None, unit_type: str = None, is_generator: bool = True, input_type: int = 0, - prompt: str = '', negative: str = '', styles: List[str] = [], + prompt: str = '', negative_prompt: str = '', styles: List[str] = [], steps: int = 20, sampler_index: int = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, cfg_scale: float = 6.0, clip_skip: float = 1.0, image_cfg_scale: float = 6.0, diffusers_guidance_rescale: float = 0.7, pag_scale: float = 0.0, pag_adaptive: float = 0.5, cfg_end: float = 1.0, @@ -90,12 +90,11 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini 
shared.log.warning('Sampler: invalid') sampler_index = 0 if hr_sampler_index is None: - shared.log.warning('Sampler: invalid') - hr_sampler_index = 0 + hr_sampler_index = sampler_index p = StableDiffusionProcessingControl( prompt = prompt, - negative_prompt = negative, + negative_prompt = negative_prompt, styles = styles, steps = steps, n_iter = batch_count, @@ -192,7 +191,9 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini p.refiner_prompt = refiner_prompt p.refiner_negative = refiner_negative if p.enable_hr and (p.hr_resize_x == 0 or p.hr_resize_y == 0): - p.hr_upscale_to_x, p.hr_upscale_to_y = 8 * int(p.width * p.hr_scale / 8), 8 * int(p.height * p.hr_scale / 8) + p.hr_upscale_to_x, p.hr_upscale_to_y = 8 * int(width_before * p.hr_scale / 8), 8 * int(height_before * p.hr_scale / 8) + elif p.enable_hr and (p.hr_upscale_to_x == 0 or p.hr_upscale_to_y == 0): + p.hr_upscale_to_x, p.hr_upscale_to_y = 8 * int(p.hr_resize_x / 8), 8 * int(hr_resize_y / 8) global p_extra_args # pylint: disable=global-statement for k, v in p_extra_args.items(): diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py index e911bc6ff..b57005f3c 100644 --- a/modules/control/units/controlnet.py +++ b/modules/control/units/controlnet.py @@ -172,10 +172,19 @@ def load(self, model_id: str = None) -> str: self.load_safetensors(model_path) else: self.model = ControlNetModel.from_pretrained(model_path, **self.load_config) - if self.device is not None: - self.model.to(self.device) if self.dtype is not None: self.model.to(self.dtype) + if "ControlNet" in opts.nncf_compress_weights: + try: + log.debug(f'Control {what} model NNCF Compress: id="{model_id}"') + from installer import install + install('nncf==2.7.0', quiet=True) + from modules.sd_models_compile import nncf_compress_model + self.model = nncf_compress_model(self.model) + except Exception as e: + log.error(f'Control {what} model NNCF Compression failed: id="{model_id}" error={e}') + if self.device is not None: + self.model.to(self.device) t1 = time.time() self.model_id = model_id log.debug(f'Control {what} model loaded: id="{model_id}" path="{model_path}" time={t1-t0:.2f}') diff --git a/modules/control/units/t2iadapter.py b/modules/control/units/t2iadapter.py index 1c481398b..80eca41dd 100644 --- a/modules/control/units/t2iadapter.py +++ b/modules/control/units/t2iadapter.py @@ -74,7 +74,7 @@ def __init__(self, model_id: str = None, device = None, dtype = None, load_confi self.model_id: str = model_id self.device = device self.dtype = dtype - self.load_config = { 'cache_dir': cache_dir } + self.load_config = { 'cache_dir': cache_dir, 'use_safetensors': False } if load_config is not None: self.load_config.update(load_config) if model_id is not None: @@ -101,7 +101,7 @@ def load(self, model_id: str = None) -> str: log.error(f'Control {what} model load failed: id="{model_id}" error=unknown model id') return log.debug(f'Control {what} model loading: id="{model_id}" path="{model_path}"') - if model_path.endswith('.pth') or model_path.endswith('.pt') or model_path.endswith('.safetensors'): + if model_path.endswith('.pth') or model_path.endswith('.pt') or model_path.endswith('.safetensors') or model_path.endswith('.bin'): from huggingface_hub import hf_hub_download parts = model_path.split('/') repo_id = f'{parts[0]}/{parts[1]}' diff --git a/modules/control/units/xs_pipe.py b/modules/control/units/xs_pipe.py index 14581c0f1..7e717b542 100644 --- a/modules/control/units/xs_pipe.py +++ 
b/modules/control/units/xs_pipe.py @@ -1048,7 +1048,7 @@ def __call__( self.upcast_vae() latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype) - if not output_type == "latent": + if output_type != "latent": # make sure the VAE is in float32 mode, as it overflows in float16 needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast @@ -1064,7 +1064,7 @@ def __call__( else: image = latents - if not output_type == "latent": + if output_type != "latent": # apply watermark if available if self.watermark is not None: image = self.watermark.apply_watermark(image) @@ -1907,7 +1907,7 @@ def __call__( self.controlnet.to("cpu") torch.cuda.empty_cache() - if not output_type == "latent": + if output_type != "latent": image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[ 0 ] diff --git a/modules/devices.py b/modules/devices.py index 675fcac19..d72555192 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -140,13 +140,13 @@ def torch_gc(force=False): used_gpu = round(100 * gpu.get('used', 0) / gpu.get('total', 1)) if gpu.get('total', 1) > 1 else 0 used_ram = round(100 * ram.get('used', 0) / ram.get('total', 1)) if ram.get('total', 1) > 1 else 0 global previous_oom # pylint: disable=global-statement + threshold = 0 if (shared.cmd_opts.lowvram and not shared.cmd_opts.use_zluda) else shared.opts.torch_gc_threshold + if force or threshold == 0 or used_gpu >= threshold or used_ram >= threshold: + force = True if oom > previous_oom: previous_oom = oom log.warning(f'GPU out-of-memory error: {mem}') force = True - if used_gpu >= shared.opts.torch_gc_threshold or used_ram >= shared.opts.torch_gc_threshold: - log.info(f'High memory utilization: GPU={used_gpu}% RAM={used_ram}% {mem}') - force = True if not force: return @@ -160,7 +160,13 @@ def torch_gc(force=False): except Exception: pass t1 = time.time() - log.debug(f'GC: collected={collected} device={torch.device(get_optimal_device_name())} {memstats.memory_stats()} time={round(t1 - t0, 2)}') + mem = memstats.memory_stats() + saved = round(gpu.get('used', 0) - mem.get('gpu', {}).get('used', 0), 2) + before = { 'gpu': gpu.get('used', 0), 'ram': ram.get('used', 0) } + after = { 'gpu': mem.get('gpu', {}).get('used', 0), 'ram': mem.get('ram', {}).get('used', 0), 'retries': mem.get('retries', 0), 'oom': mem.get('oom', 0) } + utilization = { 'gpu': used_gpu, 'ram': used_ram, 'threshold': threshold } + results = { 'collected': collected, 'saved': saved } + log.debug(f'GC: utilization={utilization} gc={results} before={before} after={after} device={torch.device(get_optimal_device_name())} fn={sys._getframe(1).f_code.co_name} time={round(t1 - t0, 2)}') # pylint: disable=protected-access def set_cuda_sync_mode(mode): @@ -175,7 +181,7 @@ def set_cuda_sync_mode(mode): return try: import ctypes - log.info(f'Set cuda synch: mode={mode}') + log.info(f'Set cuda sync: mode={mode}') torch.cuda.set_device(torch.device(get_optimal_device_name())) ctypes.CDLL('libcudart.so').cudaSetDeviceFlags({'auto': 0, 'spin': 1, 'yield': 2, 'block': 4}[mode]) except Exception: diff --git a/modules/dml/backend.py b/modules/dml/backend.py index 5f34b4542..7947dc81b 100644 --- a/modules/dml/backend.py +++ b/modules/dml/backend.py @@ -78,6 +78,3 @@ def max_memory_allocated(device: Optional[rDevice]=None): def reset_peak_memory_stats(device: Optional[rDevice]=None): return - - def synchronize_tensor(tensor: torch.Tensor) -> None: - tensor.__str__() diff --git a/modules/dml/hijack/__init__.py 
b/modules/dml/hijack/__init__.py index 4f27b500c..46aecf4bd 100644 --- a/modules/dml/hijack/__init__.py +++ b/modules/dml/hijack/__init__.py @@ -1,8 +1,4 @@ -import modules.dml.hijack.kdiffusion -import modules.dml.hijack.stablediffusion import modules.dml.hijack.torch import modules.dml.hijack.realesrgan_model -import modules.dml.hijack.plms -import modules.dml.hijack.diffusers import modules.dml.hijack.transformers import modules.dml.hijack.tomesd diff --git a/modules/dml/hijack/diffusers.py b/modules/dml/hijack/diffusers.py deleted file mode 100644 index 56b7d85cb..000000000 --- a/modules/dml/hijack/diffusers.py +++ /dev/null @@ -1,227 +0,0 @@ -from typing import Optional, Union, Tuple -import torch -import diffusers -import diffusers.utils.torch_utils - - -# copied from diffusers.PNDMScheduler._get_prev_sample -def PNDMScheduler__get_prev_sample(self, sample: torch.FloatTensor, timestep, prev_timestep, model_output): - torch.dml.synchronize_tensor(sample) # DML synchronize - alpha_prod_t = self.alphas_cumprod[timestep] - alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod - beta_prod_t = 1 - alpha_prod_t - beta_prod_t_prev = 1 - alpha_prod_t_prev - - if self.config.prediction_type == "v_prediction": - model_output = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample - elif self.config.prediction_type != "epsilon": - raise ValueError( - f"prediction_type given as {self.config.prediction_type} must be one of `epsilon` or `v_prediction`" - ) - - sample_coeff = (alpha_prod_t_prev / alpha_prod_t) ** (0.5) - - model_output_denom_coeff = alpha_prod_t * beta_prod_t_prev ** (0.5) + ( - alpha_prod_t * beta_prod_t * alpha_prod_t_prev - ) ** (0.5) - - # full formula (9) - prev_sample = ( - sample_coeff * sample - (alpha_prod_t_prev - alpha_prod_t) * model_output / model_output_denom_coeff - ) - - return prev_sample - - -diffusers.PNDMScheduler._get_prev_sample = PNDMScheduler__get_prev_sample # pylint: disable=protected-access - - -# copied from diffusers.UniPCMultistepScheduler.multistep_uni_p_bh_update -def UniPCMultistepScheduler_multistep_uni_p_bh_update( - self: diffusers.UniPCMultistepScheduler, - model_output: torch.FloatTensor, - *args, - sample: torch.FloatTensor = None, - order: int = None, - **_, -) -> torch.FloatTensor: - if sample is None: - if len(args) > 1: - sample = args[1] - else: - raise ValueError(" missing `sample` as a required keyward argument") - if order is None: - if len(args) > 2: - order = args[2] - else: - raise ValueError(" missing `order` as a required keyward argument") - model_output_list = self.model_outputs - - s0 = self.timestep_list[-1] - m0 = model_output_list[-1] - x = sample - - if self.solver_p: - x_t = self.solver_p.step(model_output, s0, x).prev_sample - return x_t - - torch.dml.synchronize_tensor(sample) # DML synchronize - sigma_t, sigma_s0 = self.sigmas[self.step_index + 1], self.sigmas[self.step_index] - alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) - alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) - - lambda_t = torch.log(alpha_t) - torch.log(sigma_t) - lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) - - h = lambda_t - lambda_s0 - device = sample.device - - rks = [] - D1s = [] - for i in range(1, order): - si = self.step_index - i - mi = model_output_list[-(i + 1)] - alpha_si, sigma_si = self._sigma_to_alpha_sigma_t(self.sigmas[si]) - lambda_si = torch.log(alpha_si) - torch.log(sigma_si) - rk = (lambda_si - lambda_s0) / h - rks.append(rk) - 
D1s.append((mi - m0) / rk) - - rks.append(1.0) - rks = torch.tensor(rks, device=device) - - R = [] - b = [] - - hh = -h if self.predict_x0 else h - h_phi_1 = torch.expm1(hh) # h\phi_1(h) = e^h - 1 - h_phi_k = h_phi_1 / hh - 1 - - factorial_i = 1 - - if self.config.solver_type == "bh1": - B_h = hh - elif self.config.solver_type == "bh2": - B_h = torch.expm1(hh) - else: - raise NotImplementedError - - for i in range(1, order + 1): - R.append(torch.pow(rks, i - 1)) - b.append(h_phi_k * factorial_i / B_h) - factorial_i *= i + 1 - h_phi_k = h_phi_k / hh - 1 / factorial_i - - R = torch.stack(R) - b = torch.tensor(b, device=device) - - rhos_p = None - if len(D1s) > 0: - D1s = torch.stack(D1s, dim=1) # (B, K) - # for order 2, we use a simplified version - if order == 2: - rhos_p = torch.tensor([0.5], dtype=x.dtype, device=device) - else: - rhos_p = torch.linalg.solve(R[:-1, :-1], b[:-1]) - else: - D1s = None - - if self.predict_x0: - x_t_ = sigma_t / sigma_s0 * x - alpha_t * h_phi_1 * m0 - if D1s is not None: - pred_res = torch.einsum("k,bkc...->bc...", rhos_p, D1s) - else: - pred_res = 0 - x_t = x_t_ - alpha_t * B_h * pred_res - else: - x_t_ = alpha_t / alpha_s0 * x - sigma_t * h_phi_1 * m0 - if D1s is not None: - pred_res = torch.einsum("k,bkc...->bc...", rhos_p, D1s) - else: - pred_res = 0 - x_t = x_t_ - sigma_t * B_h * pred_res - - x_t = x_t.to(x.dtype) - return x_t - - -diffusers.UniPCMultistepScheduler.multistep_uni_p_bh_update = UniPCMultistepScheduler_multistep_uni_p_bh_update - - -# copied from diffusers.LCMScheduler.step -def LCMScheduler_step( - self: diffusers.LCMScheduler, - model_output: torch.FloatTensor, - timestep: int, - sample: torch.FloatTensor, - generator: Optional[torch.Generator] = None, - return_dict: bool = True, - ) -> Union[diffusers.schedulers.scheduling_lcm.LCMSchedulerOutput, Tuple]: - if self.num_inference_steps is None: - raise ValueError( - "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" - ) - - if self.step_index is None: - self._init_step_index(timestep) - - # 1. get previous step value - prev_step_index = self.step_index + 1 - if prev_step_index < len(self.timesteps): - prev_timestep = self.timesteps[prev_step_index] - else: - prev_timestep = timestep - - # 2. compute alphas, betas - torch.dml.synchronize_tensor(sample) # DML synchronize - alpha_prod_t = self.alphas_cumprod[timestep] - alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod - - beta_prod_t = 1 - alpha_prod_t - beta_prod_t_prev = 1 - alpha_prod_t_prev - - # 3. Get scalings for boundary conditions - c_skip, c_out = self.get_scalings_for_boundary_condition_discrete(timestep) - - # 4. Compute the predicted original sample x_0 based on the model parameterization - if self.config.prediction_type == "epsilon": # noise-prediction - predicted_original_sample = (sample - beta_prod_t.sqrt() * model_output) / alpha_prod_t.sqrt() - elif self.config.prediction_type == "sample": # x-prediction - predicted_original_sample = model_output - elif self.config.prediction_type == "v_prediction": # v-prediction - predicted_original_sample = alpha_prod_t.sqrt() * sample - beta_prod_t.sqrt() * model_output - else: - raise ValueError( - f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or" - " `v_prediction` for `LCMScheduler`." - ) - - # 5. 
Clip or threshold "predicted x_0" - if self.config.thresholding: - predicted_original_sample = self._threshold_sample(predicted_original_sample) - elif self.config.clip_sample: - predicted_original_sample = predicted_original_sample.clamp( - -self.config.clip_sample_range, self.config.clip_sample_range - ) - - # 6. Denoise model output using boundary conditions - denoised = c_out * predicted_original_sample + c_skip * sample - - # 7. Sample and inject noise z ~ N(0, I) for MultiStep Inference - # Noise is not used for one-step sampling. - if len(self.timesteps) > 1: - noise = diffusers.utils.torch_utils.randn_tensor(model_output.shape, generator=generator, device=model_output.device) - prev_sample = alpha_prod_t_prev.sqrt() * denoised + beta_prod_t_prev.sqrt() * noise - else: - prev_sample = denoised - - # upon completion increase step index by one - self._step_index += 1 - - if not return_dict: - return (prev_sample, denoised) - - return diffusers.schedulers.scheduling_lcm.LCMSchedulerOutput(prev_sample=prev_sample, denoised=denoised) - - -diffusers.LCMScheduler.step = LCMScheduler_step diff --git a/modules/dml/hijack/kdiffusion.py b/modules/dml/hijack/kdiffusion.py deleted file mode 100644 index d772dc88f..000000000 --- a/modules/dml/hijack/kdiffusion.py +++ /dev/null @@ -1,89 +0,0 @@ -import torch -from tqdm.auto import tqdm -from k_diffusion import sampling -import modules.devices as devices - - -def dpm_solver_adaptive(self, x, t_start, t_end, order=3, rtol=0.05, atol=0.0078, h_init=0.05, pcoeff=0., icoeff=1., dcoeff=0., accept_safety=0.81, eta=0., s_noise=1., noise_sampler=None): - noise_sampler = sampling.default_noise_sampler(x) if noise_sampler is None else noise_sampler - if order not in {2, 3}: - raise ValueError('order should be 2 or 3') - forward = t_end > t_start - if not forward and eta: - raise ValueError('eta must be 0 for reverse sampling') - h_init = abs(h_init) * (1 if forward else -1) - atol = torch.tensor(atol, device=devices.device) - rtol = torch.tensor(rtol, device=devices.device) - s = t_start - x_prev = x - accept = True - pid = sampling.PIDStepSizeController(h_init, pcoeff, icoeff, dcoeff, 1.5 if eta else order, accept_safety) - info = {'steps': 0, 'nfe': 0, 'n_accept': 0, 'n_reject': 0} - - while s < t_end - 1e-5 if forward else s > t_end + 1e-5: - eps_cache = {} - t = torch.minimum(t_end, s + pid.h) if forward else torch.maximum(t_end, s + pid.h) - if eta: - sd, su = sampling.get_ancestral_step(self.sigma(s), self.sigma(t), eta) - t_ = torch.minimum(t_end, self.t(sd)) - su = (self.sigma(t) ** 2 - self.sigma(t_) ** 2) ** 0.5 - else: - t_, su = t, 0. 
- - eps, eps_cache = self.eps(eps_cache, 'eps', x, s) - denoised = x - self.sigma(s) * eps - - if order == 2: - x_low, eps_cache = self.dpm_solver_1_step(x, s, t_, eps_cache=eps_cache) - x_high, eps_cache = self.dpm_solver_2_step(x, s, t_, eps_cache=eps_cache) - else: - x_low, eps_cache = self.dpm_solver_2_step(x, s, t_, r1=1 / 3, eps_cache=eps_cache) - x_high, eps_cache = self.dpm_solver_3_step(x, s, t_, eps_cache=eps_cache) - delta = torch.maximum(atol, rtol * torch.maximum(x_low.abs(), x_prev.abs())) - error = torch.linalg.norm((x_low - x_high) / delta) / x.numel() ** 0.5 - accept = pid.propose_step(error) - if accept: - x_prev = x_low - x = x_high + su * s_noise * noise_sampler(self.sigma(s), self.sigma(t)) - s = t - info['n_accept'] += 1 - else: - info['n_reject'] += 1 - info['nfe'] += order - info['steps'] += 1 - - if self.info_callback is not None: - self.info_callback({'x': x, 'i': info['steps'] - 1, 't': s, 't_up': s, 'denoised': denoised, 'error': error, 'h': pid.h, **info}) - - return x, info - - -@devices.inference_context() -def sample_dpm_fast(model, x, sigma_min, sigma_max, n, extra_args=None, callback=None, disable=None, eta=0., s_noise=1., noise_sampler=None): - """DPM-Solver-Fast (fixed step size). See https://arxiv.org/abs/2206.00927.""" - if sigma_min <= 0 or sigma_max <= 0: - raise ValueError('sigma_min and sigma_max must not be 0') - with tqdm(total=n, disable=disable) as pbar: - dpm_solver = sampling.DPMSolver(model, extra_args, eps_callback=pbar.update) - if callback is not None: - dpm_solver.info_callback = lambda info: callback({'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info}) - return dpm_solver.dpm_solver_fast(x, dpm_solver.t(torch.tensor(sigma_max, device=devices.device)), dpm_solver.t(torch.tensor(sigma_min, device=devices.device)), n, eta, s_noise, noise_sampler) - - -@devices.inference_context() -def sample_dpm_adaptive(model, x, sigma_min, sigma_max, extra_args=None, callback=None, disable=None, order=3, rtol=0.05, atol=0.0078, h_init=0.05, pcoeff=0., icoeff=1., dcoeff=0., accept_safety=0.81, eta=0., s_noise=1., noise_sampler=None, return_info=False): - """DPM-Solver-12 and 23 (adaptive step size). 
See https://arxiv.org/abs/2206.00927.""" - if sigma_min <= 0 or sigma_max <= 0: - raise ValueError('sigma_min and sigma_max must not be 0') - with tqdm(disable=disable) as pbar: - dpm_solver = sampling.DPMSolver(model, extra_args, eps_callback=pbar.update) - if callback is not None: - dpm_solver.info_callback = lambda info: callback({'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info}) - x, info = dpm_solver.dpm_solver_adaptive(x, dpm_solver.t(torch.tensor(sigma_max, device=devices.device)), dpm_solver.t(torch.tensor(sigma_min, device=devices.device)), order, rtol, atol, h_init, pcoeff, icoeff, dcoeff, accept_safety, eta, s_noise, noise_sampler) - if return_info: - return x, info - return x - -sampling.DPMSolver.dpm_solver_adaptive = dpm_solver_adaptive -sampling.sample_dpm_fast = sample_dpm_fast -sampling.sample_dpm_adaptive = sample_dpm_adaptive diff --git a/modules/dml/hijack/plms.py b/modules/dml/hijack/plms.py deleted file mode 100644 index d19c46629..000000000 --- a/modules/dml/hijack/plms.py +++ /dev/null @@ -1,90 +0,0 @@ -import torch -from ldm.models.diffusion.ddim import noise_like -import modules.sd_hijack_inpainting as plms_hijack -import modules.devices as devices - - -@devices.inference_context() -def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False, - temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None, - unconditional_guidance_scale=1., unconditional_conditioning=None, old_eps=None, t_next=None, dynamic_threshold=None): - b, *_, device = *x.shape, x.device - - def get_model_output(x, t): - if unconditional_conditioning is None or unconditional_guidance_scale == 1.: - e_t = self.model.apply_model(x, t, c) - else: - x_in = torch.cat([x] * 2) - t_in = torch.cat([t] * 2) - - if isinstance(c, dict): - assert isinstance(unconditional_conditioning, dict) - c_in = {} - for k in c: - if isinstance(c[k], list): - c_in[k] = [ - torch.cat([unconditional_conditioning[k][i], c[k][i]]) - for i in range(len(c[k])) - ] - else: - c_in[k] = torch.cat([unconditional_conditioning[k], c[k]]) - else: - c_in = torch.cat([unconditional_conditioning, c]) - - e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2) - e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond) - - if score_corrector is not None: - assert self.model.parameterization == "eps" - e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs) - - return e_t - - alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas - alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev - sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas - sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas - - def get_x_prev_and_pred_x0(e_t, index): - # select parameters corresponding to the currently considered timestep - torch.dml.synchronize_tensor(alphas[index]) # DML synchronize - a_t = torch.full((b, 1, 1, 1), alphas[index], device=device) - a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device) - sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device) - sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index],device=device) - - # current prediction for x_0 - pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() - if quantize_denoised: - pred_x0, _, *_ = 
self.model.first_stage_model.quantize(pred_x0) - if dynamic_threshold is not None: - from ldm.models.diffusion.sampling_util import norm_thresholding - pred_x0 = norm_thresholding(pred_x0, dynamic_threshold) - # direction pointing to x_t - dir_xt = (1. - a_prev - sigma_t**2).sqrt() * e_t - noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature - if noise_dropout > 0.: - noise = torch.nn.functional.dropout(noise, p=noise_dropout) - x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise - return x_prev, pred_x0 - - e_t = get_model_output(x, t) - if len(old_eps) == 0: - # Pseudo Improved Euler (2nd order) - x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t, index) - e_t_next = get_model_output(x_prev, t_next) - e_t_prime = (e_t + e_t_next) / 2 - elif len(old_eps) == 1: - # 2nd order Pseudo Linear Multistep (Adams-Bashforth) - e_t_prime = (3 * e_t - old_eps[-1]) / 2 - elif len(old_eps) == 2: - # 3nd order Pseudo Linear Multistep (Adams-Bashforth) - e_t_prime = (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12 - elif len(old_eps) >= 3: - # 4nd order Pseudo Linear Multistep (Adams-Bashforth) - e_t_prime = (55 * e_t - 59 * old_eps[-1] + 37 * old_eps[-2] - 9 * old_eps[-3]) / 24 - - x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t_prime, index) - - return x_prev, pred_x0, e_t -plms_hijack.p_sample_plms = p_sample_plms diff --git a/modules/dml/hijack/stablediffusion.py b/modules/dml/hijack/stablediffusion.py deleted file mode 100644 index fc2518aa7..000000000 --- a/modules/dml/hijack/stablediffusion.py +++ /dev/null @@ -1,81 +0,0 @@ -import torch -from ldm.models.diffusion.ddim import DDIMSampler -from ldm.modules.diffusionmodules.util import noise_like -import modules.devices as devices - - -@devices.inference_context() -def p_sample_ddim(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False, - temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None, - unconditional_guidance_scale=1., unconditional_conditioning=None, - dynamic_threshold=None): - b, *_, device = *x.shape, x.device - - if unconditional_conditioning is None or unconditional_guidance_scale == 1.: - model_output = self.model.apply_model(x, t, c) - else: - x_in = torch.cat([x] * 2) - t_in = torch.cat([t] * 2) - if isinstance(c, dict): - assert isinstance(unconditional_conditioning, dict) - c_in = dict() - for k in c: - if isinstance(c[k], list): - c_in[k] = [torch.cat([ - unconditional_conditioning[k][i], - c[k][i]]) for i in range(len(c[k]))] - else: - c_in[k] = torch.cat([ - unconditional_conditioning[k], - c[k]]) - elif isinstance(c, list): - c_in = list() - assert isinstance(unconditional_conditioning, list) - for i in range(len(c)): - c_in.append(torch.cat([unconditional_conditioning[i], c[i]])) - else: - c_in = torch.cat([unconditional_conditioning, c]) - model_uncond, model_t = self.model.apply_model(x_in, t_in, c_in).chunk(2) - model_output = model_uncond + unconditional_guidance_scale * (model_t - model_uncond) - - if self.model.parameterization == "v": - e_t = self.model.predict_eps_from_z_and_v(x, t, model_output) - else: - e_t = model_output - - if score_corrector is not None: - assert self.model.parameterization == "eps", 'not implemented' - e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs) - - alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas - alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev - sqrt_one_minus_alphas = 
self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas - sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas - # select parameters corresponding to the currently considered timestep - torch.dml.synchronize_tensor(alphas[index]) # DML synchronize - a_t = torch.full((b, 1, 1, 1), alphas[index], device=device) - a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device) - sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device) - sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index],device=device) - - # current prediction for x_0 - if self.model.parameterization != "v": - pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt() - else: - pred_x0 = self.model.predict_start_from_z_and_v(x, t, model_output) - - if quantize_denoised: - pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0) - - if dynamic_threshold is not None: - raise NotImplementedError - - # direction pointing to x_t - dir_xt = (1. - a_prev - sigma_t**2).sqrt() * e_t - noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature - if noise_dropout > 0.: - noise = torch.nn.functional.dropout(noise, p=noise_dropout) - x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise - return x_prev, pred_x0 - -DDIMSampler.p_sample_ddim = p_sample_ddim diff --git a/modules/dml/hijack/torch.py b/modules/dml/hijack/torch.py index e4913a357..e216de46f 100644 --- a/modules/dml/hijack/torch.py +++ b/modules/dml/hijack/torch.py @@ -7,41 +7,6 @@ CondFunc('torch.Tensor.new', lambda orig, self, *args, **kwargs: orig(self.cpu(), *args, **kwargs).to(self.device), lambda orig, self, *args, **kwargs: torch.dml.is_directml_device(self.device)) -_lerp = torch.lerp -def lerp(*args, **kwargs) -> torch.Tensor: - rep = None - for i in range(0, len(args)): - if torch.is_tensor(args[i]): - rep = args[i] - break - if rep is None: - for key in kwargs: - if torch.is_tensor(kwargs[key]): - rep = kwargs[key] - break - if torch.dml.is_directml_device(rep.device): - args = list(args) - - if rep.dtype == torch.float16: - for i in range(len(args)): - if torch.is_tensor(args[i]): - args[i] = args[i].float() - for i in range(len(args)): - if torch.is_tensor(args[i]): - args[i] = args[i].cpu() - - if rep.dtype == torch.float16: - for kwarg in kwargs: - if torch.is_tensor(kwargs[kwarg]): - kwargs[kwarg] = kwargs[kwarg].float() - for kwarg in kwargs: - if torch.is_tensor(kwargs[kwarg]): - kwargs[kwarg] = kwargs[kwarg].cpu() - return _lerp(*args, **kwargs).to(rep.device).type(rep.dtype) - return _lerp(*args, **kwargs) -torch.lerp = lerp - - # https://github.com/lshqqytiger/stable-diffusion-webui-directml/issues/436 _pow_ = torch.Tensor.pow_ def pow_(self: torch.Tensor, *args, **kwargs): diff --git a/modules/hidiffusion/hidiffusion.py b/modules/hidiffusion/hidiffusion.py index df866bce8..7874f03af 100644 --- a/modules/hidiffusion/hidiffusion.py +++ b/modules/hidiffusion/hidiffusion.py @@ -1,4 +1,5 @@ from typing import Type, Dict, Any, Tuple, Optional +import math import torch import torch.nn.functional as F from diffusers.utils.torch_utils import is_torch_version @@ -100,15 +101,18 @@ def forward( # reference: https://github.com/microsoft/Swin-Transformer def window_partition(x, window_size, shift_size, H, W): B, _N, C = x.shape - # H, W = int(N**0.5), int(N**0.5) x = x.view(B,H,W,C) + if H % 2 != 0 or W % 2 != 0: + from modules.errors import log + log.warning('HiDiffusion: The feature size is not divisible by 2') + x = 
F.interpolate(x.permute(0,3,1,2).contiguous(), size=(window_size[0]*2, window_size[1]*2), mode='bicubic').permute(0,2,3,1).contiguous() if type(shift_size) == list or type(shift_size) == tuple: if shift_size[0] > 0: x = torch.roll(x, shifts=(-shift_size[0], -shift_size[1]), dims=(1, 2)) else: if shift_size > 0: x = torch.roll(x, shifts=(-shift_size, -shift_size), dims=(1, 2)) - x = x.view(B, H // window_size[0], window_size[0], W // window_size[1], window_size[1], C) + x = x.view(B, 2, window_size[0], 2, window_size[1], C) windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size[0], window_size[1], C) windows = windows.view(-1, window_size[0] * window_size[1], C) return windows @@ -116,15 +120,17 @@ def window_partition(x, window_size, shift_size, H, W): def window_reverse(windows, window_size, H, W, shift_size): B, _N, C = windows.shape windows = windows.view(-1, window_size[0], window_size[1], C) - B = int(windows.shape[0] / (H * W / window_size[0] / window_size[1])) - x = windows.view(B, H // window_size[0], W // window_size[1], window_size[0], window_size[1], -1) - x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + B = int(windows.shape[0] / 4) # 2x2 + x = windows.view(B, 2, 2, window_size[0], window_size[1], -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, window_size[0]*2, window_size[1]*2, -1) if type(shift_size) == list or type(shift_size) == tuple: if shift_size[0] > 0: x = torch.roll(x, shifts=(shift_size[0], shift_size[1]), dims=(1, 2)) else: if shift_size > 0: x = torch.roll(x, shifts=(shift_size, shift_size), dims=(1, 2)) + if H % 2 != 0 or W % 2 != 0: + x = F.interpolate(x.permute(0,3,1,2).contiguous(), size=(H, W), mode='bicubic').permute(0,2,3,1).contiguous() x = x.view(B, H*W, C) return x @@ -152,9 +158,9 @@ def window_reverse(windows, window_size, H, W, shift_size): rand_num = torch.rand(1) _B, N, _C = hidden_states.shape ori_H, ori_W = self.info['size'] - downsample_ratio = int(((ori_H*ori_W) // N)**0.5) - H, W = (ori_H//downsample_ratio, ori_W//downsample_ratio) - widow_size = (H//2, W//2) + downsample_ratio = round(((ori_H*ori_W) / N)**0.5) + H, W = (math.ceil(ori_H/downsample_ratio), math.ceil(ori_W/downsample_ratio)) + widow_size = (math.ceil(H/2), math.ceil(W/2)) if rand_num <= 0.25: shift_size = (0,0) if rand_num > 0.25 and rand_num <= 0.5: @@ -351,9 +357,11 @@ def custom_forward(*inputs): if i == 0: if self.aggressive_raunet and self.timestep >= self.T1_start and self.timestep < self.T1_end: - hidden_states = F.avg_pool2d(hidden_states, kernel_size=(2,2)) + self.info["upsample_size"] = (hidden_states.shape[2], hidden_states.shape[3]) + hidden_states = F.avg_pool2d(hidden_states, kernel_size=(2,2),ceil_mode=True) elif self.timestep < self.T1: - hidden_states = F.avg_pool2d(hidden_states, kernel_size=(2,2)) + self.info["upsample_size"] = (hidden_states.shape[2], hidden_states.shape[3]) + hidden_states = F.avg_pool2d(hidden_states, kernel_size=(2,2),ceil_mode=True) output_states = output_states + (hidden_states,) if self.downsamplers is not None: @@ -458,11 +466,9 @@ def fix_scale(first, second): # TODO hidiffusion breaks hidden_scale.shape on 3r )[0] if i == 1: if self.aggressive_raunet and self.timestep >= self.T1_start and self.timestep < self.T1_end: - re_size = (int(hidden_states.shape[-2] * 2), int(hidden_states.shape[-1] * 2)) - hidden_states = F.interpolate(hidden_states, size=re_size, mode='bicubic') + hidden_states = F.interpolate(hidden_states, size=self.info["upsample_size"], mode='bicubic') elif self.timestep < 
self.T1: - re_size = (int(hidden_states.shape[-2] * 2), int(hidden_states.shape[-1] * 2)) - hidden_states = F.interpolate(hidden_states, size=re_size, mode='bicubic') + hidden_states = F.interpolate(hidden_states, size=self.info["upsample_size"], mode='bicubic') if self.upsamplers is not None: for upsampler in self.upsamplers: @@ -589,9 +595,6 @@ def forward(self, hidden_states: torch.Tensor, scale = 1.0) -> torch.Tensor: # p self.T1 = int(aggressive_step/50 * self.max_timestep) else: self.T1 = int(self.max_timestep * self.T1_ratio) - if self.timestep < self.T1: - if ori_H != hidden_states.shape[2] and ori_W != hidden_states.shape[3]: - hidden_states = F.interpolate(hidden_states, scale_factor=2.0, mode='bicubic') self.timestep += 1 if self.timestep == self.max_timestep: self.timestep = 0 @@ -629,9 +632,10 @@ def apply_hidiffusion( make_block_fn = make_diffusers_unet_2d_condition model.unet.__class__ = make_block_fn(model.unet.__class__) diffusion_model = model.unet if hasattr(model, "unet") else model - diffusion_model.num_upsamplers += 2 + diffusion_model.num_upsamplers += 12 diffusion_model.info = { 'size': None, + 'upsample_size': None, 'hooks': [], 'text_to_img_controlnet': hasattr(model, 'controlnet'), 'is_inpainting_task': model.__class__ in auto_pipeline.AUTO_INPAINT_PIPELINES_MAPPING.values(), diff --git a/modules/images.py b/modules/images.py index caf3a7cc4..abb5608db 100644 --- a/modules/images.py +++ b/modules/images.py @@ -54,6 +54,9 @@ def image_grid(imgs, batch_size=1, rows=None): cols = math.ceil(len(imgs) / rows) params = script_callbacks.ImageGridLoopParams(imgs, cols, rows) script_callbacks.image_grid_callback(params) + imgs = [i for i in imgs if i is not None] if imgs is not None else [] + if len(imgs) == 0: + return None w, h = imgs[0].size grid = Image.new('RGB', size=(params.cols * w, params.rows * h), color=shared.opts.grid_background) for i, img in enumerate(params.imgs): diff --git a/modules/intel/ipex/diffusers.py b/modules/intel/ipex/diffusers.py index 732a18568..4f294ce6b 100644 --- a/modules/intel/ipex/diffusers.py +++ b/modules/intel/ipex/diffusers.py @@ -70,8 +70,8 @@ class SlicedAttnProcessor: # pylint: disable=too-few-public-methods def __init__(self, slice_size): self.slice_size = slice_size - def __call__(self, attn: Attention, hidden_states: torch.FloatTensor, - encoder_hidden_states=None, attention_mask=None) -> torch.FloatTensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches + def __call__(self, attn: Attention, hidden_states: torch.Tensor, + encoder_hidden_states=None, attention_mask=None) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches residual = hidden_states @@ -188,14 +188,11 @@ class AttnProcessor: Default processor for performing attention-related computations. 
""" - def __call__(self, attn: Attention, hidden_states: torch.FloatTensor, - encoder_hidden_states=None, attention_mask=None, - temb=None, scale: float = 1.0) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches + def __call__(self, attn, hidden_states: torch.Tensor, encoder_hidden_states=None, attention_mask=None, + temb=None, *args, **kwargs) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches residual = hidden_states - args = () if USE_PEFT_BACKEND else (scale,) - if attn.spatial_norm is not None: hidden_states = attn.spatial_norm(hidden_states, temb) @@ -213,15 +210,15 @@ def __call__(self, attn: Attention, hidden_states: torch.FloatTensor, if attn.group_norm is not None: hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) - query = attn.to_q(hidden_states, *args) + query = attn.to_q(hidden_states) if encoder_hidden_states is None: encoder_hidden_states = hidden_states elif attn.norm_cross: encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) - key = attn.to_k(encoder_hidden_states, *args) - value = attn.to_v(encoder_hidden_states, *args) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) query = attn.head_to_batch_dim(query) key = attn.head_to_batch_dim(key) @@ -292,7 +289,7 @@ def __call__(self, attn: Attention, hidden_states: torch.FloatTensor, hidden_states = attn.batch_to_head_dim(hidden_states) # linear proj - hidden_states = attn.to_out[0](hidden_states, *args) + hidden_states = attn.to_out[0](hidden_states) # dropout hidden_states = attn.to_out[1](hidden_states) diff --git a/modules/model_pixart.py b/modules/model_pixart.py new file mode 100644 index 000000000..d57079e4e --- /dev/null +++ b/modules/model_pixart.py @@ -0,0 +1,30 @@ +import diffusers + + +def load_pixart(checkpoint_info, diffusers_load_config={}): + from modules import shared, devices, modelloader, model_t5 + modelloader.hf_login() + # shared.opts.data['cuda_dtype'] = 'FP32' # override + # shared.opts.data['diffusers_model_cpu_offload'] = True # override + # devices.set_cuda_params() + fn = checkpoint_info.path.replace('huggingface/', '') + t5 = model_t5.load_t5(shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir) + transformer = diffusers.PixArtTransformer2DModel.from_pretrained( + fn, + subfolder = 'transformer', + cache_dir = shared.opts.diffusers_dir, + **diffusers_load_config, + ) + transformer.to(devices.device) + kwargs = { 'transformer': transformer } + if t5 is not None: + kwargs['text_encoder'] = t5 + diffusers_load_config.pop('variant', None) + pipe = diffusers.PixArtSigmaPipeline.from_pretrained( + 'PixArt-alpha/PixArt-Sigma-XL-2-1024-MS', + cache_dir = shared.opts.diffusers_dir, + **kwargs, + **diffusers_load_config, + ) + devices.torch_gc() + return pipe diff --git a/modules/model_sd3.py b/modules/model_sd3.py index 0b7373572..81470a97e 100644 --- a/modules/model_sd3.py +++ b/modules/model_sd3.py @@ -1,35 +1,11 @@ -import io import os -import contextlib -import warnings import torch import diffusers import transformers -import rich.traceback -rich.traceback.install() -warnings.filterwarnings(action="ignore", category=FutureWarning) -loggedin = False - - -def hf_login(): - global loggedin # pylint: disable=global-statement - import huggingface_hub as hf - from modules import shared - if shared.opts.huggingface_token is not None and len(shared.opts.huggingface_token) > 2 and not loggedin: - stdout = io.StringIO() - with 
contextlib.redirect_stdout(stdout): - hf.login(shared.opts.huggingface_token) - text = stdout.getvalue() or '' - line = [l for l in text.split('\n') if 'Token' in l] - shared.log.info(f'HF login: {line[0] if len(line) > 0 else text}') - loggedin = True - - -def load_sd3(te3=None, fn=None, cache_dir=None, config=None): - from modules import devices - hf_login() +def load_sd3(fn=None, cache_dir=None, config=None): + from modules import devices, modelloader repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers' model_id = 'stabilityai/stable-diffusion-3-medium-diffusers' dtype = torch.float16 @@ -37,140 +13,52 @@ def load_sd3(te3=None, fn=None, cache_dir=None, config=None): if fn is not None and fn.endswith('.safetensors') and os.path.exists(fn): model_id = fn loader = diffusers.StableDiffusion3Pipeline.from_single_file - kwargs = { - 'text_encoder': transformers.CLIPTextModelWithProjection.from_pretrained( - repo_id, - subfolder='text_encoder', - cache_dir=cache_dir, - torch_dtype=dtype, - ), - 'text_encoder_2': transformers.CLIPTextModelWithProjection.from_pretrained( - repo_id, - subfolder='text_encoder_2', - cache_dir=cache_dir, - torch_dtype=dtype, - ), - 'tokenizer': transformers.CLIPTokenizer.from_pretrained( - repo_id, - subfolder='tokenizer', - cache_dir=cache_dir, - ), - 'tokenizer_2': transformers.CLIPTokenizer.from_pretrained( - repo_id, - subfolder='tokenizer_2', - cache_dir=cache_dir, - ), - } + diffusers_minor = int(diffusers.__version__.split('.')[1]) + fn_size = os.path.getsize(fn) + if diffusers_minor < 30 or fn_size < 5e9: # te1/te2 do not get loaded correctly in diffusers 0.29.0 or model is without te1/te2 + kwargs = { + 'text_encoder': transformers.CLIPTextModelWithProjection.from_pretrained( + repo_id, + subfolder='text_encoder', + cache_dir=cache_dir, + torch_dtype=dtype, + ), + 'text_encoder_2': transformers.CLIPTextModelWithProjection.from_pretrained( + repo_id, + subfolder='text_encoder_2', + cache_dir=cache_dir, + torch_dtype=dtype, + ), + 'tokenizer': transformers.CLIPTokenizer.from_pretrained( + repo_id, + subfolder='tokenizer', + cache_dir=cache_dir, + ), + 'tokenizer_2': transformers.CLIPTokenizer.from_pretrained( + repo_id, + subfolder='tokenizer_2', + cache_dir=cache_dir, + ), + 'text_encoder_3': None, + } + elif fn_size < 1e10: # if model is below 10gb it does not have te3 + kwargs = { + 'text_encoder_3': None, + } + else: + kwargs = {} else: + modelloader.hf_login() model_id = repo_id loader = diffusers.StableDiffusion3Pipeline.from_pretrained - if te3 == 'fp16': - text_encoder_3 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - torch_dtype=dtype, - cache_dir=cache_dir, - ) - pipe = loader( - model_id, - torch_dtype=dtype, - text_encoder_3=text_encoder_3, - cache_dir=cache_dir, - config=config, - **kwargs, - ) - elif te3 == 'fp8': - quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True) - text_encoder_3 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - quantization_config=quantization_config, - cache_dir=cache_dir, - config=config, - ) - pipe = loader( - model_id, - text_encoder_3=text_encoder_3, - device_map='balanced', - torch_dtype=dtype, - cache_dir=cache_dir, - config=config, - **kwargs, - ) - else: - pipe = loader( - model_id, - torch_dtype=dtype, - text_encoder_3=None, - cache_dir=cache_dir, - config=config, - **kwargs, - ) + pipe = loader( + model_id, + torch_dtype=dtype, + cache_dir=cache_dir, + config=config, + **kwargs, + ) 
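Editor's note: the reworked `load_sd3()` above picks its loader kwargs from the diffusers version and the single-file checkpoint size — small files (or diffusers < 0.30) get the CLIP te1/te2 encoders and tokenizers pulled from the reference repo with T5 disabled, mid-size files only get `text_encoder_3=None`, and full checkpoints are loaded as-is. A minimal sketch of that decision, assuming an illustrative helper name that is not part of the patch:

```python
import os

def sd3_loader_plan(fn: str, diffusers_minor: int):
    """Illustrative only: mirrors the size heuristic used by load_sd3().
    Returns (load_clip_from_repo, extra_kwargs)."""
    size = os.path.getsize(fn)
    if diffusers_minor < 30 or size < 5e9:
        # te1/te2 are missing from the file (or not loadable by this diffusers
        # release), so CLIP encoders/tokenizers come from the repo and T5 is skipped
        return True, {'text_encoder_3': None}
    if size < 1e10:
        # below ~10 GB the checkpoint embeds CLIP but ships without T5
        return False, {'text_encoder_3': None}
    return False, {}  # full checkpoint: everything is loaded from the file itself
```

The T5 encoder itself is attached separately afterwards (see `modules/model_t5.py` below), so it never needs to be part of these kwargs.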
diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["stable-diffusion-3"] = diffusers.StableDiffusion3Pipeline diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["stable-diffusion-3"] = diffusers.StableDiffusion3Img2ImgPipeline - devices.torch_gc(force=True) + devices.torch_gc() return pipe - - -def load_te3(pipe, te3=None, cache_dir=None): - from modules import devices - hf_login() - repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers' - if pipe is None or not hasattr(pipe, 'text_encoder_3'): - return pipe - if 'fp16' in te3.lower(): - pipe.text_encoder_3 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - # torch_dtype=dtype, - cache_dir=cache_dir, - torch_dtype=pipe.text_encoder.dtype, - ) - elif 'fp8' in te3.lower(): - from installer import install - install('bitsandbytes', quiet=True) - quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True) - pipe.text_encoder_3 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - quantization_config=quantization_config, - cache_dir=cache_dir, - torch_dtype=pipe.text_encoder.dtype, - ) - else: - pipe.text_encoder_3 = None - if getattr(pipe, 'text_encoder_3', None) is not None and getattr(pipe, 'tokenizer_3', None) is None: - pipe.tokenizer_3 = transformers.T5TokenizerFast.from_pretrained( - repo_id, - subfolder='tokenizer_3', - cache_dir=cache_dir, - ) - devices.torch_gc(force=True) - - -if __name__ == '__main__': - model_fn = '/mnt/models/stable-diffusion/sd3/sd3_medium_incl_clips.safetensors' - import time - import logging - logging.basicConfig(level=logging.INFO) - log = logging.getLogger('sd') - t0 = time.time() - pipeline = load_sd3(te3='fp16', fn='') - - # pipeline.to('cuda') - t1 = time.time() - log.info(f'Loaded: time={t1-t0:.3f}') - - # pipeline.scheduler = diffusers.schedulers.EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config) - log.info(f'Scheduler, {pipeline.scheduler}') - image = pipeline( - prompt='a photo of a cute robot holding a sign above his head that says sdnext, high detailed', - negative_prompt='', - num_inference_steps=50, - height=1024, - width=1024, - guidance_scale=7.0, - ).images[0] - t2 = time.time() - log.info(f'Generated: time={t2-t1:.3f}') - image.save("/tmp/sd3.png") diff --git a/modules/model_t5.py b/modules/model_t5.py new file mode 100644 index 000000000..7b735794c --- /dev/null +++ b/modules/model_t5.py @@ -0,0 +1,77 @@ +import transformers + + +def load_t5(t5=None, cache_dir=None): + from modules import devices, modelloader + repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers' + if 'fp16' in t5.lower(): + modelloader.hf_login() + t5 = transformers.T5EncoderModel.from_pretrained( + repo_id, + subfolder='text_encoder_3', + # torch_dtype=dtype, + cache_dir=cache_dir, + torch_dtype=devices.dtype, + ) + elif 'fp4' in t5.lower(): + modelloader.hf_login() + from installer import install + install('bitsandbytes', quiet=True) + quantization_config = transformers.BitsAndBytesConfig(load_in_4bit=True) + t5 = transformers.T5EncoderModel.from_pretrained( + repo_id, + subfolder='text_encoder_3', + quantization_config=quantization_config, + cache_dir=cache_dir, + torch_dtype=devices.dtype, + ) + elif 'fp8' in t5.lower(): + modelloader.hf_login() + from installer import install + install('bitsandbytes', quiet=True) + quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True) + t5 = transformers.T5EncoderModel.from_pretrained( + repo_id, + 
subfolder='text_encoder_3', + quantization_config=quantization_config, + cache_dir=cache_dir, + torch_dtype=devices.dtype, + ) + elif 'int8' in t5.lower(): + modelloader.hf_login() + from installer import install + install('nncf==2.7.0', quiet=True) + from modules.sd_models_compile import nncf_compress_model + from modules.sd_hijack import NNCF_T5DenseGatedActDense # T5DenseGatedActDense uses fp32 + t5 = transformers.T5EncoderModel.from_pretrained( + repo_id, + subfolder='text_encoder_3', + cache_dir=cache_dir, + torch_dtype=devices.dtype, + ) + for i in range(len(t5.encoder.block)): + t5.encoder.block[i].layer[1].DenseReluDense = NNCF_T5DenseGatedActDense( + t5.encoder.block[i].layer[1].DenseReluDense + ) + t5 = nncf_compress_model(t5) + else: + t5 = None + return t5 + + +def set_t5(pipe, module, t5=None, cache_dir=None): + from modules import devices, shared + if pipe is None or not hasattr(pipe, module): + return pipe + t5 = load_t5(t5=t5, cache_dir=cache_dir) + setattr(pipe, module, t5) + if shared.cmd_opts.lowvram or shared.opts.diffusers_seq_cpu_offload: + from accelerate import cpu_offload + getattr(pipe, module).to("cpu") + cpu_offload(getattr(pipe, module), devices.device, offload_buffers=len(getattr(pipe, module)._parameters) > 0) # pylint: disable=protected-access + elif shared.cmd_opts.medvram or shared.opts.diffusers_model_cpu_offload: + if not hasattr(pipe, "_all_hooks") or len(pipe._all_hooks) == 0: # pylint: disable=protected-access + pipe.enable_model_cpu_offload(device=devices.device) + else: + pipe.maybe_free_model_hooks() + devices.torch_gc() diff --git a/modules/modeldata.py b/modules/modeldata.py index c904ef05b..2ae958218 100644 --- a/modules/modeldata.py +++ b/modules/modeldata.py @@ -83,7 +83,7 @@ def sd_model_type(self): return model_type if not shared.native: model_type = 'ldm' - elif "StableDiffusion3" in self.sd_refiner.__class__.__name__: + elif "StableDiffusion3" in self.sd_model.__class__.__name__: model_type = 'sd3' elif "StableDiffusionXL" in self.sd_model.__class__.__name__: model_type = 'sdxl' @@ -97,6 +97,8 @@ def sd_model_type(self): model_type = 'sd' # sd is compatible with sd elif "Kandinsky" in self.sd_model.__class__.__name__: model_type = 'kandinsky' + elif "HunyuanDiT" in self.sd_model.__class__.__name__: + model_type = 'hunyuandit' elif "Cascade" in self.sd_model.__class__.__name__: model_type = 'sc' else: diff --git a/modules/modelloader.py b/modules/modelloader.py index fa27ede6d..fd9e41edb 100644 --- a/modules/modelloader.py +++ b/modules/modelloader.py @@ -1,8 +1,10 @@ +import io import os import time import json import shutil import importlib +import contextlib from typing import Dict from urllib.parse import urlparse from PIL import Image @@ -12,10 +14,25 @@ from modules.paths import script_path, models_path +loggedin = False diffuser_repos = [] debug = shared.log.trace if os.environ.get('SD_DOWNLOAD_DEBUG', None) is not None else lambda *args, **kwargs: None +def hf_login(token=None): + global loggedin # pylint: disable=global-statement + import huggingface_hub as hf + token = token or shared.opts.huggingface_token + if token is not None and len(token) > 2 and not loggedin: + stdout = io.StringIO() + with contextlib.redirect_stdout(stdout): + hf.login(shared.opts.huggingface_token) + text = stdout.getvalue() or '' + line = [l for l in text.split('\n') if 'Token' in l] + shared.log.info(f'HF login: {line[0] if len(line) > 0 else text}') + loggedin = True + + def download_civit_meta(model_path: str, model_id): fn = 
os.path.splitext(model_path)[0] + '.json' url = f'https://civitai.com/api/v1/models/{model_id}' @@ -152,6 +169,10 @@ def download_civit_model_thread(model_name, model_url, model_path, model_type, t def download_civit_model(model_url: str, model_name: str, model_path: str, model_type: str, token: str = None): import threading + if model_name is None or len(model_name) == 0: + err = 'Model download: no target model name provided' + shared.log.error(err) + return err thread = threading.Thread(target=download_civit_model_thread, args=(model_name, model_url, model_path, model_type, token)) thread.start() return f'Model download: name={model_name} url={model_url} path={model_path}' @@ -183,8 +204,7 @@ def download_diffusers_model(hub_id: str, cache_dir: str = None, download_config shared.log.debug(f'Diffusers downloading: id="{hub_id}" args={download_config}') token = token or shared.opts.huggingface_token if token is not None and len(token) > 2: - shared.log.debug(f"Diffusers authentication: {token}") - hf.login(token) + hf_login(token) pipeline_dir = None ok = False @@ -297,6 +317,10 @@ def get_reference_opts(name: str, quiet=False): if k == name or model_name == name: model_opts = v break + model_name = model_name.replace('huggingface/', '') + if k == name or model_name == name: + model_opts = v + break if not model_opts: # shared.log.error(f'Reference: model="{name}" not found') return {} diff --git a/modules/pag/__init__.py b/modules/pag/__init__.py index 484fccc13..29cdee8ca 100644 --- a/modules/pag/__init__.py +++ b/modules/pag/__init__.py @@ -15,9 +15,11 @@ def apply(p: processing.StableDiffusionProcessing): # pylint: disable=arguments- c = shared.sd_model.__class__ if shared.sd_loaded else None if c == StableDiffusionPAGPipeline or c == StableDiffusionXLPAGPipeline: unapply() - return None if p.pag_scale == 0: return + if sd_models.get_diffusers_task(shared.sd_model) != sd_models.DiffusersTaskType.TEXT_2_IMAGE: + shared.log.warning(f'PAG: pipeline={c} not implemented') + return None if detect.is_sd15(c): orig_pipeline = shared.sd_model shared.sd_model = sd_models.switch_pipe(StableDiffusionPAGPipeline, shared.sd_model) diff --git a/modules/pag/pipe_sdxl.py b/modules/pag/pipe_sdxl.py index 1576926d1..429384ea3 100644 --- a/modules/pag/pipe_sdxl.py +++ b/modules/pag/pipe_sdxl.py @@ -446,6 +446,7 @@ def __init__( feature_extractor: CLIPImageProcessor = None, force_zeros_for_empty_prompt: bool = True, add_watermarker: Optional[bool] = None, + requires_aesthetics_score: Optional[bool] = None, # todo: patch SDXLPAG pipeline ): super().__init__() @@ -460,13 +461,13 @@ def __init__( image_encoder=image_encoder, feature_extractor=feature_extractor, ) + if 'requires_aesthetics_score' in self.config: + self.register_to_config(requires_aesthetics_score=requires_aesthetics_score) self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt) self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) - self.default_sample_size = self.unet.config.sample_size - - add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available() + add_watermarker = False if add_watermarker: self.watermark = StableDiffusionXLWatermarker() @@ -1500,7 +1501,7 @@ def __call__( else: replace_processor = PAGIdentitySelfAttnProcessor() - if(self.pag_applied_layers_index): + if self.pag_applied_layers_index: drop_layers = self.pag_applied_layers_index for drop_layer in 
drop_layers: layer_number = int(drop_layer[1:]) @@ -1517,7 +1518,7 @@ def __call__( raise ValueError( f"Invalid layer index: {drop_layer}. Available layers: {len(down_layers)} down layers, {len(mid_layers)} mid layers, {len(up_layers)} up layers." ) - elif(self.pag_applied_layers): + elif self.pag_applied_layers: drop_full_layers = self.pag_applied_layers for drop_full_layer in drop_full_layers: try: @@ -1621,7 +1622,7 @@ def __call__( if XLA_AVAILABLE: xm.mark_step() - if not output_type == "latent": + if output_type != "latent": # make sure the VAE is in float32 mode, as it overflows in float16 needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast @@ -1656,7 +1657,7 @@ def __call__( else: image = latents - if not output_type == "latent": + if output_type != "latent": # apply watermark if available if self.watermark is not None: image = self.watermark.apply_watermark(image) @@ -1671,7 +1672,7 @@ def __call__( #Change the attention layers back to original ones after PAG was applied if self.do_adversarial_guidance: - if(self.pag_applied_layers_index): + if self.pag_applied_layers_index: drop_layers = self.pag_applied_layers_index for drop_layer in drop_layers: layer_number = int(drop_layer[1:]) @@ -1685,26 +1686,22 @@ def __call__( else: raise ValueError(f"Invalid layer type: {drop_layer[0]}") except IndexError: - raise ValueError( - f"Invalid layer index: {drop_layer}. Available layers: {len(down_layers)} down layers, {len(mid_layers)} mid layers, {len(up_layers)} up layers." - ) - elif(self.pag_applied_layers): - drop_full_layers = self.pag_applied_layers - for drop_full_layer in drop_full_layers: - try: - if drop_full_layer == "down": - for down_layer in down_layers: - down_layer.processor = AttnProcessor2_0() - elif drop_full_layer == "mid": - for mid_layer in mid_layers: - mid_layer.processor = AttnProcessor2_0() - elif drop_full_layer == "up": - for up_layer in up_layers: - up_layer.processor = AttnProcessor2_0() - else: - raise ValueError(f"Invalid layer type: {drop_full_layer}") - except IndexError: - raise ValueError( - f"Invalid layer index: {drop_full_layer}. Available layers are: down, mid and up. If you need to specify each layer index, you can use `pag_applied_layers_index`" - ) + raise ValueError(f"Invalid layer index: {drop_layer}. Available layers: {len(down_layers)} down layers, {len(mid_layers)} mid layers, {len(up_layers)} up layers.") + elif self.pag_applied_layers: + drop_full_layers = self.pag_applied_layers + for drop_full_layer in drop_full_layers: + try: + if drop_full_layer == "down": + for down_layer in down_layers: + down_layer.processor = AttnProcessor2_0() + elif drop_full_layer == "mid": + for mid_layer in mid_layers: + mid_layer.processor = AttnProcessor2_0() + elif drop_full_layer == "up": + for up_layer in up_layers: + up_layer.processor = AttnProcessor2_0() + else: + raise ValueError(f"Invalid layer type: {drop_full_layer}") + except IndexError: + raise ValueError(f"Invalid layer index: {drop_full_layer}. Available layers are: down, mid and up. 
If you need to specify each layer index, you can use `pag_applied_layers_index`") return StableDiffusionXLPipelineOutput(images=image) diff --git a/modules/processing.py b/modules/processing.py index 4782c8b4d..d453e0b15 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -158,7 +158,6 @@ def process_images(p: StableDiffusionProcessing) -> Processed: shared.prompt_styles.apply_styles_to_extra(p) shared.prompt_styles.extract_comments(p) - pag.apply(p) if shared.opts.cuda_compile_backend == 'none': sd_models.apply_token_merging(p.sd_model) sd_hijack_freeu.apply_freeu(p, not shared.native) @@ -273,6 +272,7 @@ def infotext(_inxex=0): # dummy function overriden if there are iterations extra_network_data = None debug(f'Processing inner: args={vars(p)}') for n in range(p.n_iter): + pag.apply(p) debug(f'Processing inner: iteration={n+1}/{p.n_iter}') p.iteration = n if shared.state.skipped: diff --git a/modules/processing_args.py b/modules/processing_args.py index a8e7c33a3..91f10c80d 100644 --- a/modules/processing_args.py +++ b/modules/processing_args.py @@ -27,6 +27,8 @@ def task_specific_kwargs(p, model): 'height': 8 * math.ceil(p.height / 8), } elif (sd_models.get_diffusers_task(model) == sd_models.DiffusersTaskType.IMAGE_2_IMAGE or is_img2img_model) and len(getattr(p, 'init_images', [])) > 0: + if shared.sd_model_type == 'sdxl': + model.register_to_config(requires_aesthetics_score = False) p.ops.append('img2img') task_args = { 'image': p.init_images, @@ -41,6 +43,8 @@ def task_specific_kwargs(p, model): 'strength': p.denoising_strength, } elif (sd_models.get_diffusers_task(model) == sd_models.DiffusersTaskType.INPAINTING or is_img2img_model) and len(getattr(p, 'init_images', [])) > 0: + if shared.sd_model_type == 'sdxl': + model.register_to_config(requires_aesthetics_score = False) p.ops.append('inpaint') width, height = processing_helpers.resize_init_images(p) task_args = { @@ -106,7 +110,7 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2 shared.log.error(f'Sampler timesteps: {e}') else: shared.log.warning(f'Sampler: sampler={model.scheduler.__class__.__name__} timesteps not supported') - if shared.opts.prompt_attention != 'Fixed attention' and ('StableDiffusion' in model.__class__.__name__ or 'StableCascade' in model.__class__.__name__) and 'Onnx' not in model.__class__.__name__ and 'StableDiffusion3' not in model.__class__.__name__: + if shared.opts.prompt_attention != 'Fixed attention' and ('StableDiffusion' in model.__class__.__name__ or 'StableCascade' in model.__class__.__name__) and 'Onnx' not in model.__class__.__name__: try: prompt_parser_diffusers.encode_prompts(model, p, prompts, negative_prompts, steps=steps, clip_skip=clip_skip) parser = shared.opts.prompt_attention @@ -126,6 +130,8 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2 args['prompt_embeds_pooled'] = p.positive_pooleds[0].unsqueeze(0) elif 'XL' in model.__class__.__name__ and len(getattr(p, 'positive_pooleds', [])) > 0: args['pooled_prompt_embeds'] = p.positive_pooleds[0] + elif 'StableDiffusion3' in model.__class__.__name__ and len(getattr(p, 'positive_pooleds', [])) > 0: + args['pooled_prompt_embeds'] = p.positive_pooleds[0] else: args['prompt'] = prompts if 'negative_prompt' in possible: @@ -135,6 +141,8 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2 args['negative_prompt_embeds_pooled'] = p.negative_pooleds[0].unsqueeze(0) if 'XL' in model.__class__.__name__ and len(getattr(p, 
'negative_pooleds', [])) > 0: args['negative_pooled_prompt_embeds'] = p.negative_pooleds[0] + if 'StableDiffusion3' in model.__class__.__name__ and len(getattr(p, 'negative_pooleds', [])) > 0: + args['negative_pooled_prompt_embeds'] = p.negative_pooleds[0] else: if 'PixArtSigmaPipeline' in model.__class__.__name__: # pixart-sigma pipeline throws list-of-list for negative prompt args['negative_prompt'] = negative_prompts[0] diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py index d5b7d1922..824a79007 100644 --- a/modules/processing_callbacks.py +++ b/modules/processing_callbacks.py @@ -37,8 +37,6 @@ def diffusers_callback(pipe, step: int, timestep: int, kwargs: dict): if p is None: return kwargs latents = kwargs.get('latents', None) - if torch.is_tensor(latents) and latents.device.type == "privateuseone": - torch.dml.synchronize_tensor(latents) # DML synchronize debug_callback(f'Callback: step={step} timestep={timestep} latents={latents.shape if latents is not None else None} kwargs={list(kwargs)}') shared.state.sampling_step = step if shared.state.interrupted or shared.state.skipped: diff --git a/modules/processing_class.py b/modules/processing_class.py index bf37c8e2d..e61cbe401 100644 --- a/modules/processing_class.py +++ b/modules/processing_class.py @@ -516,6 +516,8 @@ def switch_class(p: StableDiffusionProcessing, new_class: type, dct: dict = None for k, v in dct.items(): if k in possible: kwargs[k] = v + if new_class == StableDiffusionProcessingTxt2Img: + sd_models.clean_diffuser_pipe(shared.sd_model) debug(f"Switching class: {p.__class__.__name__} -> {new_class.__name__} fn={sys._getframe(1).f_code.co_name}") # pylint: disable=protected-access p.__class__ = new_class p.__init__(**kwargs) diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py index b9448cc38..dca8e0a71 100644 --- a/modules/processing_diffusers.py +++ b/modules/processing_diffusers.py @@ -105,7 +105,6 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): desc='Base', ) shared.state.sampling_steps = base_args.get('prior_num_inference_steps', None) or base_args.get('num_inference_steps', None) or p.steps - p.extra_generation_params['Pipeline'] = shared.sd_model.__class__.__name__ if shared.opts.scheduler_eta is not None and shared.opts.scheduler_eta > 0 and shared.opts.scheduler_eta < 1: p.extra_generation_params["Sampler Eta"] = shared.opts.scheduler_eta output = None diff --git a/modules/processing_helpers.py b/modules/processing_helpers.py index be04ea8d4..110b32b93 100644 --- a/modules/processing_helpers.py +++ b/modules/processing_helpers.py @@ -351,11 +351,10 @@ def validate_sample(tensor): cast = sample.astype(np.uint8) if len(w) > 0: nans = np.isnan(sample).sum() - shared.log.error(f'Failed to validate samples: sample={sample.shape} invalid={nans}') cast = np.nan_to_num(sample) minimum, maximum, mean = np.min(cast), np.max(cast), np.mean(cast) cast = cast.astype(np.uint8) - shared.log.warning(f'Attempted to correct samples: min={minimum:.2f} max={maximum:.2f} mean={mean:.2f}') + shared.log.error(f'Failed to validate samples: sample={sample.shape} min={minimum:.2f} max={maximum:.2f} mean={mean:.2f} invalid={nans}') return cast @@ -390,6 +389,9 @@ def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler if latent_upscaler is not None: return torch.nn.functional.interpolate(latents, size=(p.hr_upscale_to_y // 8, p.hr_upscale_to_x // 8), mode=latent_upscaler["mode"], antialias=latent_upscaler["antialias"]) 
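Editor's note: the hires resize path above takes one of two routes — a latent upscaler interpolates directly in latent space, while anything else (continued below) decodes the first pass to PIL images and resizes those, after guarding against missing target dimensions. A condensed sketch under assumed helper names (`vae_decode` and `image_resize` stand in for the real module functions):

```python
from typing import Optional
import torch
import torch.nn.functional as F

def hires_resize(latents: torch.Tensor, width: int, height: int,
                 latent_mode: Optional[str], vae_decode, image_resize):
    if width == 0 or height == 0:
        # mirrors the new guard: bail out instead of resizing to 0x0
        raise ValueError('Hires: missing upscaling dimensions')
    if latent_mode is not None:
        # latent upscaler: stay in latent space; //8 reflects the VAE scale factor
        return F.interpolate(latents, size=(height // 8, width // 8), mode=latent_mode)
    images = vae_decode(latents)  # decode the first pass to PIL images
    return [image_resize(img, width, height) for img in images]
```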
first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil') + if p.hr_upscale_to_x == 0 or p.hr_upscale_to_y == 0 and hasattr(p, 'init_hr'): + shared.log.error('Hires: missing upscaling dimensions') + return first_pass_images resized_images = [] for img in first_pass_images: if latent_upscaler is None: @@ -397,6 +399,7 @@ def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler else: resized_image = img resized_images.append(resized_image) + devices.torch_gc() return resized_images diff --git a/modules/processing_info.py b/modules/processing_info.py index c6f572dae..95e573f21 100644 --- a/modules/processing_info.py +++ b/modules/processing_info.py @@ -63,6 +63,10 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No "Comment": comment, "Operations": '; '.join(ops).replace('"', '') if len(p.ops) > 0 else 'none', } + # native + if shared.native: + args['Pipeline'] = shared.sd_model.__class__.__name__ + args['T5'] = None if (not shared.opts.add_model_name_to_info or shared.opts.sd_text_encoder is None or shared.opts.sd_text_encoder == 'None') else shared.opts.sd_text_encoder if 'txt2img' in p.ops: args["Variation seed"] = all_subseeds[index] if p.subseed_strength > 0 else None args["Variation strength"] = p.subseed_strength if p.subseed_strength > 0 else None @@ -143,12 +147,20 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No args['Sampler sigma uncond'] = shared.opts.s_churn if shared.opts.s_churn != shared.opts.data_labels.get('s_churn').default else None args['Sampler sigma noise'] = shared.opts.s_noise if shared.opts.s_noise != shared.opts.data_labels.get('s_noise').default else None args['Sampler sigma tmin'] = shared.opts.s_tmin if shared.opts.s_tmin != shared.opts.data_labels.get('s_tmin').default else None - # tome - args['ToMe'] = shared.opts.tome_ratio if shared.opts.tome_ratio != 0 else None - args['ToDo'] = shared.opts.todo_ratio if shared.opts.todo_ratio != 0 else None + # tome/todo + if shared.opts.token_merging_method == 'ToMe': + args['ToMe'] = shared.opts.tome_ratio if shared.opts.tome_ratio != 0 else None + else: + args['ToDo'] = shared.opts.todo_ratio if shared.opts.todo_ratio != 0 else None args.update(p.extra_generation_params) - params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in args.items() if v is not None]) + for k, v in args.copy().items(): + if v is None: + del args[k] + if isinstance(v, str): + if len(v) == 0 or v == '0x0': + del args[k] + params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in args.items()]) negative_prompt_text = f"\nNegative prompt: {all_negative_prompts[index]}" if all_negative_prompts[index] else "" infotext = f"{all_prompts[index]}{negative_prompt_text}\n{params_text}".strip() return infotext diff --git a/modules/processing_vae.py b/modules/processing_vae.py index 5db51a176..9b295e39c 100644 --- a/modules/processing_vae.py +++ b/modules/processing_vae.py @@ -140,6 +140,7 @@ def vae_decode(latents, model, output_type='np', full_quality=True): if shared.cmd_opts.profile: t1 = time.time() shared.log.debug(f'Profile: VAE decode: {t1-t0:.2f}') + devices.torch_gc() return imgs @@ -155,4 +156,5 @@ def vae_encode(image, model, full_quality=True): # pylint: disable=unused-variab latents = full_vae_encode(image=tensor, model=shared.sd_model) else: latents = taesd_vae_encode(image=tensor) + 
devices.torch_gc() return latents diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 3c472b227..b212d08a6 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -12,10 +12,9 @@ debug = shared.log.trace if os.environ.get('SD_PROMPT_DEBUG', None) is not None else lambda *args, **kwargs: None debug('Trace: PROMPT') orig_encode_token_ids_to_embeddings = EmbeddingsProvider._encode_token_ids_to_embeddings # pylint: disable=protected-access -token_dict = None -token_type = None +token_dict = None # used by helper get_tokens +token_type = None # used by helper get_tokens cache = {} -cache_type = None def compel_hijack(self, token_ids: torch.Tensor, @@ -41,8 +40,26 @@ def compel_hijack(self, token_ids: torch.Tensor, return hidden_state -EmbeddingsProvider._encode_token_ids_to_embeddings = compel_hijack # pylint: disable=protected-access +def sd3_compel_hijack(self, token_ids: torch.Tensor, + attention_mask: typing.Optional[torch.Tensor] = None) -> torch.Tensor: + needs_hidden_states = True + text_encoder_output = self.text_encoder(token_ids, attention_mask, output_hidden_states=needs_hidden_states, return_dict=True) + clip_skip = int(self.returned_embeddings_type) + hidden_state = text_encoder_output.hidden_states[-(clip_skip+1)] + + return hidden_state + +def insert_parser_highjack(pipename): + if "StableDiffusion3" in pipename: + EmbeddingsProvider._encode_token_ids_to_embeddings = sd3_compel_hijack # pylint: disable=protected-access + debug("Loading SD3 Parser hijack") + else: + EmbeddingsProvider._encode_token_ids_to_embeddings = compel_hijack # pylint: disable=protected-access + debug("Loading Standard Parser hijack") + + +insert_parser_highjack("Initialize") # from https://github.com/damian0815/compel/blob/main/src/compel/diffusers_textual_inversion_manager.py class DiffusersTextualInversionManager(BaseTextualInversionManager): @@ -126,14 +143,14 @@ def get_tokens(msg, prompt): except Exception: tokens.append(f'UNK_{i}') token_count = len(ids) - int(has_bos_token) - int(has_eos_token) - shared.log.trace(f'Prompt tokenizer: type={msg} tokens={token_count} {tokens}') + debug(f'Prompt tokenizer: type={msg} tokens={token_count} {tokens}') def encode_prompts(pipe, p, prompts: list, negative_prompts: list, steps: int, clip_skip: typing.Optional[int] = None): if 'StableDiffusion' not in pipe.__class__.__name__ and 'DemoFusion' not in pipe.__class__.__name__ and 'StableCascade' not in pipe.__class__.__name__: shared.log.warning(f"Prompt parser not supported: {pipe.__class__.__name__}") return - elif prompts == cache.get('prompts', None) and negative_prompts == cache.get('negative_prompts', None) and clip_skip == cache.get('clip_skip', None) and cache.get('model_type', None) == shared.sd_model_type and steps == cache.get('steps', None): + elif shared.opts.sd_textencoder_cache and prompts == cache.get('prompts', None) and negative_prompts == cache.get('negative_prompts', None) and clip_skip == cache.get('clip_skip', None) and cache.get('model_type', None) == shared.sd_model_type and steps == cache.get('steps', None): p.prompt_embeds = cache.get('prompt_embeds', None) p.positive_pooleds = cache.get('positive_pooleds', None) p.negative_embeds = cache.get('negative_embeds', None) @@ -151,6 +168,11 @@ def encode_prompts(pipe, p, prompts: list, negative_prompts: list, steps: int, c p.negative_embeds = [] p.negative_pooleds = [] + if (shared.cmd_opts.medvram or shared.opts.diffusers_model_cpu_offload) and hasattr(pipe, "_all_hooks") 
and hasattr(pipe, "maybe_free_model_hooks"): + # if the last job is interrupted, model will stay in the vram and cause oom, send everything back to cpu before continuing + pipe.maybe_free_model_hooks() + devices.torch_gc() + for i in range(max(len(positive_schedule), len(negative_schedule))): positive_prompt = positive_schedule[i % len(positive_schedule)] negative_prompt = negative_schedule[i % len(negative_schedule)] @@ -164,22 +186,29 @@ def encode_prompts(pipe, p, prompts: list, negative_prompts: list, steps: int, c if negative_pooled is not None: p.negative_pooleds.append(torch.cat([negative_pooled] * len(negative_prompts), dim=0)) - cache.update({ - 'prompt_embeds': p.prompt_embeds, - 'negative_embeds': p.negative_embeds, - 'positive_pooleds': p.positive_pooleds, - 'negative_pooleds': p.negative_pooleds, - 'scheduled_prompt': p.scheduled_prompt, - 'prompts': prompts, - 'negative_prompts': negative_prompts, - 'clip_skip': clip_skip, - 'steps': steps, - 'model_type': shared.sd_model_type - }) + if shared.opts.sd_textencoder_cache: + cache.update({ + 'prompt_embeds': p.prompt_embeds, + 'negative_embeds': p.negative_embeds, + 'positive_pooleds': p.positive_pooleds, + 'negative_pooleds': p.negative_pooleds, + 'scheduled_prompt': p.scheduled_prompt, + 'prompts': prompts, + 'negative_prompts': negative_prompts, + 'clip_skip': clip_skip, + 'steps': steps, + 'model_type': shared.sd_model_type + }) + else: + cache.clear() if debug_enabled: get_tokens('positive', prompts[0]) get_tokens('negative', negative_prompts[0]) + if (shared.cmd_opts.medvram or shared.opts.diffusers_model_cpu_offload) and hasattr(pipe, "_all_hooks") and hasattr(pipe, "maybe_free_model_hooks"): + # text encoder will stay in the vram and cause oom, send everything back to cpu before continuing + pipe.maybe_free_model_hooks() debug(f"Prompt encode: time={(time.time() - t0):.3f}") + devices.torch_gc() return @@ -237,7 +266,7 @@ def pad_to_same_length(pipe, embeds): if not hasattr(pipe, 'encode_prompt') and 'StableCascade' not in pipe.__class__.__name__: return embeds device = pipe.device if str(pipe.device) != 'meta' else devices.device - if shared.opts.diffusers_zeros_prompt_pad: + if shared.opts.diffusers_zeros_prompt_pad or 'StableDiffusion3' in pipe.__class__.__name__: empty_embed = [torch.zeros((1, 77, embeds[0].shape[2]), device=device, dtype=embeds[0].dtype)] else: try: @@ -257,15 +286,34 @@ def pad_to_same_length(pipe, embeds): embeds[i] = embed return embeds +def split_prompts(prompt, SD3 = False): + if prompt.find("TE2:") != -1: + prompt, prompt2 = prompt.split("TE2:") + else: + prompt2 = prompt + + if prompt.find("TE3:") != -1: + prompt, prompt3 = prompt.split("TE3:") + elif prompt2.find("TE3:") != -1: + prompt2, prompt3 = prompt2.split("TE3:") + else: + prompt3 = prompt + + prompt = prompt.strip() + prompt2 = " " if prompt2.strip() == "" else prompt2.strip() + prompt3 = " " if prompt3.strip() == "" else prompt3.strip() + + if SD3 and prompt3 != " ": + ps, _ws = get_prompts_with_weights(prompt3) + prompt3 = " ".join(ps) + return prompt, prompt2, prompt3 + def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", clip_skip: int = None): device = pipe.device if str(pipe.device) != 'meta' else devices.device - prompt_split = prompt.split("TE2:") - prompt = prompt_split[0] - prompt_2 = prompt_split[-1] - neg_prompt_split = neg_prompt.split("TE2:") - neg_prompt_2 = neg_prompt_split[-1] - neg_prompt = neg_prompt_split[0] + SD3 = hasattr(pipe, 'text_encoder_3') + prompt, prompt_2, prompt_3 = 
split_prompts(prompt, SD3) + neg_prompt, neg_prompt_2, neg_prompt_3 = split_prompts(neg_prompt, SD3) if prompt != prompt_2: ps = [get_prompts_with_weights(p) for p in [prompt, prompt_2]] @@ -285,8 +333,8 @@ def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", c embedding_providers = prepare_embedding_providers(pipe, clip_skip) prompt_embeds = [] negative_prompt_embeds = [] - pooled_prompt_embeds = None - negative_pooled_prompt_embeds = None + pooled_prompt_embeds = [] + negative_pooled_prompt_embeds = [] for i in range(len(embedding_providers)): t0 = time.time() text = list(positives[i]) @@ -310,22 +358,30 @@ def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", c embed, ntokens = embedding_providers[i].get_embeddings_for_weighted_prompt_fragments(text_batch=[negatives[i]], fragment_weights_batch=[negative_weights[i]], device=device, should_return_tokens=True) negative_prompt_embeds.append(embed) debug(f'Prompt: unpadded shape={prompt_embeds[0].shape} TE{i+1} ptokens={torch.count_nonzero(ptokens)} ntokens={torch.count_nonzero(ntokens)} time={(time.time() - t0):.3f}') - - if prompt_embeds[-1].shape[-1] > 768: + if SD3: + t0 = time.time() + pooled_prompt_embeds.append(embedding_providers[0].get_pooled_embeddings(texts=positives[0] if len(positives[0]) == 1 else [" ".join(positives[0])], device=device)) + pooled_prompt_embeds.append(embedding_providers[1].get_pooled_embeddings(texts=positives[-1] if len(positives[-1]) == 1 else [" ".join(positives[-1])], device=device)) + negative_pooled_prompt_embeds.append(embedding_providers[0].get_pooled_embeddings(texts=negatives[0] if len(negatives[0]) == 1 else [" ".join(negatives[0])], device=device)) + negative_pooled_prompt_embeds.append(embedding_providers[1].get_pooled_embeddings(texts=negatives[-1] if len(negatives[-1]) == 1 else [" ".join(negatives[-1])], device=device)) + pooled_prompt_embeds = torch.cat(pooled_prompt_embeds, dim=-1) + negative_pooled_prompt_embeds = torch.cat(negative_pooled_prompt_embeds, dim=-1) + debug(f'Prompt: pooled shape={pooled_prompt_embeds[0].shape} time={(time.time() - t0):.3f}') + elif prompt_embeds[-1].shape[-1] > 768: t0 = time.time() if shared.opts.diffusers_pooled == "weighted": - pooled_prompt_embeds = prompt_embeds[-1][ + pooled_prompt_embeds = embedding_providers[-1].text_encoder.text_projection(prompt_embeds[-1][ torch.arange(prompt_embeds[-1].shape[0], device=device), (ptokens.to(dtype=torch.int, device=device) == 49407) .int() .argmax(dim=-1), - ] - negative_pooled_prompt_embeds = negative_prompt_embeds[-1][ + ]) + negative_pooled_prompt_embeds = embedding_providers[-1].text_encoder.text_projection(negative_prompt_embeds[-1][ torch.arange(negative_prompt_embeds[-1].shape[0], device=device), (ntokens.to(dtype=torch.int, device=device) == 49407) .int() .argmax(dim=-1), - ] + ]) else: try: pooled_prompt_embeds = embedding_providers[-1].get_pooled_embeddings(texts=[prompt_2], device=device) if prompt_embeds[-1].shape[-1] > 768 else None @@ -338,7 +394,31 @@ def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", c prompt_embeds = torch.cat(prompt_embeds, dim=-1) if len(prompt_embeds) > 1 else prompt_embeds[0] negative_prompt_embeds = torch.cat(negative_prompt_embeds, dim=-1) if len(negative_prompt_embeds) > 1 else \ negative_prompt_embeds[0] + if pooled_prompt_embeds == []: + pooled_prompt_embeds = None + if negative_pooled_prompt_embeds == []: + negative_pooled_prompt_embeds = None debug(f'Prompt: positive={prompt_embeds.shape if 
prompt_embeds is not None else None} pooled={pooled_prompt_embeds.shape if pooled_prompt_embeds is not None else None} negative={negative_prompt_embeds.shape if negative_prompt_embeds is not None else None} pooled={negative_pooled_prompt_embeds.shape if negative_pooled_prompt_embeds is not None else None}') if prompt_embeds.shape[1] != negative_prompt_embeds.shape[1]: [prompt_embeds, negative_prompt_embeds] = pad_to_same_length(pipe, [prompt_embeds, negative_prompt_embeds]) + if SD3: + device = pipe.device if str(pipe.device) != 'meta' else devices.device + t5_prompt_embed = pipe._get_t5_prompt_embeds( # pylint: disable=protected-access + prompt=prompt_3, + num_images_per_prompt=prompt_embeds.shape[0], + device=device, + ) + prompt_embeds = torch.nn.functional.pad( + prompt_embeds, (0, t5_prompt_embed.shape[-1] - prompt_embeds.shape[-1]) + ).to(device) + prompt_embeds = torch.cat([prompt_embeds, t5_prompt_embed], dim=-2) + t5_negative_prompt_embed = pipe._get_t5_prompt_embeds( # pylint: disable=protected-access + prompt=neg_prompt_3, + num_images_per_prompt=prompt_embeds.shape[0], + device=device, + ) + negative_prompt_embeds = torch.nn.functional.pad( + negative_prompt_embeds, (0, t5_negative_prompt_embed.shape[-1] - negative_prompt_embeds.shape[-1]) + ).to(device) + negative_prompt_embeds = torch.cat([negative_prompt_embeds, t5_negative_prompt_embed], dim=-2) return prompt_embeds, pooled_prompt_embeds, negative_prompt_embeds, negative_pooled_prompt_embeds diff --git a/modules/scripts.py b/modules/scripts.py index 59eb4c82b..87a25a56b 100644 --- a/modules/scripts.py +++ b/modules/scripts.py @@ -489,8 +489,10 @@ def before_process(self, p, **kwargs): s = ScriptSummary('before-process') for script in self.alwayson_scripts: try: - script_args = p.script_args[script.args_from:script.args_to] - script.before_process(p, *script_args, **kwargs) + args = p.script_args[script.args_from:script.args_to] + if len(args) == 0: + continue + script.before_process(p, *args, **kwargs) except Exception as e: errors.display(e, f"Error running before process: {script.filename}") s.record(script.title()) @@ -501,6 +503,8 @@ def process(self, p, **kwargs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.process(p, *args, **kwargs) except Exception as e: errors.display(e, f'Running script process: {script.filename}') @@ -513,6 +517,8 @@ def process_images(self, p, **kwargs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue processed = script.process_images(p, *args, **kwargs) except Exception as e: errors.display(e, f'Running script process images: {script.filename}') @@ -525,6 +531,8 @@ def before_process_batch(self, p, **kwargs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.before_process_batch(p, *args, **kwargs) except Exception as e: errors.display(e, f'Running script before process batch: {script.filename}') @@ -536,6 +544,8 @@ def process_batch(self, p, **kwargs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.process_batch(p, *args, **kwargs) except Exception as e: errors.display(e, f'Running script process batch: 
{script.filename}') @@ -547,6 +557,8 @@ def postprocess(self, p, processed): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.postprocess(p, processed, *args) except Exception as e: errors.display(e, f'Running script postprocess: {script.filename}') @@ -558,6 +570,8 @@ def postprocess_batch(self, p, images, **kwargs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.postprocess_batch(p, *args, images=images, **kwargs) except Exception as e: errors.display(e, f'Running script before postprocess batch: {script.filename}') @@ -569,6 +583,8 @@ def postprocess_batch_list(self, p, pp: PostprocessBatchListArgs, **kwargs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.postprocess_batch_list(p, pp, *args, **kwargs) except Exception as e: errors.display(e, f'Running script before postprocess batch list: {script.filename}') @@ -580,6 +596,8 @@ def postprocess_image(self, p, pp: PostprocessImageArgs): for script in self.alwayson_scripts: try: args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) + if len(args) == 0: + continue script.postprocess_image(p, pp, *args) except Exception as e: errors.display(e, f'Running script postprocess image: {script.filename}') diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index b811f33bf..6894c8ff9 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -283,6 +283,25 @@ def forward(self, input_ids): return torch.stack(vecs) +class NNCF_T5DenseGatedActDense(torch.nn.Module): # forward can't find what self is without creating a class + def __init__(self, T5DenseGatedActDense): + super().__init__() + self.wi_0 = T5DenseGatedActDense.wi_0 + self.wi_1 = T5DenseGatedActDense.wi_1 + self.wo = T5DenseGatedActDense.wo + self.dropout = T5DenseGatedActDense.dropout + self.act = T5DenseGatedActDense.act + + def forward(self, hidden_states): + hidden_gelu = self.act(self.wi_0(hidden_states)) + hidden_linear = self.wi_1(hidden_states) + hidden_states = hidden_gelu * hidden_linear + hidden_states = self.dropout(hidden_states) + hidden_states = hidden_states.to(torch.float32) # this line needs to be forced to fp32 + hidden_states = self.wo(hidden_states) + return hidden_states + + def add_circular_option_to_conv_2d(): conv2d_constructor = torch.nn.Conv2d.__init__ diff --git a/modules/sd_hijack_dynamic_atten.py b/modules/sd_hijack_dynamic_atten.py index 5ba9dc6e4..b2d6fdc42 100644 --- a/modules/sd_hijack_dynamic_atten.py +++ b/modules/sd_hijack_dynamic_atten.py @@ -110,8 +110,8 @@ def __init__(self): if not hasattr(F, "scaled_dot_product_attention"): raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") - def __call__( - self, attn, hidden_states: torch.FloatTensor, encoder_hidden_states=None, attention_mask=None, temb=None, scale: float = 1.0) -> torch.FloatTensor: + def __call__(self, attn, hidden_states: torch.Tensor, encoder_hidden_states=None, attention_mask=None, temb=None, *args, **kwargs) -> torch.Tensor: + residual = hidden_states if attn.spatial_norm is not None: hidden_states = attn.spatial_norm(hidden_states, temb) @@ -135,16 +135,15 @@ def __call__( if attn.group_norm is not None: hidden_states = 
attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) - args = () if USE_PEFT_BACKEND else (scale,) - query = attn.to_q(hidden_states, *args) + query = attn.to_q(hidden_states) if encoder_hidden_states is None: encoder_hidden_states = hidden_states elif attn.norm_cross: encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) - key = attn.to_k(encoder_hidden_states, *args) - value = attn.to_v(encoder_hidden_states, *args) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) inner_dim = key.shape[-1] head_dim = inner_dim // attn.heads @@ -167,7 +166,7 @@ def __call__( hidden_states = hidden_states.to(query.dtype) # linear proj - hidden_states = attn.to_out[0](hidden_states, *args) + hidden_states = attn.to_out[0](hidden_states) # dropout hidden_states = attn.to_out[1](hidden_states) @@ -190,13 +189,11 @@ class DynamicAttnProcessorBMM: based on AttnProcessor V1 """ - def __call__(self, attn, hidden_states: torch.FloatTensor, encoder_hidden_states=None, attention_mask=None, - temb=None, scale: float = 1.0) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches + def __call__(self, attn, hidden_states: torch.Tensor, encoder_hidden_states=None, attention_mask=None, + temb=None, *args, **kwargs) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches residual = hidden_states - args = () if USE_PEFT_BACKEND else (scale,) - if attn.spatial_norm is not None: hidden_states = attn.spatial_norm(hidden_states, temb) @@ -214,15 +211,15 @@ def __call__(self, attn, hidden_states: torch.FloatTensor, encoder_hidden_states if attn.group_norm is not None: hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) - query = attn.to_q(hidden_states, *args) + query = attn.to_q(hidden_states) if encoder_hidden_states is None: encoder_hidden_states = hidden_states elif attn.norm_cross: encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) - key = attn.to_k(encoder_hidden_states, *args) - value = attn.to_v(encoder_hidden_states, *args) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) query = attn.head_to_batch_dim(query) key = attn.head_to_batch_dim(key) @@ -294,7 +291,7 @@ def __call__(self, attn, hidden_states: torch.FloatTensor, encoder_hidden_states hidden_states = attn.batch_to_head_dim(hidden_states) # linear proj - hidden_states = attn.to_out[0](hidden_states, *args) + hidden_states = attn.to_out[0](hidden_states) # dropout hidden_states = attn.to_out[1](hidden_states) diff --git a/modules/sd_models.py b/modules/sd_models.py index 2fb4a0292..f039ea316 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -38,6 +38,7 @@ sd_metadata_timer = 0 debug_move = shared.log.trace if os.environ.get('SD_MOVE_DEBUG', None) is not None else lambda *args, **kwargs: None debug_load = os.environ.get('SD_LOAD_DEBUG', None) +debug_process = shared.log.trace if os.environ.get('SD_PROCESS_DEBUG', None) is not None else lambda *args, **kwargs: None diffusers_version = int(diffusers.__version__.split('.')[1]) @@ -202,11 +203,17 @@ def get_closet_checkpoint_match(search_string): if checkpoint_info is not None: return checkpoint_info found = sorted([info for info in checkpoints_list.values() if search_string in info.title], key=lambda x: len(x.title)) - if found: + if found and len(found) > 0: return found[0] found = sorted([info for info in checkpoints_list.values() if search_string.split(' ')[0] in info.title], 
key=lambda x: len(x.title)) - if found: + if found and len(found) > 0: return found[0] + for v in shared.reference_models.values(): + if search_string in v['path'] or os.path.basename(search_string) in v['path']: + model_name = search_string.replace('huggingface/', '') + checkpoint_info = CheckpointInfo(v['path']) # create a virutal model info + checkpoint_info.type = 'huggingface' + return checkpoint_info return None @@ -249,15 +256,16 @@ def select_checkpoint(op='model'): shared.log.info(" or use --ckpt-dir to specify folder with sd models") shared.log.info(" or use --ckpt to force using specific model") return None - checkpoint_info = next(iter(checkpoints_list.values())) + # checkpoint_info = next(iter(checkpoints_list.values())) if model_checkpoint is not None: if model_checkpoint != 'model.ckpt' and model_checkpoint != 'runwayml/stable-diffusion-v1-5': - shared.log.warning(f"Selected checkpoint not found: {model_checkpoint}") + shared.log.warning(f'Selected: {op}="{model_checkpoint}" not found') else: shared.log.info("Selecting first available checkpoint") # shared.log.warning(f"Loading fallback checkpoint: {checkpoint_info.title}") - shared.opts.data['sd_model_checkpoint'] = checkpoint_info.title - shared.log.info(f'Select: {op}="{checkpoint_info.title if checkpoint_info is not None else None}"') + # shared.opts.data['sd_model_checkpoint'] = checkpoint_info.title + else: + shared.log.info(f'Select: {op}="{checkpoint_info.title if checkpoint_info is not None else None}"') return checkpoint_info @@ -545,7 +553,7 @@ def change_backend(): refresh_vae_list() -def detect_pipeline(f: str, op: str = 'model', warning=True): +def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): guess = shared.opts.diffusers_pipeline warn = shared.log.warning if warning else lambda *args, **kwargs: None size = 0 @@ -560,39 +568,25 @@ def detect_pipeline(f: str, op: str = 'model', warning=True): elif (size >= 316 and size <= 324) or (size >= 156 and size <= 164): # 320 or 160 warn(f'Model detected as VAE model, but attempting to load as model: {op}={f} size={size} MB') guess = 'VAE' - elif size >= 4970 and size <= 4976: # 4973 + elif (size >= 4970 and size <= 4976): # 4973 guess = 'Stable Diffusion 2' # SD v2 but could be eps or v-prediction # elif size < 0: # unknown # guess = 'Stable Diffusion 2B' - elif size >= 5791 and size <= 5799: # 5795 - if not shared.native: - warn(f'Model detected as SD-XL refiner model, but attempting to load using backend=original: {op}={f} size={size} MB') + elif (size >= 5791 and size <= 5799): # 5795 if op == 'model': warn(f'Model detected as SD-XL refiner model, but attempting to load a base model: {op}={f} size={size} MB') guess = 'Stable Diffusion XL Refiner' elif (size >= 6611 and size <= 7220): # 6617, HassakuXL is 6776, monkrenRealisticINT_v10 is 7217 - if not shared.native: - warn(f'Model detected as SD-XL base model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'Stable Diffusion XL' - elif size >= 3361 and size <= 3369: # 3368 - if not shared.native: - warn(f'Model detected as SD upscale model, but attempting to load using backend=original: {op}={f} size={size} MB') + elif (size >= 3361 and size <= 3369): # 3368 guess = 'Stable Diffusion Upscale' - elif size >= 4891 and size <= 4899: # 4897 - if not shared.native: - warn(f'Model detected as SD XL inpaint model, but attempting to load using backend=original: {op}={f} size={size} MB') + elif (size >= 4891 and size <= 4899): # 4897 guess = 'Stable Diffusion XL 
Inpaint' - elif size >= 9791 and size <= 9799: # 9794 - if not shared.native: - warn(f'Model detected as SD XL instruct pix2pix model, but attempting to load using backend=original: {op}={f} size={size} MB') + elif (size >= 9791 and size <= 9799): # 9794 guess = 'Stable Diffusion XL Instruct' - elif size > 3138 and size < 3142: #3140 - if not shared.native: - warn(f'Model detected as Segmind Vega model, but attempting to load using backend=original: {op}={f} size={size} MB') + elif (size > 3138 and size < 3142): #3140 guess = 'Stable Diffusion XL' - elif size > 5692 and size < 5698 or size > 4134 and size < 4138: - if not shared.native: - warn(f'Model detected as Stable Diffusion 3 model, but attempting to load using backend=original: {op}={f} size={size} MB') + elif (size > 5692 and size < 5698) or (size > 4134 and size < 4138) or (size > 10362 and size < 10366): guess = 'Stable Diffusion 3' # guess by name """ @@ -602,34 +596,20 @@ def detect_pipeline(f: str, op: str = 'model', warning=True): guess = 'Latent Consistency Model' """ if 'instaflow' in f.lower(): - if not shared.native: - warn(f'Model detected as InstaFlow model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'InstaFlow' if 'segmoe' in f.lower(): - if not shared.native: - warn(f'Model detected as SegMoE model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'SegMoE' if 'hunyuandit' in f.lower(): - if not shared.native: - warn(f'Model detected as Tenecent HunyuanDiT model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'HunyuanDiT' if 'pixart-xl' in f.lower(): - if not shared.native: - warn(f'Model detected as PixArt Alpha model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'PixArt-Alpha' if 'stable-diffusion-3' in f.lower(): - if not shared.native: - warn(f'Model detected as Stable Diffusion 3 model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'Stable Diffusion 3' if 'stable-cascade' in f.lower() or 'stablecascade' in f.lower() or 'wuerstchen3' in f.lower(): - if not shared.native: - warn(f'Model detected as Stable Cascade model, but attempting to load using backend=original: {op}={f} size={size} MB') if devices.dtype == torch.float16: warn('Stable Cascade does not support Float16') guess = 'Stable Cascade' if 'pixart-sigma' in f.lower(): - if not shared.native: - warn(f'Model detected as PixArt-Sigma model, but attempting to load using backend=original: {op}={f} size={size} MB') guess = 'PixArt-Sigma' # switch for specific variant if guess == 'Stable Diffusion' and 'inpaint' in f.lower(): @@ -642,7 +622,8 @@ def detect_pipeline(f: str, op: str = 'model', warning=True): guess = 'Stable Diffusion XL Instruct' # get actual pipeline pipeline = shared_items.get_pipelines().get(guess, None) - shared.log.info(f'Autodetect: {op}="{guess}" class={pipeline.__name__} file="{f}" size={size}MB') + if not quiet: + shared.log.info(f'Autodetect: {op}="{guess}" class={pipeline.__name__} file="{f}" size={size}MB') except Exception as e: shared.log.error(f'Error detecting diffusers pipeline: model={f} {e}') return None, None @@ -650,7 +631,8 @@ def detect_pipeline(f: str, op: str = 'model', warning=True): try: size = round(os.path.getsize(f) / 1024 / 1024) pipeline = shared_items.get_pipelines().get(guess, None) - shared.log.info(f'Diffusers: {op}="{guess}" class={pipeline.__name__} file="{f}" size={size}MB') + if not quiet: + shared.log.info(f'Diffusers: {op}="{guess}" 
class={pipeline.__name__} file="{f}" size={size}MB') except Exception as e: shared.log.error(f'Error loading diffusers pipeline: model={f} {e}') @@ -673,15 +655,10 @@ def copy_diffuser_options(new_pipe, orig_pipe): new_pipe.is_sd1 = getattr(orig_pipe, 'is_sd1', True) -def set_diffuser_options(sd_model, vae = None, op: str = 'model'): +def set_diffuser_options(sd_model, vae = None, op: str = 'model', offload=True): if sd_model is None: shared.log.warning(f'{op} is not loaded') return - if (shared.opts.diffusers_model_cpu_offload or shared.cmd_opts.medvram) and (shared.opts.diffusers_seq_cpu_offload or shared.cmd_opts.lowvram): - shared.log.warning(f'Setting {op}: Model CPU offload and Sequential CPU offload are not compatible') - shared.log.debug(f'Setting {op}: disabling model CPU offload') - shared.opts.diffusers_model_cpu_offload=False - shared.cmd_opts.medvram=False if hasattr(sd_model, "watermark"): sd_model.watermark = NoWatermark() @@ -737,6 +714,20 @@ def set_diffuser_options(sd_model, vae = None, op: str = 'model'): shared.log.debug(f'Setting {op}: enable channels last') sd_model.unet.to(memory_format=torch.channels_last) + if offload: + set_diffuser_offload(sd_model, op) + +def set_diffuser_offload(sd_model, op: str = 'model'): + if sd_model is None: + shared.log.warning(f'{op} is not loaded') + return + if (shared.opts.diffusers_model_cpu_offload or shared.cmd_opts.medvram) and (shared.opts.diffusers_seq_cpu_offload or shared.cmd_opts.lowvram): + shared.log.warning(f'Setting {op}: Model CPU offload and Sequential CPU offload are not compatible') + shared.log.debug(f'Setting {op}: disabling model CPU offload') + shared.opts.diffusers_model_cpu_offload=False + shared.cmd_opts.medvram=False + if not (hasattr(sd_model, "has_accelerate") and sd_model.has_accelerate): + sd_model.has_accelerate = False if hasattr(sd_model, "enable_model_cpu_offload"): if shared.cmd_opts.medvram or shared.opts.diffusers_model_cpu_offload: shared.log.debug(f'Setting {op}: enable model CPU offload') @@ -774,7 +765,7 @@ def move_model(model, device=None, force=False): if model is None or device is None: return if getattr(model, 'vae', None) is not None and get_diffusers_task(model) != DiffusersTaskType.TEXT_2_IMAGE: - if device == devices.device: # force vae back to gpu if not in txt2img mode + if device == devices.device and model.vae.device.type != "meta": # force vae back to gpu if not in txt2img mode model.vae.to(device) if hasattr(model.vae, '_hf_hook'): debug_move(f'Model move: to={device} class={model.vae.__class__} fn={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access @@ -994,14 +985,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No return elif model_type in ['PixArt-Sigma']: # forced pipeline try: - # shared.opts.data['cuda_dtype'] = 'FP32' # override - shared.opts.data['diffusers_model_cpu_offload'] = True # override - devices.set_cuda_params() - sd_model = diffusers.PixArtSigmaPipeline.from_pretrained( - checkpoint_info.path, - use_safetensors=True, - cache_dir=shared.opts.diffusers_dir, - **diffusers_load_config) + from modules.model_pixart import load_pixart + sd_model = load_pixart(checkpoint_info, diffusers_load_config) except Exception as e: shared.log.error(f'Diffusers Failed loading {op}: {checkpoint_info.path} {e}') if debug_load: @@ -1156,8 +1141,17 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No sd_model.embedding_db.load_textual_inversion_embeddings(force_reload=True) timer.record("embeddings") 
- set_diffuser_options(sd_model, vae, op) + from modules.prompt_parser_diffusers import insert_parser_highjack + insert_parser_highjack(sd_model.__class__.__name__) + set_diffuser_options(sd_model, vae, op, offload=False) + if shared.opts.nncf_compress_weights and not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): + sd_model = sd_models_compile.nncf_compress_weights(sd_model) # run this before move model so it can be compressed in CPU + timer.record("options") + + set_diffuser_offload(sd_model, op) + if op == 'model': + sd_vae.apply_vae_config(shared.sd_model.sd_checkpoint_info.filename, vae_file, sd_model) if op == 'refiner' and shared.opts.diffusers_move_refiner: shared.log.debug('Moving refiner model to CPU') move_model(sd_model, devices.cpu) @@ -1165,14 +1159,11 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No move_model(sd_model, devices.device) timer.record("move") - reload_text_encoder() + reload_text_encoder(initial=True) if shared.opts.ipex_optimize: sd_model = sd_models_compile.ipex_optimize(sd_model) - if shared.opts.nncf_compress_weights and not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): - sd_model = sd_models_compile.nncf_compress_weights(sd_model) - if (shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none'): sd_model = sd_models_compile.compile_diffusers(sd_model) timer.record("compile") @@ -1305,11 +1296,25 @@ def switch_pipe(cls: diffusers.DiffusionPipeline, pipeline: diffusers.DiffusionP return pipeline +def clean_diffuser_pipe(pipe): + if pipe is not None and shared.sd_model_type == 'sdxl' and 'requires_aesthetics_score' in pipe.config and hasattr(pipe, '_internal_dict'): + debug_process(f'Pipeline clean: {pipe.__class__.__name__}') + # diffusers adds requires_aesthetics_score with img2img and complains if requires_aesthetics_score exist in txt2img + internal_dict = dict(pipe._internal_dict) # pylint: disable=protected-access + internal_dict.pop('requires_aesthetics_score', None) + del pipe._internal_dict + pipe.register_to_config(**internal_dict) + + def set_diffuser_pipe(pipe, new_pipe_type): + n = getattr(pipe.__class__, '__name__', '') + if new_pipe_type == DiffusersTaskType.TEXT_2_IMAGE: + clean_diffuser_pipe(pipe) + if get_diffusers_task(pipe) == new_pipe_type: return pipe + # skip specific pipelines - n = getattr(pipe.__class__, '__name__', '') if n in ['StableDiffusionReferencePipeline', 'StableDiffusionAdapterPipeline', 'AnimateDiffPipeline', 'AnimateDiffSDXLPipeline']: return pipe if 'Onnx' in pipe.__class__.__name__: @@ -1378,8 +1383,10 @@ def set_attn(pipe, attention): modules = [getattr(pipe, n, None) for n in module_names] modules = [m for m in modules if isinstance(m, torch.nn.Module) and hasattr(m, "set_attn_processor")] for module in modules: - if 'SD3Transformer2DModel' in module.__class__.__name__: # TODO SD3 + if module.__class__.__name__ in ['SD3Transformer2DModel']: module.set_attn_processor(p.JointAttnProcessor2_0()) + elif module.__class__.__name__ in ['HunyuanDiT2DModel']: + pass else: module.set_attn_processor(attention) @@ -1522,11 +1529,19 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None, timer=None, shared.log.info(f'Model load finished: {memory_stats()} cached={len(checkpoints_loaded.keys())}') -def reload_text_encoder(): - if hasattr(shared.sd_model, 'text_encoder_3'): - from modules.model_sd3 import load_te3 - shared.log.debug(f'Load: TE3={shared.opts.sd_te3}') - load_te3(shared.sd_model, 
shared.opts.sd_te3, cache_dir=shared.opts.diffusers_dir) +def reload_text_encoder(initial=False): + if initial and (shared.opts.sd_text_encoder is None or shared.opts.sd_text_encoder == 'None'): + return # dont unload + signature = inspect.signature(shared.sd_model.__class__.__init__, follow_wrapped=True, eval_str=True).parameters + t5 = [k for k, v in signature.items() if 'T5EncoderModel' in str(v)] + if len(t5) > 0: + from modules.model_t5 import set_t5 + shared.log.debug(f'Load: t5={shared.opts.sd_text_encoder} module="{t5[0]}"') + set_t5(pipe=shared.sd_model, module=t5[0], t5=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir) + elif hasattr(shared.sd_model, 'text_encoder_3'): + from modules.model_t5 import set_t5 + shared.log.debug(f'Load: t5={shared.opts.sd_text_encoder} module="text_encoder_3"') + set_t5(pipe=shared.sd_model, module='text_encoder_3', t5=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir) def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', force=False): diff --git a/modules/sd_models_compile.py b/modules/sd_models_compile.py index c6006db25..ed3109869 100644 --- a/modules/sd_models_compile.py +++ b/modules/sd_models_compile.py @@ -58,9 +58,23 @@ def apply_compile_to_model(sd_model, function, options, op=None): sd_model.text_encoder = None sd_model.text_encoder = sd_model.decoder_pipe.text_encoder = function(sd_model.decoder_pipe.text_encoder) else: + if op == "nncf" and sd_model.text_encoder.__class__.__name__ == "T5EncoderModel": + from modules.sd_hijack import NNCF_T5DenseGatedActDense # T5DenseGatedActDense uses fp32 + for i in range(len(sd_model.text_encoder.encoder.block)): + sd_model.text_encoder.encoder.block[i].layer[1].DenseReluDense = NNCF_T5DenseGatedActDense( + sd_model.text_encoder.encoder.block[i].layer[1].DenseReluDense + ) sd_model.text_encoder = function(sd_model.text_encoder) if hasattr(sd_model, 'text_encoder_2') and hasattr(sd_model.text_encoder_2, 'config'): sd_model.text_encoder_2 = function(sd_model.text_encoder_2) + if hasattr(sd_model, 'text_encoder_3') and hasattr(sd_model.text_encoder_3, 'config'): + if op == "nncf" and sd_model.text_encoder_3.__class__.__name__ == "T5EncoderModel": + from modules.sd_hijack import NNCF_T5DenseGatedActDense # T5DenseGatedActDense uses fp32 + for i in range(len(sd_model.text_encoder_3.encoder.block)): + sd_model.text_encoder_3.encoder.block[i].layer[1].DenseReluDense = NNCF_T5DenseGatedActDense( + sd_model.text_encoder_3.encoder.block[i].layer[1].DenseReluDense + ) + sd_model.text_encoder_3 = function(sd_model.text_encoder_3) if hasattr(sd_model, 'prior_pipe') and hasattr(sd_model, 'prior_text_encoder'): sd_model.prior_text_encoder = None sd_model.prior_text_encoder = sd_model.prior_pipe.text_encoder = function(sd_model.prior_pipe.text_encoder) @@ -100,29 +114,31 @@ def ipex_optimize_model(model): shared.log.warning(f"IPEX Optimize: error: {e}") return sd_model +def nncf_send_to_device(model): + for child in model.children(): + if child.__class__.__name__ == "WeightsDecompressor": + child.scale = child.scale.to(devices.device) + child.zero_point = child.zero_point.to(devices.device) + nncf_send_to_device(child) + +def nncf_compress_model(model): + import nncf + model.eval() + backup_embeddings = None + if hasattr(model, "get_input_embeddings"): + backup_embeddings = copy.deepcopy(model.get_input_embeddings()) + model = nncf.compress_weights(model) + nncf_send_to_device(model) + if hasattr(model, "set_input_embeddings") and backup_embeddings is not None: 
+ model.set_input_embeddings(backup_embeddings) + devices.torch_gc(force=True) + return model def nncf_compress_weights(sd_model): try: t0 = time.time() - if sd_model.device.type == "meta": - shared.log.warning("Compress Weights is not compatible with Sequential CPU offload") - return sd_model - - def nncf_compress_model(model): - return_device = model.device - model.eval() - backup_embeddings = None - if hasattr(model, "get_input_embeddings"): - backup_embeddings = copy.deepcopy(model.get_input_embeddings()) - model = nncf.compress_weights(model.to(devices.device)).to(return_device) - if hasattr(model, "set_input_embeddings") and backup_embeddings is not None: - model.set_input_embeddings(backup_embeddings) - devices.torch_gc(force=True) - return model - - import nncf - shared.compiled_model_state = CompiledModelState() - shared.compiled_model_state.is_compiled = True + from installer import install + install('nncf==2.7.0', quiet=True) sd_model = apply_compile_to_model(sd_model, nncf_compress_model, shared.opts.nncf_compress_weights, op="nncf") diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 57b4137ef..54a38cf55 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -40,15 +40,17 @@ def single_sample_to_image(sample, approximation=None): warn_once('Unknown decode type') approximation = 0 # normal sample is [4,64,64] - if sample.dtype == torch.bfloat16: - sample = sample.to(torch.float16) + try: + if sample.dtype == torch.bfloat16: + sample = sample.to(torch.float16) + except Exception as e: + warn_once(f'live preview: {e}') if len(sample.shape) > 4: # likely unknown video latent (e.g. svd) return Image.new(mode="RGB", size=(512, 512)) if len(sample) == 16: # sd_cascade sd_cascade = True if len(sample.shape) == 4 and sample.shape[0]: # likely animatediff latent sample = sample.permute(1, 0, 2, 3)[0] - if shared.native: # [-x,x] to [-5,5] sample_max = torch.max(sample) if sample_max > 5: @@ -56,7 +58,10 @@ def single_sample_to_image(sample, approximation=None): sample_min = torch.min(sample) if sample_min < -5: sample = sample * (5 / abs(sample_min)) - if sd_cascade: + if approximation == 2: # TAESD + x_sample = sd_vae_taesd.decode(sample) + x_sample = (1.0 + x_sample) / 2.0 # preview requires smaller range + elif sd_cascade: x_sample = sd_vae_stablecascade.decode(sample) elif approximation == 0: # Simple x_sample = sd_vae_approx.cheap_approximation(sample) * 0.5 + 0.5 @@ -64,9 +69,6 @@ def single_sample_to_image(sample, approximation=None): x_sample = sd_vae_approx.nn_approximation(sample) * 0.5 + 0.5 if shared.sd_model_type == "sdxl": x_sample = x_sample[[2,1,0], :, :] # BGR to RGB - elif approximation == 2: # TAESD - x_sample = sd_vae_taesd.decode(sample) - x_sample = (1.0 + x_sample) / 2.0 # preview requires smaller range elif approximation == 3: # Full VAE x_sample = processing.decode_first_stage(shared.sd_model, sample.unsqueeze(0))[0] * 0.5 + 0.5 else: diff --git a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py index 25c94cfb2..997ca5c3a 100644 --- a/modules/sd_samplers_diffusers.py +++ b/modules/sd_samplers_diffusers.py @@ -66,7 +66,7 @@ 'Euler EDM': { }, 'DPM++ 2M EDM': { 'solver_order': 2, 'solver_type': 'midpoint', 'final_sigmas_type': 'zero', 'algorithm_type': 'dpmsolver++' }, 'CMSI': { }, #{ 'sigma_min': 0.002, 'sigma_max': 80.0, 'sigma_data': 0.5, 's_noise': 1.0, 'rho': 7.0, 'clip_denoised': True }, - 'Euler FlowMatch': { }, + 'Euler FlowMatch': { 'shift': 1, }, 'IPNDM': { }, } @@ -156,6 +156,8 @@ 
def __init__(self, name, constructor, model, **kwargs): self.config['beta_start'] = shared.opts.schedulers_beta_start if 'beta_end' in self.config and shared.opts.schedulers_beta_end > 0: self.config['beta_end'] = shared.opts.schedulers_beta_end + if 'shift' in self.config and shared.opts.schedulers_shift != 1: + self.config['shift'] = shared.opts.schedulers_shift if 'rescale_betas_zero_snr' in self.config: self.config['rescale_betas_zero_snr'] = shared.opts.schedulers_rescale_betas if 'timestep_spacing' in self.config and shared.opts.schedulers_timestep_spacing != 'default' and shared.opts.schedulers_timestep_spacing is not None: diff --git a/modules/sd_vae.py b/modules/sd_vae.py index 2e27393e2..53b89161f 100644 --- a/modules/sd_vae.py +++ b/modules/sd_vae.py @@ -155,8 +155,6 @@ def load_vae(model, vae_file=None, vae_source="unknown-source"): except Exception as e: shared.log.error(f"Loading VAE failed: model={vae_file} source={vae_source} {e}") restore_base_vae(model) - # If vae used is not in dict, update it - # It will be removed on refresh though vae_opt = get_filename(vae_file) if vae_opt not in vae_dict: vae_dict[vae_opt] = vae_file @@ -165,6 +163,26 @@ def load_vae(model, vae_file=None, vae_source="unknown-source"): loaded_vae_file = vae_file +def apply_vae_config(model_file, vae_file, sd_model): + def get_vae_config(): + config_file = os.path.join(paths.sd_configs_path, os.path.splitext(os.path.basename(model_file))[0] + '_vae.json') + if config_file is not None and os.path.exists(config_file): + return shared.readfile(config_file) + config_file = os.path.join(paths.sd_configs_path, os.path.splitext(os.path.basename(vae_file))[0] + '.json') if vae_file else None + if config_file is not None and os.path.exists(config_file): + return shared.readfile(config_file) + config_file = os.path.join(paths.sd_configs_path, shared.sd_model_type, 'vae', 'config.json') + if config_file is not None and os.path.exists(config_file): + return shared.readfile(config_file) + return {} + + if hasattr(sd_model, 'vae') and hasattr(sd_model.vae, 'config'): + config = get_vae_config() + for k, v in config.items(): + if k in sd_model.vae.config and not k.startswith('_'): + sd_model.vae.config[k] = v + + def load_vae_diffusers(model_file, vae_file=None, vae_source="unknown-source"): if vae_file is None: return None @@ -241,6 +259,11 @@ def reload_vae_weights(sd_model=None, vae_file=unspecified): vae_file, vae_source = resolve_vae(checkpoint_file) else: vae_source = "function-argument" + if vae_file is None or vae_file == 'None': + if hasattr(sd_model, 'original_vae'): + sd_models.set_diffuser_options(sd_model, vae=sd_model.original_vae, op='vae') + shared.log.info("VAE restored") + return None if loaded_vae_file == vae_file: return None if not shared.native and (shared.cmd_opts.lowvram or shared.cmd_opts.medvram): @@ -258,10 +281,14 @@ def reload_vae_weights(sd_model=None, vae_file=unspecified): if vae_file is not None: shared.log.info(f"VAE weights loaded: {vae_file}") else: - if hasattr(shared.sd_model, "vae") and hasattr(shared.sd_model, "sd_checkpoint_info"): - vae = load_vae_diffusers(shared.sd_model.sd_checkpoint_info.filename, vae_file, vae_source) + if hasattr(sd_model, "vae") and hasattr(sd_model, "sd_checkpoint_info"): + vae = load_vae_diffusers(sd_model.sd_checkpoint_info.filename, vae_file, vae_source) if vae is not None: + if not hasattr(sd_model, 'original_vae'): + sd_model.original_vae = sd_model.vae + sd_models.move_model(sd_model.original_vae, devices.cpu) 
sd_models.set_diffuser_options(sd_model, vae=vae, op='vae') + apply_vae_config(sd_model.sd_checkpoint_info.filename, vae_file, sd_model) if not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram: sd_models.move_model(sd_model, devices.device) diff --git a/modules/sd_vae_approx.py b/modules/sd_vae_approx.py index e66b78011..2b4399edb 100644 --- a/modules/sd_vae_approx.py +++ b/modules/sd_vae_approx.py @@ -34,21 +34,24 @@ def forward(self, x): def nn_approximation(sample): # Approximate NN global sd_vae_approx_model # pylint: disable=global-statement + # ROCm throws memory exceptions and crashes the GPU with it if we use approx on the GPU + device = devices.device if devices.backend != "rocm" else "cpu" + dtype = devices.dtype_vae if devices.backend != "rocm" else torch.float32 if sd_vae_approx_model is None: model_path = os.path.join(paths.models_path, "VAE-approx", "model.pt") sd_vae_approx_model = VAEApprox() if not os.path.exists(model_path): model_path = os.path.join(paths.script_path, "models", "VAE-approx", "model.pt") - approx_weights = torch.load(model_path, map_location='cpu' if devices.device.type != 'cuda' else None) + approx_weights = torch.load(model_path, map_location='cpu' if devices.device.type != 'cuda' or devices.backend == "rocm" else None) sd_vae_approx_model.load_state_dict(approx_weights) sd_vae_approx_model.eval() - sd_vae_approx_model.to(devices.device, sample.dtype) + sd_vae_approx_model.to(device, dtype) shared.log.debug(f'VAE load: type=approximate model={model_path}') try: - in_sample = sample.to(devices.device).unsqueeze(0) - sd_vae_approx_model.to(devices.device, devices.dtype) + in_sample = sample.to(device, dtype).unsqueeze(0) + sd_vae_approx_model.to(device, dtype) x_sample = sd_vae_approx_model(in_sample) - x_sample = x_sample[0].detach().cpu() + x_sample = x_sample[0].to(torch.float32).detach().cpu() return x_sample except Exception as e: shared.log.error(f'VAE decode approximate: {e}') diff --git a/modules/sd_vae_taesd.py b/modules/sd_vae_taesd.py index 2a3c427ee..d5edbd11f 100644 --- a/modules/sd_vae_taesd.py +++ b/modules/sd_vae_taesd.py @@ -11,7 +11,14 @@ from modules import devices, paths -taesd_models = { 'sd-decoder': None, 'sd-encoder': None, 'sdxl-decoder': None, 'sdxl-encoder': None } +taesd_models = { + 'sd-decoder': None, + 'sd-encoder': None, + 'sdxl-decoder': None, + 'sdxl-encoder': None, + 'sd3-decoder': None, + 'sd3-encoder': None, +} previous_warnings = False @@ -31,33 +38,63 @@ def __init__(self, n_in, n_out): def forward(self, x): return self.fuse(self.conv(x) + self.skip(x)) -def Encoder(): +def Encoder(latent_channels=4): return nn.Sequential( conv(3, 64), Block(64, 64), conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), - conv(64, 4), + conv(64, latent_channels), ) -def Decoder(): +def Decoder(latent_channels=4): return nn.Sequential( - Clamp(), conv(4, 64), nn.ReLU(), + Clamp(), conv(latent_channels, 64), nn.ReLU(), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), Block(64, 64), conv(64, 3), ) + +class TAESD2(nn.Module): # pylint: disable=abstract-method + latent_magnitude = 3 + latent_shift = 
0.5 + + def __init__(self, encoder_path="taesd_encoder.pth", decoder_path="taesd_decoder.pth", latent_channels=None): + """Initialize pretrained TAESD on the given device from the given checkpoints.""" + super().__init__() + if latent_channels is None: + latent_channels = 16 if "taesd3" in str(encoder_path) else 4 + self.encoder = Encoder(latent_channels) + self.decoder = Decoder(latent_channels) + if encoder_path is not None: + self.encoder.load_state_dict(torch.load(encoder_path, map_location="cpu")) + if decoder_path is not None: + self.decoder.load_state_dict(torch.load(decoder_path, map_location="cpu")) + + @staticmethod + def scale_latents(x): + """raw latents -> [0, 1]""" + return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1) + + @staticmethod + def unscale_latents(x): + """[0, 1] -> raw latents""" + return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude) + + class TAESD(nn.Module): # pylint: disable=abstract-method latent_magnitude = 3 latent_shift = 0.5 - def __init__(self, encoder_path="taesd_encoder.pth", decoder_path="taesd_decoder.pth"): + def __init__(self, encoder_path="taesd_encoder.pth", decoder_path="taesd_decoder.pth", latent_channels=None): """Initialize pretrained TAESD on the given device from the given checkpoints.""" super().__init__() - self.encoder = Encoder() - self.decoder = Decoder() + if latent_channels is None: + latent_channels = 16 if "taesd3" in str(encoder_path) or "taesd3" in str(decoder_path) else 4 + self.encoder = Encoder(latent_channels) + self.decoder = Decoder(latent_channels) if encoder_path is not None: self.encoder.load_state_dict(torch.load(encoder_path, map_location="cpu")) if decoder_path is not None: @@ -105,13 +142,16 @@ def model(model_class = 'sd', model_type = 'decoder'): def decode(latents): + global previous_warnings # pylint: disable=global-statement from modules import shared model_class = shared.sd_model_type if model_class == 'ldm': model_class = 'sd' dtype = devices.dtype_vae if devices.dtype_vae != torch.bfloat16 else torch.float16 # taesd does not support bf16 if 'sd' not in model_class: - shared.log.warning(f'TAESD unsupported model type: {model_class}') + if not previous_warnings: + previous_warnings = True + shared.log.warning(f'TAESD unsupported model type: {model_class}') return Image.new('RGB', (8, 8), color = (0, 0, 0)) vae = taesd_models[f'{model_class}-decoder'] if vae is None: diff --git a/modules/shared.py b/modules/shared.py index 571f06052..1cf31c491 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -41,7 +41,7 @@ xformers_available = False locking_available = True clip_model = None -interrogator = modules.interrogate.InterrogateModels("interrogate") +interrogator = modules.interrogate.InterrogateModels(os.path.join("models", "interrogate")) sd_upscalers = [] face_restorers = [] tab_names = [] @@ -330,8 +330,9 @@ def temp_disable_extensions(): modules.shared.opts.data['theme_type'] = 'None' modules.shared.opts.data['gradio_theme'] = theme_name else: - modules.shared.opts.data['theme_type'] = 'None' - modules.shared.opts.data['gradio_theme'] = theme_name + modules.shared.log.error(f'UI theme invalid: theme="{theme_name}" available={["standard/*", "modern/*", "none/*"]} fallback="standard/black-teal"') + modules.shared.opts.data['theme_type'] = 'Standard' + modules.shared.opts.data['gradio_theme'] = 'black-teal' for ext in disable_themes: if ext.lower() not in opts.disabled_extensions: @@ -385,19 +386,20 @@ def temp_disable_extensions(): sdp_options_default = ['Flash 
attention', 'Memory attention', 'Math attention'] options_templates.update(options_section(('sd', "Execution & Models"), { - "sd_backend": OptionInfo(default_backend, "Execution backend", gr.Radio, {"choices": ["original", "diffusers"] }), + "sd_backend": OptionInfo(default_backend, "Execution backend", gr.Radio, {"choices": ["diffusers", "original"] }), "sd_model_checkpoint": OptionInfo(default_checkpoint, "Base model", gr.Dropdown, lambda: {"choices": list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_model_refiner": OptionInfo('None', "Refiner model", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list), "sd_unet": OptionInfo("None", "UNET model", gr.Dropdown, lambda: {"choices": shared_items.sd_unet_items()}, refresh=shared_items.refresh_unet_list), - "sd_te3": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": ['None', 'T5 FP8', 'T5 FP16']}), - "sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"), + "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": ['None', 'T5 FP4', 'T5 FP8', 'T5 INT8', 'T5 FP16']}), "sd_model_dict": OptionInfo('None', "Use separate base dict", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), + "sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"), + "sd_textencoder_cache": OptionInfo(True, "Cache text encoder results"), "stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": not native }), "model_reuse_dict": OptionInfo(False, "Reuse loaded model dictionary", gr.Checkbox, {"visible": False}), - "prompt_attention": OptionInfo("Full parser", "Prompt attention parser", gr.Radio, {"choices": ["Full parser", "Compel parser", "A1111 parser", "Fixed attention"] }), "prompt_mean_norm": OptionInfo(False, "Prompt attention normalization", gr.Checkbox), "comma_padding_backtrack": OptionInfo(20, "Prompt padding", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1, "visible": not native }), + "prompt_attention": OptionInfo("Full parser", "Prompt attention parser", gr.Radio, {"choices": ["Full parser", "Compel parser", "A1111 parser", "Fixed attention"] }), "sd_checkpoint_cache": OptionInfo(0, "Cached models", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": not native }), "sd_vae_checkpoint_cache": OptionInfo(0, "Cached VAEs", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": False}), "sd_disable_ckpt": OptionInfo(False, "Disallow models in ckpt format", gr.Checkbox, {"visible": False}), @@ -448,7 +450,7 @@ def temp_disable_extensions(): "deep_cache_interval": OptionInfo(3, "DeepCache cache interval", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}), "nncf_sep": OptionInfo("
Model Compress
", "", gr.HTML), - "nncf_compress_weights": OptionInfo([], "Compress Model weights with NNCF", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}), + "nncf_compress_weights": OptionInfo([], "Compress Model weights with NNCF", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}), "ipex_sep": OptionInfo("
IPEX
", "", gr.HTML, {"visible": devices.backend == "ipex"}), "ipex_optimize": OptionInfo([], "IPEX Optimize for Intel GPUs", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"], "visible": devices.backend == "ipex"}), @@ -715,6 +717,7 @@ def temp_disable_extensions(): 'schedulers_timesteps_range': OptionInfo(1000, "Timesteps range", gr.Slider, {"minimum": 250, "maximum": 4000, "step": 1}), 'schedulers_timesteps': OptionInfo('', "Timesteps"), "schedulers_rescale_betas": OptionInfo(False, "Rescale betas with zero terminal SNR", gr.Checkbox), + 'schedulers_shift': OptionInfo(1, "Sampler shift", gr.Slider, {"minimum": 0.1, "maximum": 10, "step": 0.1}), # managed from ui.py for backend original k-diffusion "schedulers_sep_kdiffusers": OptionInfo("
K-Diffusion specific config
", "", gr.HTML), @@ -774,19 +777,19 @@ def temp_disable_extensions(): "control_unload_processor": OptionInfo(False, "Processor unload after use"), })) -options_templates.update(options_section(('training', "Training"), { - "unload_models_when_training": OptionInfo(False, "Move VAE and CLIP to RAM when training"), - "pin_memory": OptionInfo(True, "Pin training dataset to memory"), - "save_optimizer_state": OptionInfo(False, "Save resumable optimizer state when training"), - "save_training_settings_to_txt": OptionInfo(True, "Save training settings to a text file"), - "dataset_filename_word_regex": OptionInfo("", "Filename word regex"), - "dataset_filename_join_string": OptionInfo(" ", "Filename join string"), - "embeddings_templates_dir": OptionInfo(os.path.join(paths.script_path, 'train', 'templates'), "Embeddings train templates directory", folder=True), - "training_image_repeats_per_epoch": OptionInfo(1, "Image repeats per epoch", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1}), - "training_write_csv_every": OptionInfo(0, "Save loss CSV file every n steps"), - "training_enable_tensorboard": OptionInfo(False, "Enable tensorboard logging"), - "training_tensorboard_save_images": OptionInfo(False, "Save generated images within tensorboard"), - "training_tensorboard_flush_every": OptionInfo(120, "Tensorboard flush period"), +options_templates.update(options_section(('interrogate', "Interrogate"), { # "Training" section disabled so just a placeholder + "unload_models_when_training": OptionInfo(False, "Move VAE and CLIP to RAM when training", gr.Checkbox, { "visible": False }), + "pin_memory": OptionInfo(True, "Pin training dataset to memory", gr.Checkbox, { "visible": False }), + "save_optimizer_state": OptionInfo(False, "Save resumable optimizer state when training", gr.Checkbox, { "visible": False }), + "save_training_settings_to_txt": OptionInfo(True, "Save training settings to a text file", gr.Checkbox, { "visible": False }), + "dataset_filename_word_regex": OptionInfo("", "Filename word regex", gr.Textbox, { "visible": False }), + "dataset_filename_join_string": OptionInfo(" ", "Filename join string", gr.Textbox, { "visible": False }), + "embeddings_templates_dir": OptionInfo("", "Embeddings train templates directory", gr.Textbox, { "visible": False }), + "training_image_repeats_per_epoch": OptionInfo(1, "Image repeats per epoch", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1, "visible": False }), + "training_write_csv_every": OptionInfo(0, "Save loss CSV file every n steps", gr.Number, { "visible": False }), + "training_enable_tensorboard": OptionInfo(False, "Enable tensorboard logging", gr.Checkbox, { "visible": False }), + "training_tensorboard_save_images": OptionInfo(False, "Save generated images within tensorboard", gr.Checkbox, { "visible": False }), + "training_tensorboard_flush_every": OptionInfo(120, "Tensorboard flush period", gr.Number, { "visible": False }), })) options_templates.update(options_section(('interrogate', "Interrogate"), { @@ -804,13 +807,13 @@ def temp_disable_extensions(): "deepbooru_filter_tags": OptionInfo("", "Filter out tags from deepbooru output"), })) -options_templates.update(options_section(('extra_networks', "Extra Networks"), { +options_templates.update(options_section(('extra_networks', "Networks"), { "extra_networks_sep1": OptionInfo("
Extra networks UI
", "", gr.HTML), - "extra_networks": OptionInfo(["All"], "Extra networks", gr.Dropdown, lambda: {"multiselect":True, "choices": ['All'] + [en.title for en in extra_networks]}), + "extra_networks": OptionInfo(["All"], "Networks", gr.Dropdown, lambda: {"multiselect":True, "choices": ['All'] + [en.title for en in extra_networks]}), "extra_networks_sort": OptionInfo("Default", "Sort order", gr.Dropdown, {"choices": ['Default', 'Name [A-Z]', 'Name [Z-A]', 'Date [Newest]', 'Date [Oldest]', 'Size [Largest]', 'Size [Smallest]']}), "extra_networks_view": OptionInfo("gallery", "UI view", gr.Radio, {"choices": ["gallery", "list"]}), "extra_networks_card_cover": OptionInfo("sidebar", "UI position", gr.Radio, {"choices": ["cover", "inline", "sidebar"]}), - "extra_networks_height": OptionInfo(53, "UI height (%)", gr.Slider, {"minimum": 10, "maximum": 100, "step": 1}), + "extra_networks_height": OptionInfo(55, "UI height (%)", gr.Slider, {"minimum": 10, "maximum": 100, "step": 1}), "extra_networks_sidebar_width": OptionInfo(35, "UI sidebar width (%)", gr.Slider, {"minimum": 10, "maximum": 80, "step": 1}), "extra_networks_card_size": OptionInfo(160, "UI card size (px)", gr.Slider, {"minimum": 20, "maximum": 2000, "step": 1}), "extra_networks_card_square": OptionInfo(True, "UI disable variable aspect ratio"), @@ -818,7 +821,7 @@ def temp_disable_extensions(): "extra_networks_sep2": OptionInfo("
Extra networks general
", "", gr.HTML), "extra_network_reference": OptionInfo(False, "Use reference values when available", gr.Checkbox), "extra_network_skip_indexing": OptionInfo(False, "Build info on first access", gr.Checkbox), - "extra_networks_default_multiplier": OptionInfo(1.0, "Default multiplier for extra networks", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), + "extra_networks_default_multiplier": OptionInfo(1.0, "Default strength for extra networks", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), "diffusers_convert_embed": OptionInfo(False, "Auto-convert SD 1.5 embeddings to SDXL ", gr.Checkbox, {"visible": native}), "extra_networks_sep3": OptionInfo("
Extra networks settings
", "", gr.HTML), "extra_networks_styles": OptionInfo(True, "Show built-in styles"), diff --git a/modules/shared_state.py b/modules/shared_state.py index 470ee19c7..79ee20f19 100644 --- a/modules/shared_state.py +++ b/modules/shared_state.py @@ -41,10 +41,12 @@ def pause(self): log.debug(f'Requested {"pause" if self.paused else "continue"}') def nextjob(self): + import modules.devices self.do_set_current_image() self.job_no += 1 self.sampling_step = 0 self.current_image_sampling_step = 0 + modules.devices.torch_gc() def dict(self): obj = { diff --git a/modules/styles.py b/modules/styles.py index d118800e8..4269b3f55 100644 --- a/modules/styles.py +++ b/modules/styles.py @@ -328,7 +328,7 @@ def save_styles(self, path, verbose=False): "preview": "", } keepcharacters = (' ','.','_') - fn = "".join(c for c in name if c.isalnum() or c in keepcharacters).rstrip() + fn = "".join(c for c in name if c.isalnum() or c in keepcharacters).strip() fn = os.path.join(path, fn + ".json") try: with open(fn, 'w', encoding='utf-8') as f: diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index e0fcbc55f..b0779716b 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -13,17 +13,6 @@ debug = shared.log.trace if os.environ.get('SD_TI_DEBUG', None) is not None else lambda *args, **kwargs: None debug('Trace: TEXTUAL INVERSION') TokenToAdd = namedtuple("TokenToAdd", ["clip_l", "clip_g"]) -TextualInversionTemplate = namedtuple("TextualInversionTemplate", ["name", "path"]) -textual_inversion_templates = {} - - -def list_textual_inversion_templates(): - textual_inversion_templates.clear() - for root, _dirs, fns in os.walk(shared.opts.embeddings_templates_dir): - for fn in fns: - path = os.path.join(root, fn) - textual_inversion_templates[fn] = TextualInversionTemplate(fn, path) - return textual_inversion_templates def list_embeddings(*dirs): diff --git a/modules/theme.py b/modules/theme.py index 8dfde3e22..26bb39858 100644 --- a/modules/theme.py +++ b/modules/theme.py @@ -91,7 +91,6 @@ def reload_gradio_theme(): 'font_mono':['IBM Plex Mono', 'ui-monospace', 'Consolas', 'monospace'] } gradio_theme = gr.themes.Base(**default_font_params) - available_themes = list_themes() if theme_name not in available_themes: modules.shared.log.error(f'UI theme invalid: type={modules.shared.opts.theme_type} theme="{theme_name}" available={available_themes}') @@ -99,6 +98,9 @@ def reload_gradio_theme(): theme_name = 'black-teal' elif modules.shared.opts.theme_type == 'Modern': theme_name = 'Default' + else: + modules.shared.opts.theme_type = 'Standard' + theme_name = 'black-teal' modules.shared.opts.data['gradio_theme'] = theme_name diff --git a/modules/txt2img.py b/modules/txt2img.py index 56abf3de4..76b0a7c45 100644 --- a/modules/txt2img.py +++ b/modules/txt2img.py @@ -35,8 +35,7 @@ def txt2img(id_task, shared.log.warning('Sampler: invalid') sampler_index = 0 if hr_sampler_index is None: - shared.log.warning('Sampler: invalid') - hr_sampler_index = 0 + hr_sampler_index = sampler_index p = processing.StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py index 7a49ce660..505a62145 100644 --- a/modules/ui_extra_networks.py +++ b/modules/ui_extra_networks.py @@ -71,7 +71,7 @@ def get_metadata(page: str = "", item: str = ""): metadata = page.metadata.get(item, 'none') if metadata is None: metadata = '' - # shared.log.debug(f"Extra networks 
metadata: page='{page}' item={item} len={len(metadata)}") + # shared.log.debug(f"Networks metadata: page='{page}' item={item} len={len(metadata)}") return JSONResponse({"metadata": metadata}) def get_info(page: str = "", item: str = ""): @@ -84,7 +84,7 @@ def get_info(page: str = "", item: str = ""): info = page.find_info(item['filename']) if info is None: info = {} - # shared.log.debug(f"Extra networks info: page='{page.name}' item={item['name']} len={len(info)}") + # shared.log.debug(f"Networks info: page='{page.name}' item={item['name']} len={len(info)}") return JSONResponse({"info": info}) def get_desc(page: str = "", item: str = ""): @@ -97,7 +97,7 @@ def get_desc(page: str = "", item: str = ""): desc = page.find_description(item['filename']) if desc is None: desc = '' - # shared.log.debug(f"Extra networks desc: page='{page.name}' item={item['name']} len={len(desc)}") + # shared.log.debug(f"Networks desc: page='{page.name}' item={item['name']} len={len(desc)}") return JSONResponse({"description": desc}) app.add_api_route("/sd_extra_networks/thumb", fetch_file, methods=["GET"]) @@ -186,7 +186,7 @@ def create_thumb(self): except Exception as e: shared.log.warning(f'Extra network error creating thumbnail: {f} {e}') if created > 0: - shared.log.info(f"Extra network thumbnails: {self.name} created={created}") + shared.log.info(f"Network thumbnails: {self.name} created={created}") self.missing_thumbs.clear() def create_items(self, tabname): @@ -235,7 +235,7 @@ def create_page(self, tabname, skip = False): continue # if not self.is_empty(tgt): subdirs[subdir] = 1 - debug(f"Extra networks: page='{self.name}' subfolders={list(subdirs)}") + debug(f"Networks: page='{self.name}' subfolders={list(subdirs)}") subdirs = OrderedDict(sorted(subdirs.items())) if self.name == 'model': subdirs['Reference'] = 1 @@ -272,7 +272,7 @@ def create_page(self, tabname, skip = False): self.html += ''.join(htmls) self.page_time = time.time() self.html = f"
{subdirs_html}
{self.html}
" - shared.log.debug(f"Extra networks: page='{self.name}' items={len(self.items)} subfolders={len(subdirs)} tab={tabname} folders={self.allowed_directories_for_previews()} list={self.list_time:.2f} thumb={self.preview_time:.2f} desc={self.desc_time:.2f} info={self.info_time:.2f} workers={shared.max_workers} sort={shared.opts.extra_networks_sort}") + shared.log.debug(f"Networks: page='{self.name}' items={len(self.items)} subfolders={len(subdirs)} tab={tabname} folders={self.allowed_directories_for_previews()} list={self.list_time:.2f} thumb={self.preview_time:.2f} desc={self.desc_time:.2f} info={self.info_time:.2f} workers={shared.max_workers} sort={shared.opts.extra_networks_sort}") if len(self.missing_thumbs) > 0: threading.Thread(target=self.create_thumb).start() return self.patch(self.html, tabname) @@ -570,7 +570,7 @@ def toggle_visibility(is_visible): with gr.Group(elem_id=f"{tabname}_extra_details_tabs", visible=False) as ui.details_tabs: with gr.Tabs(): with gr.Tab('Description', elem_classes=['extra-details-tabs']): - desc = gr.Textbox('', show_label=False, lines=8, placeholder="Extra network description...") + desc = gr.Textbox('', show_label=False, lines=8, placeholder="Network description...") ui.details_components.append(desc) with gr.Row(): btn_save_desc = gr.Button('Save', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_save_desc') @@ -812,15 +812,22 @@ def show_details(text, img, desc, info, meta, description, prompt, negative, par Preview Embedded{item.preview.startswith('data:')} ''' # desc = f'Name: {os.path.basename(item.name)}\nDescription: {item.description}\nPrompt: {item.prompt}\nNegative: {item.negative}\nExtra: {item.extra}\n' + if item.name.startswith('Diffusers'): + url = item.name.replace('Diffusers/', '') + url = f'https://huggingface.co/models/{url}' if url is not None else 'N/A' + else: + url = info.get('id', None) if info is not None else None + url = f'civitai.com/models/{url}' if url is not None else 'N/A' text = f'''

{item.name}

- +
+ {lora} {model} @@ -888,7 +895,8 @@ def ui_save_click(): return res def ui_quicksave_click(name): - if name is None: + if name is None or len(name) < 1: + shared.log.warning("Network quick save style: no name provided") return fn = os.path.join(paths.data_path, "params.txt") if os.path.exists(fn): @@ -908,9 +916,9 @@ def ui_quicksave_click(name): } shared.writefile(item, fn, silent=True) if len(prompt) > 0: - shared.log.debug(f"Extra network quick save style: item={name} filename='{fn}'") + shared.log.debug(f"Network quick save style: item={name} filename='{fn}'") else: - shared.log.warning(f"Extra network quick save model: item={name} filename='{fn}' prompt is empty") + shared.log.warning(f"Network quick save model: item={name} filename='{fn}' prompt is empty") def ui_sort_cards(sort_order): if shared.opts.extra_networks_sort != sort_order: diff --git a/modules/ui_extra_networks_checkpoints.py b/modules/ui_extra_networks_checkpoints.py index a6c95ee7c..7a5285d49 100644 --- a/modules/ui_extra_networks_checkpoints.py +++ b/modules/ui_extra_networks_checkpoints.py @@ -64,7 +64,7 @@ def create_item(self, name): record["info"] = self.find_info(checkpoint.filename) record["description"] = self.find_description(checkpoint.filename, record["info"]) except Exception as e: - shared.log.debug(f"Extra networks error: type=model file={name} {e}") + shared.log.debug(f"Networks error: type=model file={name} {e}") return record def list_items(self): diff --git a/modules/ui_extra_networks_hypernets.py b/modules/ui_extra_networks_hypernets.py index caf861938..b6fbbd38f 100644 --- a/modules/ui_extra_networks_hypernets.py +++ b/modules/ui_extra_networks_hypernets.py @@ -27,7 +27,7 @@ def list_items(self): "size": os.path.getsize(path), } except Exception as e: - shared.log.debug(f"Extra networks error: type=hypernetwork file={path} {e}") + shared.log.debug(f"Networks error: type=hypernetwork file={path} {e}") def allowed_directories_for_previews(self): return [shared.opts.hypernetwork_dir] diff --git a/modules/ui_extra_networks_styles.py b/modules/ui_extra_networks_styles.py index d8bb13cfa..f03cb22be 100644 --- a/modules/ui_extra_networks_styles.py +++ b/modules/ui_extra_networks_styles.py @@ -93,11 +93,12 @@ def create_item(self, k): "size": os.path.getsize(style.filename), } except Exception as e: - shared.log.debug(f"Extra networks error: type=style file={k} {e}") + shared.log.debug(f"Networks error: type=style file={k} {e}") return item def list_items(self): items = [self.create_item(k) for k in list(shared.prompt_styles.styles)] + items = [item for item in items if item is not None] self.update_all_previews(items) return items diff --git a/modules/ui_extra_networks_textual_inversion.py b/modules/ui_extra_networks_textual_inversion.py index 3b0ec0948..0e086e55d 100644 --- a/modules/ui_extra_networks_textual_inversion.py +++ b/modules/ui_extra_networks_textual_inversion.py @@ -37,7 +37,7 @@ def create_item(self, embedding: Embedding): record["info"] = self.find_info(embedding.filename) record["description"] = self.find_description(embedding.filename, record["info"]) except Exception as e: - shared.log.debug(f"Extra networks error: type=embedding file={embedding.filename} {e}") + shared.log.debug(f"Networks error: type=embedding file={embedding.filename} {e}") return record def list_items(self): diff --git a/modules/ui_extra_networks_vae.py b/modules/ui_extra_networks_vae.py index a4a212fdf..8a161bbbb 100644 --- a/modules/ui_extra_networks_vae.py +++ b/modules/ui_extra_networks_vae.py @@ -31,7 +31,7 @@ 
def list_items(self): record["description"] = self.find_description(filename, record["info"]) yield record except Exception as e: - shared.log.debug(f"Extra networks error: type=vae file={filename} {e}") + shared.log.debug(f"Networks error: type=vae file={filename} {e}") def allowed_directories_for_previews(self): return [v for v in [shared.opts.vae_dir] if v is not None] diff --git a/modules/ui_models.py b/modules/ui_models.py index c7fa8425b..d29f2f404 100644 --- a/modules/ui_models.py +++ b/modules/ui_models.py @@ -431,7 +431,7 @@ def civit_search_model(name, tag, model_type): r = req(url) log.debug(f'CivitAI search: name="{name}" tag={tag or "none"} url="{url}" status={r.status_code}') if r.status_code != 200: - return [], [], [] + return [], gr.update(visible=False, value=[]), gr.update(visible=False, value=None), gr.update(visible=False, value=None) body = r.json() nonlocal data data = body.get('items', []) diff --git a/modules/ui_postprocessing.py b/modules/ui_postprocessing.py index 42d1f9847..b1948a27b 100644 --- a/modules/ui_postprocessing.py +++ b/modules/ui_postprocessing.py @@ -80,7 +80,7 @@ def create_ui(): with gr.Row(): vqa_answer = gr.Textbox(label="Answer", lines=3) with gr.Row(elem_id='interrogate_buttons_query'): - vqa_model = gr.Dropdown(list(vqa.MODELS), value='Moondream 2', label='VQA Model') + vqa_model = gr.Dropdown(list(vqa.MODELS), value='MS Florence 2 Base', label='VQA Model') vqa_submit = gr.Button("Interrogate", elem_id="interrogate_btn_interrogate", variant='primary') vqa_submit.click(vqa.interrogate, inputs=[vqa_question, vqa_image, vqa_model], outputs=[vqa_answer]) diff --git a/modules/ui_prompt_styles.py b/modules/ui_prompt_styles.py index f0b031eaa..a7f81ee8b 100644 --- a/modules/ui_prompt_styles.py +++ b/modules/ui_prompt_styles.py @@ -46,60 +46,3 @@ def refresh_styles(): class UiPromptStyles: def __init__(self, tabname, main_ui_prompt, main_ui_negative_prompt): # pylint: disable=unused-argument self.dropdown = gr.Dropdown(label="Styles", elem_id=f"{tabname}_styles", choices=[style.name for style in shared.prompt_styles.styles.values()], value=[], multiselect=True) - - """ - def __init__(self, tabname, main_ui_prompt, main_ui_negative_prompt): - self.tabname = tabname - - with gr.Row(elem_id=f"{tabname}_styles_row"): - self.dropdown = gr.Dropdown(label="Styles", show_label=False, elem_id=f"{tabname}_styles", choices=list(shared.prompt_styles.styles), value=[], multiselect=True, tooltip="Styles") - edit_button = ui_components.ToolButton(value=styles_edit_symbol, elem_id=f"{tabname}_styles_edit_button", tooltip="Edit styles") - - with gr.Box(elem_id=f"{tabname}_styles_dialog", elem_classes="popup-dialog") as styles_dialog: - with gr.Row(): - self.selection = gr.Dropdown(label="Styles", elem_id=f"{tabname}_styles_edit_select", choices=list(shared.prompt_styles.styles), value=[], allow_custom_value=True, info="Styles allow you to add custom text to prompt. Use the {prompt} token in style text, and it will be replaced with user's prompt when applying style. 
Otherwise, style's text will be added to the end of the prompt.") - ui_common.create_refresh_button([self.dropdown, self.selection], shared.prompt_styles.reload, lambda: {"choices": list(shared.prompt_styles.styles)}, f"refresh_{tabname}_styles") - self.materialize = ui_components.ToolButton(value=styles_materialize_symbol, elem_id=f"{tabname}_style_apply", tooltip="Apply all selected styles from the style selction dropdown in main UI to the prompt.") - - with gr.Row(): - self.prompt = gr.Textbox(label="Prompt", show_label=True, elem_id=f"{tabname}_edit_style_prompt", lines=3) - - with gr.Row(): - self.neg_prompt = gr.Textbox(label="Negative prompt", show_label=True, elem_id=f"{tabname}_edit_style_neg_prompt", lines=3) - - with gr.Row(): - self.save = gr.Button('Save', variant='primary', elem_id=f'{tabname}_edit_style_save', visible=False) - self.delete = gr.Button('Delete', variant='primary', elem_id=f'{tabname}_edit_style_delete', visible=False) - self.close = gr.Button('Close', variant='secondary', elem_id=f'{tabname}_edit_style_close') - - self.selection.change( - fn=select_style, - inputs=[self.selection], - outputs=[self.prompt, self.neg_prompt, self.delete, self.save], - show_progress=False, - ) - - self.save.click( - fn=save_style, - inputs=[self.selection, self.prompt, self.neg_prompt], - outputs=[self.delete], - show_progress=False, - ).then(refresh_styles, outputs=[self.dropdown, self.selection], show_progress=False) - - self.delete.click( - fn=delete_style, - _js='function(name){ if(name == "") return ""; return confirm("Delete style " + name + "?") ? name : ""; }', - inputs=[self.selection], - outputs=[self.selection, self.prompt, self.neg_prompt], - show_progress=False, - ).then(refresh_styles, outputs=[self.dropdown, self.selection], show_progress=False) - - self.materialize.click( - fn=materialize_styles, - inputs=[main_ui_prompt, main_ui_negative_prompt, self.dropdown], - outputs=[main_ui_prompt, main_ui_negative_prompt, self.dropdown], - show_progress=False, - ).then(fn=None, _js="function(){update_"+tabname+"_tokens(); closePopup();}", show_progress=False) - - ui_common.setup_dialog(button_show=edit_button, dialog=styles_dialog, button_close=self.close) - """ diff --git a/modules/ui_sections.py b/modules/ui_sections.py index c2bd4ecd9..874ac7d30 100644 --- a/modules/ui_sections.py +++ b/modules/ui_sections.py @@ -16,7 +16,7 @@ def parse_style(styles): if id_part is None: id_part = "img2img" if is_img2img else "txt2img" with gr.Row(elem_id=f"{id_part}_toprow", variant="compact"): - with gr.Column(elem_id=f"{id_part}_prompt_container", scale=6): + with gr.Column(elem_id=f"{id_part}_prompt_container", scale=5): with gr.Row(): with gr.Column(scale=80): with gr.Row(): diff --git a/modules/unipc/uni_pc.py b/modules/unipc/uni_pc.py index ca7fdd7dc..6ba3a31fa 100644 --- a/modules/unipc/uni_pc.py +++ b/modules/unipc/uni_pc.py @@ -14,86 +14,6 @@ def __init__( continuous_beta_0=0.1, continuous_beta_1=20., ): - """Create a wrapper class for the forward SDE (VP type). - - *** - Update: We support discrete-time diffusion models by implementing a picewise linear interpolation for log_alpha_t. - We recommend to use schedule='discrete' for the discrete-time diffusion models, especially for high-resolution images. - *** - - The forward SDE ensures that the condition distribution q_{t|0}(x_t | x_0) = N ( alpha_t * x_0, sigma_t^2 * I ). - We further define lambda_t = log(alpha_t) - log(sigma_t), which is the half-logSNR (described in the DPM-Solver paper). 
- Therefore, we implement the functions for computing alpha_t, sigma_t and lambda_t. For t in [0, T], we have: - - log_alpha_t = self.marginal_log_mean_coeff(t) - sigma_t = self.marginal_std(t) - lambda_t = self.marginal_lambda(t) - - Moreover, as lambda(t) is an invertible function, we also support its inverse function: - - t = self.inverse_lambda(lambda_t) - - =============================================================== - - We support both discrete-time DPMs (trained on n = 0, 1, ..., N-1) and continuous-time DPMs (trained on t in [t_0, T]). - - 1. For discrete-time DPMs: - - For discrete-time DPMs trained on n = 0, 1, ..., N-1, we convert the discrete steps to continuous time steps by: - t_i = (i + 1) / N - e.g. for N = 1000, we have t_0 = 1e-3 and T = t_{N-1} = 1. - We solve the corresponding diffusion ODE from time T = 1 to time t_0 = 1e-3. - - Args: - betas: A `torch.Tensor`. The beta array for the discrete-time DPM. (See the original DDPM paper for details) - alphas_cumprod: A `torch.Tensor`. The cumprod alphas for the discrete-time DPM. (See the original DDPM paper for details) - - Note that we always have alphas_cumprod = cumprod(betas). Therefore, we only need to set one of `betas` and `alphas_cumprod`. - - **Important**: Please pay special attention for the args for `alphas_cumprod`: - The `alphas_cumprod` is the \hat{alpha_n} arrays in the notations of DDPM. Specifically, DDPMs assume that - q_{t_n | 0}(x_{t_n} | x_0) = N ( \sqrt{\hat{alpha_n}} * x_0, (1 - \hat{alpha_n}) * I ). - Therefore, the notation \hat{alpha_n} is different from the notation alpha_t in DPM-Solver. In fact, we have - alpha_{t_n} = \sqrt{\hat{alpha_n}}, - and - log(alpha_{t_n}) = 0.5 * log(\hat{alpha_n}). - - - 2. For continuous-time DPMs: - - We support two types of VPSDEs: linear (DDPM) and cosine (improved-DDPM). The hyperparameters for the noise - schedule are the default settings in DDPM and improved-DDPM: - - Args: - beta_min: A `float` number. The smallest beta for the linear schedule. - beta_max: A `float` number. The largest beta for the linear schedule. - cosine_s: A `float` number. The hyperparameter in the cosine schedule. - cosine_beta_max: A `float` number. The hyperparameter in the cosine schedule. - T: A `float` number. The ending time of the forward process. - - =============================================================== - - Args: - schedule: A `str`. The noise schedule of the forward SDE. 'discrete' for discrete-time DPMs, - 'linear' or 'cosine' for continuous-time DPMs. - Returns: - A wrapper object of the forward SDE (VP type). - - =============================================================== - - Example: - - # For discrete-time DPMs, given betas (the beta array for n = 0, 1, ..., N - 1): - >>> ns = NoiseScheduleVP('discrete', betas=betas) - - # For discrete-time DPMs, given alphas_cumprod (the \hat{alpha_n} array for n = 0, 1, ..., N - 1): - >>> ns = NoiseScheduleVP('discrete', alphas_cumprod=alphas_cumprod) - - # For continuous-time DPMs (VPSDE), linear schedule: - >>> ns = NoiseScheduleVP('linear', continuous_beta_0=0.1, continuous_beta_1=20.) - - """ - if schedule not in ['discrete', 'linear', 'cosine']: raise ValueError(f"Unsupported noise schedule {schedule}. 
The schedule needs to be 'discrete' or 'linear' or 'cosine'") diff --git a/modules/vqa.py b/modules/vqa.py index 8344b15bf..357a604d9 100644 --- a/modules/vqa.py +++ b/modules/vqa.py @@ -8,6 +8,8 @@ model = None loaded: str = None MODELS = { + "MS Florence 2 Base": "microsoft/Florence-2-base", # 0.5GB + "MS Florence 2 Large": "microsoft/Florence-2-large", # 1.5GB "Moondream 2": "vikhyatk/moondream2", # 3.7GB "GIT TextCaps Base": "microsoft/git-base-textcaps", # 0.7GB "GIT VQA Base": "microsoft/git-base-vqav2", # 0.7GB @@ -124,7 +126,44 @@ def moondream(question: str, image: Image.Image, repo: str = None): return response +def florence(question: str, image: Image.Image, repo: str = None): + global processor, model, loaded # pylint: disable=global-statement + if model is None or loaded != repo: + model = transformers.AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True) + processor = transformers.AutoProcessor.from_pretrained(repo, trust_remote_code=True) + loaded = repo + model.eval() + model.to(devices.device, devices.dtype) + shared.log.debug(f'VQA: class={model.__class__.__name__} processor={processor.__class__} model={repo}') + + if question.startswith('<'): + task = question.split('>', 1)[0] + '>' + else: + task = '' + question = task + question + inputs = processor(text=question, images=image, return_tensors="pt") + input_ids = inputs['input_ids'].to(devices.device) + pixel_values = inputs['pixel_values'].to(devices.device, devices.dtype) + with devices.inference_context(): + generated_ids = model.generate( + input_ids=input_ids, + pixel_values=pixel_values, + max_new_tokens=1024, + num_beams=3, + do_sample=False + ) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0] + response = processor.post_process_generation(generated_text, task="task", image_size=(image.width, image.height)) + + if 'task' in response: + response = response['task'] + shared.log.debug(f'VQA: task={task} response="{response}"') + return response + + def interrogate(vqa_question, vqa_image, vqa_model_req): + from installer import install + install('flash_attn', quiet=True) vqa_model = MODELS.get(vqa_model_req, None) shared.log.debug(f'VQA: model="{vqa_model}" question="{vqa_question}" image={vqa_image}') if vqa_image is None: @@ -146,6 +185,8 @@ def interrogate(vqa_question, vqa_image, vqa_model_req): answer = pix(vqa_question, vqa_image, vqa_model) if 'moondream2' in vqa_model.lower(): answer = moondream(vqa_question, vqa_image, vqa_model) + if 'florence' in vqa_model.lower(): + answer = florence(vqa_question, vqa_image, vqa_model) else: answer = 'unknown model' if model is not None: diff --git a/requirements.txt b/requirements.txt index 681f1b4d1..555d99e65 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,7 +27,7 @@ fasteners orjson invisible-watermark pi-heif -diffusers==0.29.0 +diffusers==0.29.1 safetensors==0.4.3 tensordict==0.1.2 peft==0.11.1 @@ -54,7 +54,7 @@ protobuf==4.25.3 pytorch_lightning==1.9.4 tokenizers==0.19.1 transformers==4.41.2 -urllib3==1.26.18 +urllib3==1.26.19 Pillow==10.3.0 timm==0.9.16 pydantic==1.10.15 diff --git a/scripts/face-details.py b/scripts/face_details.py similarity index 97% rename from scripts/face-details.py rename to scripts/face_details.py index 3604ecb47..b68d197db 100644 --- a/scripts/face-details.py +++ b/scripts/face_details.py @@ -104,11 +104,12 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None): return np_image self.load() if self.model is None: - shared.log.error(f"Model load: 
diff --git a/scripts/face-details.py b/scripts/face_details.py
similarity index 97%
rename from scripts/face-details.py
rename to scripts/face_details.py
index 3604ecb47..b68d197db 100644
--- a/scripts/face-details.py
+++ b/scripts/face_details.py
@@ -104,11 +104,12 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None):
             return np_image
         self.load()
         if self.model is None:
-            shared.log.error(f"Model load: type=FaceHires model='{self.model_name}' dir={self.model_dir} url={self.model_url}")
+            shared.log.debug('Face HiRes: model not loaded')
             return np_image
         image = Image.fromarray(np_image)
         faces = self.predict(image)
         if len(faces) == 0:
+            shared.log.debug('Face HiRes: no faces detected')
             return np_image

         # create backups
@@ -140,6 +141,7 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None):
         if args['denoising_strength'] == 0:
             shared.log.debug('Face HiRes skip: strength=0')
         control_pipeline = None
+        orig_class = shared.sd_model.__class__
         if getattr(p, 'is_control', False):
             from modules.control import run
             control_pipeline = shared.sd_model
@@ -177,6 +179,8 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None):
         # restore pipeline
         if control_pipeline is not None:
             shared.sd_model = control_pipeline
+        else:
+            shared.sd_model.__class__ = orig_class
         p = processing_class.switch_class(p, orig_cls, orig_p)
         p.init_images = getattr(orig_p, 'init_images', None)
         p.image_mask = getattr(orig_p, 'image_mask', None)
diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py
index a7ff92532..7de4b138f 100644
--- a/scripts/xyz_grid.py
+++ b/scripts/xyz_grid.py
@@ -65,6 +65,7 @@ def apply_sampler(p, x, xs):
     else:
         p.sampler_name = sampler_name

+
 def apply_hr_sampler_name(p, x, xs):
     hr_sampler_name = sd_samplers.samplers_map.get(x.lower(), None)
     if hr_sampler_name is None:
@@ -72,6 +73,7 @@ def apply_hr_sampler_name(p, x, xs):
     else:
         p.hr_sampler_name = hr_sampler_name

+
 def confirm_samplers(p, xs):
     for x in xs:
         if x.lower() not in sd_samplers.samplers_map:
@@ -138,6 +140,24 @@ def apply_vae(p, x, xs):
     sd_vae.reload_vae_weights(shared.sd_model, vae_file=find_vae(x))


+def list_lora():
+    import sys
+    lora = [v for k, v in sys.modules.items() if k == 'networks'][0]
+    loras = [v.name for v in lora.available_networks.values()]
+    return ['None'] + loras
+
+
+def apply_lora(p, x, xs):
+    if x == 'None':
+        return
+    p.prompt = p.prompt + f" "
+
+
+def apply_te(p, x, xs):
+    shared.opts.data["sd_text_encoder"] = x
+    sd_models.reload_text_encoder()
+
+
 def apply_styles(p: processing.StableDiffusionProcessingTxt2Img, x: str, _):
     p.styles.extend(x.split(','))

@@ -230,6 +250,8 @@ def __init__(self, *args, **kwargs):
         AxisOption("Prompt S/R", str, apply_prompt, fmt=format_value),
         AxisOption("Model", str, apply_checkpoint, fmt=format_value, cost=1.0, choices=lambda: sorted(sd_models.checkpoints_list)),
         AxisOption("VAE", str, apply_vae, cost=0.7, choices=lambda: ['None'] + list(sd_vae.vae_dict)),
+        AxisOption("LoRA", str, apply_lora, cost=0.5, choices=list_lora),
+        AxisOption("Text encoder", str, apply_te, cost=0.7, choices=lambda: ['None', 'T5 FP4', 'T5 FP8', 'T5 FP16']),
         AxisOption("Styles", str, apply_styles, choices=lambda: [s.name for s in shared.prompt_styles.styles.values()]),
         AxisOption("Seed", int, apply_field("seed")),
         AxisOption("Steps", int, apply_field("steps")),
@@ -251,6 +273,7 @@ def __init__(self, *args, **kwargs):
         AxisOption("[Sampler] Sigma tmax", float, apply_field("s_tmax")),
         AxisOption("[Sampler] Sigma Churn", float, apply_field("s_churn")),
         AxisOption("[Sampler] Sigma noise", float, apply_field("s_noise")),
+        AxisOption("[Sampler] Shift", float, apply_setting("schedulers_shift")),
         AxisOption("[Sampler] ETA", float, apply_setting("scheduler_eta")),
         AxisOption("[Sampler] Solver order", int, apply_setting("schedulers_solver_order")),
         AxisOption("[Second pass] Upscaler", str, apply_field("hr_upscaler"), choices=lambda: [*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]]),
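The `scripts/xyz_grid.py` hunk above also shows the registration pattern behind the new LoRA, Text encoder and Shift axes: a callback with signature `(p, x, xs)` plus a single `AxisOption` entry with an optional `cost` and `choices` provider. A hypothetical sketch of the same pattern follows; the axis name and callback are invented for illustration and are not in the diff:

```python
# Hypothetical axis following the pattern above; it would live next to apply_lora/apply_te in scripts/xyz_grid.py.
def apply_prompt_suffix(p, x, xs):
    # append the grid value for this cell to the prompt, similar in spirit to apply_styles
    p.prompt = f"{p.prompt}, {x}"

# registered alongside the AxisOption entries shown in the hunk above, e.g.:
# AxisOption("Prompt suffix (example)", str, apply_prompt_suffix, cost=0.1)
```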
diff --git a/train/templates/style.txt b/train/templates/style.txt
deleted file mode 100644
index b6cd90b7d..000000000
--- a/train/templates/style.txt
+++ /dev/null
@@ -1 +0,0 @@
-a painting, art by [name]
diff --git a/train/templates/style_filewords.txt b/train/templates/style_filewords.txt
deleted file mode 100644
index 2b73960fc..000000000
--- a/train/templates/style_filewords.txt
+++ /dev/null
@@ -1 +0,0 @@
-photo of [filewords], art by [name]
diff --git a/train/templates/subject.txt b/train/templates/subject.txt
deleted file mode 100644
index 1c5a3ae93..000000000
--- a/train/templates/subject.txt
+++ /dev/null
@@ -1 +0,0 @@
-photo of [name]
diff --git a/train/templates/subject_filewords.txt b/train/templates/subject_filewords.txt
deleted file mode 100644
index be2cbdfe7..000000000
--- a/train/templates/subject_filewords.txt
+++ /dev/null
@@ -1 +0,0 @@
-photo of [name], [filewords]
diff --git a/train/templates/unknown.txt b/train/templates/unknown.txt
deleted file mode 100644
index 259b7ef34..000000000
--- a/train/templates/unknown.txt
+++ /dev/null
@@ -1 +0,0 @@
-[name]
diff --git a/train/templates/unknown_filewords.txt b/train/templates/unknown_filewords.txt
deleted file mode 100644
index 43a4ef17a..000000000
--- a/train/templates/unknown_filewords.txt
+++ /dev/null
@@ -1 +0,0 @@
-[name], [filewords]
diff --git a/webui.py b/webui.py
index 2d66d36f5..c9af90e75 100644
--- a/webui.py
+++ b/webui.py
@@ -118,7 +118,6 @@ def initialize():
     shared.opts.onchange("temp_dir", gr_tempdir.on_tmpdir_changed)
     timer.startup.record("onchange")

-    modules.textual_inversion.textual_inversion.list_textual_inversion_templates()
     shared.reload_hypernetworks()
     shared.prompt_styles.reload()

@@ -169,7 +168,7 @@ def load_model():
     thread_refiner.join()
     shared.opts.onchange("sd_model_checkpoint", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(op='model')), call=False)
     shared.opts.onchange("sd_model_refiner", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(op='refiner')), call=False)
-    shared.opts.onchange("sd_te3", wrap_queued_call(lambda: modules.sd_models.reload_text_encoder()), call=False)
+    shared.opts.onchange("sd_text_encoder", wrap_queued_call(lambda: modules.sd_models.reload_text_encoder()), call=False)
     shared.opts.onchange("sd_model_dict", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(op='dict')), call=False)
     shared.opts.onchange("sd_vae", wrap_queued_call(lambda: modules.sd_vae.reload_vae_weights()), call=False)
     shared.opts.onchange("sd_backend", wrap_queued_call(lambda: modules.sd_models.change_backend()), call=False)
diff --git a/wiki b/wiki
index 0db3587f4..c5c9e8998 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 0db3587f4491680cbedb20282a6d3dd52e1d5769
+Subproject commit c5c9e89981c8bd35b51823315418a4a4864bb5e1
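The `webui.py` change above re-binds the text-encoder reload handler from the old `sd_te3` option to `sd_text_encoder`. A sketch in the spirit of the `cli/api-*` scripts follows, assuming the standard `/sdapi/v1/options` endpoint accepts the renamed key and the same value strings offered by the new xyz-grid axis; the endpoint, values and helper name are assumptions, not part of the diff:

```python
#!/usr/bin/env python
# Sketch: switch the renamed `sd_text_encoder` option over the API; the onchange
# handler registered in webui.py then reloads the text encoder.
import os
import requests

sd_url = os.environ.get('SDAPI_URL', "http://127.0.0.1:7860")

def set_text_encoder(value: str):
    res = requests.post(f'{sd_url}/sdapi/v1/options', json={ 'sd_text_encoder': value }, timeout=300, verify=False)
    res.raise_for_status()

if __name__ == "__main__":
    set_text_encoder('T5 FP16')  # assumed to accept the same strings as the grid choices, e.g. 'T5 FP8', 'T5 FP4'
```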
[fragment: HTML markup was stripped from a hunk that builds a per-model info table (apparently the extra networks info display); only the row labels and expressions survive:
Type: {page.title}
Alias: {getattr(item, 'alias', 'N/A')}
Filename: {item.filename}
Hash: {getattr(item, 'hash', 'N/A')}
Size: {round(stat.st_size/1024/1024, 2) if stat is not None else 'N/A'} MB
Last modified: {datetime.fromtimestamp(stat.st_mtime) if stat is not None else 'N/A'}
Source URL: {url}]
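Only the row expressions above survive from this fragment; the surrounding markup is lost. A speculative sketch of how such rows, including the new Source URL link, might be assembled; the function name and HTML structure are assumptions:

```python
# Speculative reconstruction sketch; only the row expressions are taken from the fragment above.
from datetime import datetime

def info_rows(page, item, stat, url):
    rows = {
        'Type': page.title,
        'Alias': getattr(item, 'alias', 'N/A'),
        'Filename': item.filename,
        'Hash': getattr(item, 'hash', 'N/A'),
        'Size': f"{round(stat.st_size/1024/1024, 2) if stat is not None else 'N/A'} MB",
        'Last modified': datetime.fromtimestamp(stat.st_mtime) if stat is not None else 'N/A',
        'Source URL': f'<a href="{url}" target="_blank">{url}</a>' if url else 'N/A',  # assumed link markup
    }
    return ''.join(f'<tr><td>{k}</td><td>{v}</td></tr>' for k, v in rows.items())
```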