diff --git a/.pylintrc b/.pylintrc index 113525778..977fd68e9 100644 --- a/.pylintrc +++ b/.pylintrc @@ -14,9 +14,8 @@ ignore-paths=/usr/lib/.*$, ^extensions/.*$, ^extensions-builtin/.*$, ^modules/dml/.*$, - ^modules/models/diffusion/.*$, - ^modules/xadapters/.*$, ^modules/tcd/.*$, + ^modules/xadapters/.*$, ignore-patterns= ignored-modules= jobs=0 diff --git a/.vscode/settings.json b/.vscode/settings.json index 37a12179f..428fc2335 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,12 +2,10 @@ "python.analysis.extraPaths": [ ".", "./modules", - "./repositories/BLIP", - "./repositories/CodeFormer", - "./repositories/k-diffusion", - "./repositories/taming-transformers", - "./repositories/stable-diffusion-stability-ai", - "./repositories/stable-diffusion-stability-ai/ldm" + "./repositories/blip", + "./repositories/codeformer", + "./repositories/ldm", + "./repositories/taming" ], "python.analysis.typeCheckingMode": "off", "editor.formatOnSave": false diff --git a/CHANGELOG.md b/CHANGELOG.md index f48e47b66..7cc802dd7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,30 +2,129 @@ ## TODO -- EDM samplers for Playground require `diffusers==0.27.0` -- StableCascade requires diffusers `kashif/diffusers.git@wuerstchen-v3` +- reference styles +- quick apply style -## Update for 2024-03-01 +## Update for 2024-03-19 +### Highlights 2024-03-19 + +New models: +- [Stable Cascade](https://github.com/Stability-AI/StableCascade) *Full* and *Lite* +- [Playground v2.5](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic) +- [KOALA 700M](https://github.com/youngwanLEE/sdxl-koala) +- [Stable Video Diffusion XT 1.1](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt-1-1) +- [VGen](https://huggingface.co/ali-vilab/i2vgen-xl) + +New pipelines and features: +- Img2img using [LEdit++](https://leditsplusplus-project.static.hf.space/index.html), context aware method with image analysis and positive/negative prompt handling +- Trajectory Consistency Distillation [TCD](https://mhh0318.github.io/tcd) for processing in even less steps +- Visual Query & Answer using [moondream2](https://github.com/vikhyat/moondream) as an addition to standard interrogate methods +- **Face-HiRes**: simple built-in detailer for face refinements +- Even simpler outpaint: when resizing image, simply pick outpaint method and if image has different aspect ratio, blank areas will be outpainted! +- UI aspect-ratio controls and other UI improvements +- User controllable invisibile and visible watermarking +- Native composable LoRA + +What else? + +- **Reference models**: *Networks -> Models -> Reference*: All reference models now come with recommended settings that can be auto-applied if desired +- **Styles**: Not just for prompts! 
Styles can apply *generate parameters* as templates and can be used to *apply wildcards* to prompts +- Additional improvements and new API endpoints +- Given the high interest in the [ZLUDA](https://github.com/vosen/ZLUDA) engine introduced in the last release, we've added a much more flexible/automatic install procedure (see [wiki](https://github.com/vladmandic/automatic/wiki/ZLUDA) for details) +- Plus additional improvements such as: Smooth tiling, Refine/HiRes workflow improvements, Control workflow + +Further details: +- For basic instructions, see [README](https://github.com/vladmandic/automatic/blob/master/README.md) +- For more details on all new features, see the full [CHANGELOG](https://github.com/vladmandic/automatic/blob/master/CHANGELOG.md) +- For documentation, see the [WiKi](https://github.com/vladmandic/automatic/wiki) +- [Discord](https://discord.com/invite/sd-next-federal-batch-inspectors-1101998836328697867) server + +### Full Changelog 2024-03-19 + +- [Stable Cascade](https://github.com/Stability-AI/StableCascade) *Full* and *Lite* + - large multi-stage high-quality model from the warp-ai/wuerstchen team, released by stabilityai + - download using networks -> reference + - see [wiki](https://github.com/vladmandic/automatic/wiki/Stable-Cascade) for details - [Playground v2.5](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic) - new model version from Playground: based on SDXL, but with some cool new concepts - download using networks -> reference - set sampler to *DPM++ 2M EDM* or *Euler EDM* - [KOALA 700M](https://github.com/youngwanLEE/sdxl-koala) - - another very fast & light sd-xl model where original unet was compressed and distilled to 54% of original size + - another very fast & light sdxl model where original unet was compressed and distilled to 54% of original size - download using networks -> reference - *note*: to download the fp16 variant (recommended), set settings -> diffusers -> preferred model variant -- **Image2Video** - - new module for creating videos from images - - simply enable from *img2img -> scripts -> image2video* - - based on [VGen](https://huggingface.co/ali-vilab/i2vgen-xl) -- **VQA** visual question & answer in interrogate - - with support for multiple variations of base models: *GIT, BLIP, ViLT, PIX* +- [LEdit++](https://leditsplusplus-project.static.hf.space/index.html) + - context-aware img2img method with image analysis and positive/negative prompt handling + - enable via img2img -> scripts -> ledit + - uses the following params from standard img2img: cfg scale (recommended ~3), steps (recommended ~50), denoise strength (recommended ~0.7) + - can use positive and/or negative prompt to guide the editing process + - positive prompt: what to enhance, strength and threshold for auto-masking + - negative prompt: what to remove, strength and threshold for auto-masking + - *note*: not compatible with model offloading - **Second Pass / Refine** - independent upscale and hires options: run hires without upscale or upscale without hires or both - upscale can now run at 0.1-8.0 scale and will also run if enabled at 1.0 to allow for upscalers that simply improve image quality - update ui section to reflect changes - *note*: behavior using backend:original is unchanged for backwards compatibility +- **Visual Query** visual query & answer in process tab + - go to process -> visual query + - ask your questions, e.g. "describe the image", "what is behind the subject", "what are predominant colors of the image?" + - primary model is [moondream2](https://github.com/vikhyat/moondream), a *tiny* 1.86B vision language model + *note*: it's still 3.7GB in size, so not really tiny + - additional support for multiple variations of several base models: *GIT, BLIP, ViLT, PIX*, sizes range from 0.3 to 1.7GB
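For reference, the sketch below shows how the underlying moondream2 model is typically queried directly with `transformers`. It is illustrative only and not the SD.Next code path; the `vikhyatk/moondream2` repo id and the `encode_image`/`answer_question` helpers follow the model card's documented usage and should be treated as assumptions here.

```python
# Illustrative only: querying moondream2 directly via transformers (trust_remote_code),
# outside of SD.Next. The repo id and helper method names follow the moondream2 model
# card and are assumptions, not part of this repository.
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "vikhyatk/moondream2"  # assumed HF repo id
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

image = Image.open("input.png")
encoded = model.encode_image(image)  # image -> embedding used for answering
print(model.answer_question(encoded, "Describe the image.", tokenizer))
```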
+- **Video** + - **Image2Video** + - new module for creating videos from images + - simply enable from *img2img -> scripts -> image2video* + - model is auto-downloaded on first use + - based on [VGen](https://huggingface.co/ali-vilab/i2vgen-xl) + - **Stable Video Diffusion** + - updated with *SVD 1.0, SVD XT 1.0 and SVD XT 1.1* + - models are auto-downloaded on first use + - simply enable from *img2img -> scripts -> stable video diffusion* + - for svd 1.0, use frames=~14, for xt models use frames=~25 +- **Composable LoRA**, thanks @AI-Casanova + - control lora strength for each step + for example: `` means strength=0.1 for the step at 0%, interpolating towards strength=0.9 for the step at 100% + - *note*: this is a very experimental feature and may not work as expected (see the interpolation sketch below) +- **Control** + - added *refiner/hires* workflows + - added resize methods to before/after/mask: fixed, crop, fill +- **Styles**: styles are not just for prompts! + - new styles editor: *networks -> styles -> edit* + - styles can apply generate parameters, for example to have a style that enables and configures hires: + parameters=`enable_hr: True, hr_scale: 2, hr_upscaler: Latent Bilinear antialias, hr_sampler_name: DEIS, hr_second_pass_steps: 20, denoising_strength: 0.5` + - styles can apply wildcards to prompts, for example: + wildcards=`movie=mad max, dune, star wars, star trek; intricate=realistic, color sketch, pencil sketch, intricate` + - as usual, you can apply any number of styles so you can choose which settings are applied, in which order, and which wildcards are used +- **UI** + - *aspect-ratio* add selector and lock to width/height control + allowed aspect ratios can be configured via *settings -> user interface* + - *interrogate* tab is now merged into *process* tab + - *image viewer* now displays image metadata + - *themes* improve on-the-fly switching + - *log monitor* flag server warnings/errors and overall improve display + - *control* separate processor settings from unit settings +- **Face HiRes** + - new *face restore* option, works similarly to the well-known *adetailer* by running an inpaint on detected faces, but with just a checkbox to enable/disable + - set as default face restorer in settings -> postprocessing + - disabled by default, to enable simply check *face restore* in your generate advanced settings + - strength, steps and sampler are set by the hires section in the refine menu + - strength can be overridden in settings -> postprocessing + - will use secondary prompt and secondary negative prompt if present in refine +- **Watermarking** + - SD.Next disables all known watermarks in models, but does allow the user to set a custom watermark + - see *settings -> image options -> watermarking* + - invisible watermark: using steganography + - image watermark: overlaid on top of image +- **Reference models** + - additional reference models available for single-click download & run: + *Stable Cascade, Stable Cascade lite, Stable Video Diffusion XT 1.1* + - reference models will now download *fp16* variation by default + - reference models will print recommended settings to log if present + - new setting in extra network: *use reference values when available* + disabled by default, if enabled will force use of reference settings for models that have them
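The stepwise strength syntax referenced in the Composable LoRA item above is handled by the new `get_stepwise()` helper added to `extensions-builtin/Lora/extra_networks_lora.py` later in this diff. Below is a minimal standalone sketch of that interpolation; the example value string `0.1@0,0.9@1` (strength@position pairs) is an assumption chosen to match the `[@~]` and `[,;]` separators the helper splits on.

```python
# Minimal sketch of Composable LoRA's stepwise strength interpolation, mirroring the
# get_stepwise() helper added in extensions-builtin/Lora/extra_networks_lora.py.
# The value string "0.1@0,0.9@1" is an illustrative assumption.
import re
import numpy as np

def stepwise_strength(raw: str, step: int, steps: int, step_offset: int = 2) -> float:
    pairs = [[float(s.strip()) for s in re.split("[@~]", part)] for part in re.split("[,;]", str(raw))]
    if len(pairs[0]) == 1:                        # plain number -> constant strength
        return pairs[0][0]
    pairs = [[p[0], p[1] if len(p) == 2 else 1.0] for p in pairs]  # implicit position 1.0
    pairs.sort(key=lambda p: p[1])                # sort by position
    weights, positions = zip(*pairs)
    if max(positions) <= 1.0:                     # positions given as 0..1 fractions of total steps
        step = step / max(steps - step_offset, 1)
    return float(np.interp(step, positions, weights))

# strength ramps from 0.1 at the first step towards 0.9 at the last step
print([round(stepwise_strength("0.1@0,0.9@1", s, 20), 2) for s in (0, 5, 10, 15, 19)])
```

With this in place, the extension re-evaluates the text-encoder and unet multipliers on every step instead of keeping them fixed, and the `LoRA apply` log line is only emitted on step 0.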
- **Samplers** - [TCD](https://mhh0318.github.io/tcd/): Trajectory Consistency Distillation, a new sampler that produces consistent results in a very low number of steps (comparable to LCM but without reliance on LoRA) @@ -37,22 +136,56 @@ - **FaceID** extend support for LoRA, HyperTile and FreeU, thanks @Trojaner - **Tiling** now extends to both Unet and VAE producing smoother outputs, thanks @AI-Casanova - new setting in image options: *include mask in output* + - improved params parsing from prompt string and styles - default theme updates and additional built-in theme *black-gray* - - add **ROCm** 6.0 nightly option to installer, thanks @jicka - support models with their own YAML model config files - support models with their own JSON per-component config files, for example: `playground-v2.5_vae.config` + - prompt can have comments enclosed with `/*` and `*/` + comments are extracted from prompt and added to image metadata +- **ROCm** + - add **ROCm** 6.0 nightly option to installer, thanks @jicka + - add *flash attention* support for rdna3, thanks @Disty0 + install the flash_attn package for rdna3 manually and enable *flash attention* from *compute settings* + to install flash_attn, activate the venv and run `pip install -U git+https://github.com/ROCm/flash-attention@howiejay/navi_support` +- **IPEX** + - disabled IPEX Optimize by default +- **API** + - add preprocessor api endpoints + GET:`/sdapi/v1/preprocessors`, POST:`/sdapi/v1/preprocess`, sample script:`cli/simple-preprocess.py` (see the condensed request example below) + - add masking api endpoints + GET:`/sdapi/v1/masking`, POST:`/sdapi/v1/mask`, sample script:`cli/simple-mask.py` - **Internal** + - improved vram efficiency for model compile, thanks @Disty0 + - **stable-fast** compatibility with torch 2.2.1 - remove obsolete textual inversion training code - remove obsolete hypernetworks training code +- **Refiner** validated workflows: + - Fully functional: SD15 + SD15, SDXL + SDXL, SDXL + SDXL-R + - Functional, but result is not as good: SD15 + SDXL, SDXL + SD15, SD15 + SDXL-R +- **SDXL Lightning** models just work; just make sure to set CFG Scale to 0 + and choose a best-suited sampler; it may not be the one you're used to (e.g. maybe even basic Euler)
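The new preprocessor endpoints listed above can be exercised with a few lines of `requests` code. The sketch below is condensed from the `cli/simple-preprocess.py` sample added in this diff; the server URL is an assumption (a local SD.Next instance without authentication), and the `Canny` model name with its `low_threshold` option is taken directly from that script.

```python
# Condensed from cli/simple-preprocess.py: list available preprocessors, then run one
# on an image via the new API endpoints. Assumes a local, unauthenticated server.
import base64, io, requests
from PIL import Image

sd_url = "http://127.0.0.1:7860"  # assumed local SD.Next instance

models = requests.get(f'{sd_url}/sdapi/v1/preprocessors', timeout=300).json()
print('available preprocessors:', models)

with open('input.png', 'rb') as f:
    payload = {
        'model': 'Canny',                              # any preprocessor returned above
        'image': base64.b64encode(f.read()).decode(),
        'config': {'low_threshold': 50},               # model-specific options
    }
res = requests.post(f'{sd_url}/sdapi/v1/preprocess', json=payload, timeout=300).json()
if 'image' in res:  # decode the returned base64 image, mirroring the sample script
    Image.open(io.BytesIO(base64.b64decode(res['image'].split(',', 1)[0]))).save('canny.png')
```

The masking endpoints follow the same request/response pattern; `cli/simple-mask.py` shows the `POST /sdapi/v1/mask` payload, including the optional base64 mask and `auto_mask` parameter.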
- **Fixes** - - improve model cpu offload compatibility - - improve model sequential offload compatibility - - improve bfloat16 compatibility + - improve *model cpu offload* compatibility + - improve *model sequential offload* compatibility + - improve *bfloat16* compatibility + - improve *xformers* installer to match cuda version and install triton - fix extra networks refresh - - fix sdp memory attention in backend original + - fix *sdp memory attention* in backend original - fix autodetect sd21 models - fix api info endpoint - - fix sampler eta in xyz grid, thanks @AI-Casanova + - fix *sampler eta* in xyz grid, thanks @AI-Casanova + - fix *requires_aesthetics_score* errors + - fix t2i-canny + - fix *differential diffusion* for manual mask, thanks @23pennies + - fix ipadapter apply/unapply on batch runs + - fix control with multiple units and override images + - fix control with hires + - fix control-lllite + - fix font fallback, thanks @NetroScript + - update civitai downloader to handle new metadata + - improve control error handling + - use default model variant if specified variant doesn't exist + - use diffusers lora load override for *lcm/tcd/turbo loras* - exception handler around vram memory stats gather - improve ZLUDA installer with `--use-zluda` cli param, thanks @lshqqytiger @@ -61,7 +194,7 @@ Only 3 weeks since last release, but here's another feature-packed one! This time the release schedule was shorter as we wanted to get some of the fixes out faster. -### Highlights +### Highlights 2024-02-22 - **IP-Adapters** & **FaceID**: multi-adapter and multi-image support - New optimization engines: [DeepCache](https://github.com/horseee/DeepCache), [ZLUDA](https://github.com/vosen/ZLUDA) and **Dynamic Attention Slicing** @@ -293,7 +426,7 @@ Further details: - full implementation for *SD15* and *SD-XL*, to use simply select from *Scripts* **Base** (93MB) uses *InsightFace* to generate face embeds and *OpenCLIP-ViT-H-14* (2.5GB) as image encoder **Plus** (150MB) uses *InsightFace* to generate face embeds and *CLIP-ViT-H-14-laion2B* (3.8GB) as image encoder - **SXDL** (1022MB) uses *InsightFace* to generate face embeds and *OpenCLIP-ViT-bigG-14* (3.7GB) as image encoder + **SDXL** (1022MB) uses *InsightFace* to generate face embeds and *OpenCLIP-ViT-bigG-14* (3.7GB) as image encoder - [FaceSwap](https://github.com/deepinsight/insightface/blob/master/examples/in_swapper/README.md) - face swap performs face swapping at the end of generation - based on InsightFace in-swapper @@ -310,7 +443,7 @@ Further details: - [IPAdapter](https://huggingface.co/h94/IP-Adapter) - additional models for *SD15* and *SD-XL*, to use simply select from *Scripts*: **SD15**: Base, Base ViT-G, Light, Plus, Plus Face, Full Face - **SDXL**: Base SXDL, Base ViT-H SXDL, Plus ViT-H SXDL, Plus Face ViT-H SXDL + **SDXL**: Base SDXL, Base ViT-H SDXL, Plus ViT-H SDXL, Plus Face ViT-H SDXL - enable use via api, thanks @trojaner - [Segmind SegMoE](https://github.com/segmind/segmoe) - initial support for reference models diff --git a/README.md b/README.md index 2c78667de..f4dd1f514 100644 --- a/README.md +++ b/README.md @@ -20,13 +20,14 @@ All individual features are not listed here, instead check [ChangeLog](CHANGELOG - Multiple backends! ▹ **Diffusers | Original** - Multiple diffusion models! 
- ▹ **Stable Diffusion 1.5/2.1 | SD-XL | LCM | Segmind | Kandinsky | Pixart-α | Würstchen | aMUSEd | DeepFloyd IF | UniDiffusion | SD-Distilled | BLiP Diffusion | etc.** + ▹ **Stable Diffusion 1.5/2.1 | SD-XL | LCM | Segmind | Kandinsky | Pixart-α | Stable Cascade | Würstchen | aMUSEd | DeepFloyd IF | UniDiffusion | SD-Distilled | BLiP Diffusion | KOALA | etc.** - Built-in Control for Text, Image, Batch and video processing! ▹ **ControlNet | ControlNet XS | Control LLLite | T2I Adapters | IP Adapters** - Multiplatform! - ▹ **Windows | Linux | MacOS with CPU | nVidia | AMD | IntelArc | DirectML | OpenVINO | ONNX+Olive** + ▹ **Windows | Linux | MacOS with CPU | nVidia | AMD | IntelArc | DirectML | OpenVINO | ONNX+Olive | ZLUDA** - Platform specific autodetection and tuning performed on install -- Optimized processing with latest `torch` developments with built-in support for `torch.compile` and multiple compile backends +- Optimized processing with latest `torch` developments with built-in support for `torch.compile` + and multiple compile backends: *Triton, ZLUDA, StableFast, DeepCache, OpenVINO, NNCF, IPEX* - Improved prompt parser - Enhanced *Lora*/*LoCon*/*Lyco* code supporting latest trends in training - Built-in queue management @@ -62,8 +63,10 @@ Additional models will be added as they become available and there is public int - [RunwayML Stable Diffusion](https://github.com/Stability-AI/stablediffusion/) 1.x and 2.x *(all variants)* - [StabilityAI Stable Diffusion XL](https://github.com/Stability-AI/generative-models) -- [StabilityAI Stable Video Diffusion](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid) Base and XT +- [StabilityAI Stable Video Diffusion](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid) Base, XT 1.0, XT 1.1 - [LCM: Latent Consistency Models](https://github.com/openai/consistency_models) +- [Playground](https://huggingface.co/playgroundai/playground-v2-256px-base) *v1, v2 256, v2 512, v2 1024 and latest v2.5* +- [Stable Cascade](https://github.com/Stability-AI/StableCascade) *Full* and *Lite* - [aMUSEd 256](https://huggingface.co/amused/amused-256) 256 and 512 - [Segmind Vega](https://huggingface.co/segmind/Segmind-Vega) - [Segmind SSD-1B](https://huggingface.co/segmind/SSD-1B) @@ -71,12 +74,13 @@ Additional models will be added as they become available and there is public int - [Kandinsky](https://github.com/ai-forever/Kandinsky-2) *2.1 and 2.2 and latest 3.0* - [PixArt-α XL 2](https://github.com/PixArt-alpha/PixArt-alpha) *Medium and Large* - [Warp Wuerstchen](https://huggingface.co/blog/wuertschen) -- [Playground](https://huggingface.co/playgroundai/playground-v2-256px-base) *v1, v2 256, v2 512, v2 1024* - [Tsinghua UniDiffusion](https://github.com/thu-ml/unidiffuser) - [DeepFloyd IF](https://github.com/deep-floyd/IF) *Medium and Large* - [ModelScope T2V](https://huggingface.co/damo-vilab/text-to-video-ms-1.7b) - [Segmind SD Distilled](https://huggingface.co/blog/sd_distillation) *(all variants)* - [BLIP-Diffusion](https://dxli94.github.io/BLIP-Diffusion-website/) +- [KOALA 700M](https://github.com/youngwanLEE/sdxl-koala) +- [VGen](https://huggingface.co/ali-vilab/i2vgen-xl) Also supported are modifiers such as: diff --git a/TODO.md b/TODO.md index 79ab68161..95641f008 100644 --- a/TODO.md +++ b/TODO.md @@ -9,7 +9,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma - ipadapter masking: - x-adapter: - async lowvram: -- init latents: variations, tiling, img2img +- init latents: variations, img2img 
- diffusers public callbacks - remove builtin: controlnet - remove builtin: image-browser @@ -18,5 +18,3 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma - second pass: - control api -- masking api -- preprocess api diff --git a/cli/clone.py b/cli/clone.py new file mode 100755 index 000000000..2ba0851d3 --- /dev/null +++ b/cli/clone.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +import os +import logging +import git +from rich import console, progress + + +class GitRemoteProgress(git.RemoteProgress): + OP_CODES = ["BEGIN", "CHECKING_OUT", "COMPRESSING", "COUNTING", "END", "FINDING_SOURCES", "RECEIVING", "RESOLVING", "WRITING"] + OP_CODE_MAP = { getattr(git.RemoteProgress, _op_code): _op_code for _op_code in OP_CODES } + + def __init__(self, url, folder) -> None: + super().__init__() + self.url = url + self.folder = folder + self.progressbar = progress.Progress( + progress.SpinnerColumn(), + progress.TextColumn("[cyan][progress.description]{task.description}"), + progress.BarColumn(), + progress.TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + progress.TimeRemainingColumn(), + progress.TextColumn("[yellow]<{task.fields[url]}>"), + progress.TextColumn("{task.fields[message]}"), + console=console.Console(), + transient=False, + ) + self.progressbar.start() + self.active_task = None + + def __del__(self) -> None: + self.progressbar.stop() + + @classmethod + def get_curr_op(cls, op_code: int) -> str: + op_code_masked = op_code & cls.OP_MASK + return cls.OP_CODE_MAP.get(op_code_masked, "?").title() + + def update(self, op_code: int, cur_count: str | float, max_count: str | float | None = None, message: str | None = "") -> None: + if op_code & self.BEGIN: + self.curr_op = self.get_curr_op(op_code) # pylint: disable=attribute-defined-outside-init + self.active_task = self.progressbar.add_task(description=self.curr_op, total=max_count, message=message, url=self.url) + self.progressbar.update(task_id=self.active_task, completed=cur_count, message=message) + if op_code & self.END: + self.progressbar.update(task_id=self.active_task, message=f"[bright_black]{message}") + + +def clone(url: str, folder: str): + git.Repo.clone_from( + url=url, + to_path=folder, + progress=GitRemoteProgress(url=url, folder=folder), + multi_options=['--config core.compression=0', '--config core.loosecompression=0', '--config pack.window=0'], + allow_unsafe_options=True, + depth=1, + ) + + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser(description = 'downloader') + parser.add_argument('--url', required=True, help="download url, required") + parser.add_argument('--folder', required=False, help="output folder, default: autodetect") + args = parser.parse_args() + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s") + log = logging.getLogger(__name__) + try: + if not args.url.startswith('http'): + raise ValueError(f'invalid url: {args.url}') + f = args.url.split('/')[-1].split('.')[0] if args.folder is None else args.folder + if os.path.exists(f): + raise FileExistsError(f'folder already exists: {f}') + log.info(f'Clone start: url={args.url} folder={f}') + clone(url=args.url, folder=f) + log.info(f'Clone complete: url={args.url} folder={f}') + except KeyboardInterrupt: + log.warning(f'Clone cancelled: url={args.url} folder={f}') + except Exception as e: + log.error(f'Clone: url={args.url} {e}') diff --git a/cli/image-exif.py b/cli/image-exif.py index 43ea8df18..fcbbb44c8 100755 --- a/cli/image-exif.py +++ 
b/cli/image-exif.py @@ -4,10 +4,62 @@ import io import re import sys +import json from PIL import Image, ExifTags, TiffImagePlugin, PngImagePlugin from rich import print # pylint: disable=redefined-builtin +def unquote(text): + if len(text) == 0 or text[0] != '"' or text[-1] != '"': + return text + try: + return json.loads(text) + except Exception: + return text + + +def parse_generation_parameters(infotext): # copied from modules.generation_parameters_copypaste + if not isinstance(infotext, str): + return {} + + re_param = re.compile(r'\s*([\w ]+):\s*("(?:\\"[^,]|\\"|\\|[^\"])+"|[^,]*)(?:,|$)') # multi-word: value + re_size = re.compile(r"^(\d+)x(\d+)$") # int x int + sanitized = infotext.replace('prompt:', 'Prompt:').replace('negative prompt:', 'Negative prompt:').replace('Negative Prompt', 'Negative prompt') # cleanup everything in brackets so re_params can work + sanitized = re.sub(r'<[^>]*>', lambda match: ' ' * len(match.group()), sanitized) + sanitized = re.sub(r'\([^)]*\)', lambda match: ' ' * len(match.group()), sanitized) + sanitized = re.sub(r'\{[^}]*\}', lambda match: ' ' * len(match.group()), sanitized) + + params = dict(re_param.findall(sanitized)) + params = { k.strip():params[k].strip() for k in params if k.lower() not in ['hashes', 'lora', 'embeddings', 'prompt', 'negative prompt']} # remove some keys + first_param = next(iter(params)) if params else None + params_idx = sanitized.find(f'{first_param}:') if first_param else -1 + negative_idx = infotext.find("Negative prompt:") + + prompt = infotext[:params_idx] if negative_idx == -1 else infotext[:negative_idx] # prompt can be with or without negative prompt + negative = infotext[negative_idx:params_idx] if negative_idx >= 0 else '' + + for k, v in params.copy().items(): # avoid dict-has-changed + if len(v) > 0 and v[0] == '"' and v[-1] == '"': + v = unquote(v) + m = re_size.match(v) + if v.replace('.', '', 1).isdigit(): + params[k] = float(v) if '.' 
in v else int(v) + elif v == "True": + params[k] = True + elif v == "False": + params[k] = False + elif m is not None: + params[f"{k}-1"] = int(m.group(1)) + params[f"{k}-2"] = int(m.group(2)) + elif k == 'VAE' and v == 'TAESD': + params["Full quality"] = False + else: + params[k] = v + params["Prompt"] = prompt.replace('Prompt:', '').strip() + params["Negative prompt"] = negative.replace('Negative prompt:', '').strip() + return params + + class Exif: # pylint: disable=single-string-used-for-slots __slots__ = ('__dict__') # pylint: disable=superfluous-parens def __init__(self, image = None): @@ -67,34 +119,9 @@ def decode(self, s: bytes): return None def parse(self): - re_param_code = r'\s*([\w ]+):\s*("(?:\\"[^,]|\\"|\\|[^\"])+"|[^,]*)(?:,|$)' - re_param = re.compile(re_param_code) x = self.exif.pop('parameters', None) or self.exif.pop('UserComment', None) - res = {} - if x is None: - return res - remaining = x.replace('\n', ' ').strip() - if len(remaining) == 0: - return res - remaining = x[7:] if x.startswith('Prompt: ') else x - remaining = x[11:] if x.startswith('parameters: ') else x - if 'Steps: ' in remaining and 'Negative prompt: ' not in remaining: - remaining = remaining.replace('Steps: ', 'Negative prompt: Steps: ') - prompt, remaining = remaining.strip().split('Negative prompt: ', maxsplit=1) if 'Negative prompt: ' in remaining else (remaining, '') - res["Prompt"] = prompt.strip() - negative, remaining = remaining.strip().split('Steps: ', maxsplit=1) if 'Steps: ' in remaining else (remaining, None) - res["Negative prompt"] = negative.strip() - if remaining is None: - return res - remaining = f'Steps: {remaining}' - for k, v in re_param.findall(remaining.strip()): - if v.isdigit(): - res[k] = float(v) if '.' in v else int(v) - else: - res[k] = v - from types import SimpleNamespace - ns = SimpleNamespace(**res) - return ns + res = parse_generation_parameters(x) + return res def get_bytes(self): ifd = TiffImagePlugin.ImageFileDirectory_v2() diff --git a/cli/install-sf.py b/cli/install-sf.py index eb0d0b577..894e36720 100755 --- a/cli/install-sf.py +++ b/cli/install-sf.py @@ -3,7 +3,7 @@ import re import sys -torch_supported = ['211', '212'] +torch_supported = ['211', '212','220','221'] cuda_supported = ['cu118', 'cu121'] python_supported = ['39', '310', '311'] repo_url = 'https://github.com/chengzeyi/stable-fast' diff --git a/cli/simple-mask.py b/cli/simple-mask.py new file mode 100755 index 000000000..2ea12234e --- /dev/null +++ b/cli/simple-mask.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python +import io +import os +import time +import base64 +import logging +import argparse +import requests +import urllib3 +from PIL import Image + + +sd_url = os.environ.get('SDAPI_URL', "http://127.0.0.1:7860") +sd_username = os.environ.get('SDAPI_USR', None) +sd_password = os.environ.get('SDAPI_PWD', None) + + +logging.basicConfig(level = logging.INFO, format = '%(asctime)s %(levelname)s: %(message)s') +log = logging.getLogger(__name__) +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + +def auth(): + if sd_username is not None and sd_password is not None: + return requests.auth.HTTPBasicAuth(sd_username, sd_password) + return None + + +def get(endpoint: str, dct: dict = None): + req = requests.get(f'{sd_url}{endpoint}', json=dct, timeout=300, verify=False, auth=auth()) + if req.status_code != 200: + return { 'error': req.status_code, 'reason': req.reason, 'url': req.url } + else: + return req.json() + + +def post(endpoint: str, dct: dict = None): + req = 
requests.post(f'{sd_url}{endpoint}', json = dct, timeout=300, verify=False, auth=auth()) + if req.status_code != 200: + return { 'error': req.status_code, 'reason': req.reason, 'url': req.url } + else: + return req.json() + + +def info(args): # pylint: disable=redefined-outer-name + t0 = time.time() + with open(args.input, 'rb') as f: + image = base64.b64encode(f.read()).decode() + if args.mask: + with open(args.mask, 'rb') as f: + mask = base64.b64encode(f.read()).decode() + else: + mask = None + options = get('/sdapi/v1/masking') + log.info(f'options: {options}') + req = { + 'image': image, + 'mask': mask, + 'type': args.type or 'Composite', + 'params': { 'auto_mask': 'Grayscale' if mask is None else None }, + } + data = post('/sdapi/v1/mask', req) + t1 = time.time() + if 'mask' in data: + b64 = data['mask'].split(',',1)[0] + image = Image.open(io.BytesIO(base64.b64decode(b64))) + log.info(f'received image: size={image.size} time={t1-t0:.2f}') + if args.output: + image.save(args.output) + log.info(f'saved image: fn={args.output}') + else: + log.info(f'received: {data} time={t1-t0:.2f}') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = 'simple-info') + parser.add_argument('--input', required=True, help='input image') + parser.add_argument('--mask', required=False, help='input mask') + parser.add_argument('--type', required=False, help='output mask type') + parser.add_argument('--output', required=False, help='output image') + args = parser.parse_args() + log.info(f'info: {args}') + info(args) diff --git a/cli/simple-preprocess.py b/cli/simple-preprocess.py new file mode 100755 index 000000000..2b96750bf --- /dev/null +++ b/cli/simple-preprocess.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +import io +import os +import time +import base64 +import logging +import argparse +import requests +import urllib3 +from PIL import Image + + +sd_url = os.environ.get('SDAPI_URL', "http://127.0.0.1:7860") +sd_username = os.environ.get('SDAPI_USR', None) +sd_password = os.environ.get('SDAPI_PWD', None) + + +logging.basicConfig(level = logging.INFO, format = '%(asctime)s %(levelname)s: %(message)s') +log = logging.getLogger(__name__) +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + +def auth(): + if sd_username is not None and sd_password is not None: + return requests.auth.HTTPBasicAuth(sd_username, sd_password) + return None + + +def get(endpoint: str, dct: dict = None): + req = requests.get(f'{sd_url}{endpoint}', json=dct, timeout=300, verify=False, auth=auth()) + if req.status_code != 200: + return { 'error': req.status_code, 'reason': req.reason, 'url': req.url } + else: + return req.json() + + +def post(endpoint: str, dct: dict = None): + req = requests.post(f'{sd_url}{endpoint}', json = dct, timeout=300, verify=False, auth=auth()) + if req.status_code != 200: + return { 'error': req.status_code, 'reason': req.reason, 'url': req.url } + else: + return req.json() + + +def info(args): # pylint: disable=redefined-outer-name + t0 = time.time() + with open(args.input, 'rb') as f: + content = f.read() + models = get('/sdapi/v1/preprocessors') + log.info(f'models: {models}') + req = { + 'model': args.model or 'Canny', + 'image': base64.b64encode(content).decode(), + 'config': { 'low_threshold': 50 }, + } + data = post('/sdapi/v1/preprocess', req) + t1 = time.time() + if 'image' in data: + b64 = data['image'].split(',',1)[0] + image = Image.open(io.BytesIO(base64.b64decode(b64))) + log.info(f'received image: size={image.size} time={t1-t0:.2f}') + if 
args.output: + image.save(args.output) + log.info(f'saved image: fn={args.output}') + else: + log.info(f'received: {data} time={t1-t0:.2f}') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = 'simple-info') + parser.add_argument('--input', required=True, help='input image') + parser.add_argument('--model', required=True, help='preprocessing model') + parser.add_argument('--output', required=False, help='output image') + args = parser.parse_args() + log.info(f'info: {args}') + info(args) diff --git a/extensions-builtin/Lora/extra_networks_lora.py b/extensions-builtin/Lora/extra_networks_lora.py index 57df5b010..cb6989fb7 100644 --- a/extensions-builtin/Lora/extra_networks_lora.py +++ b/extensions-builtin/Lora/extra_networks_lora.py @@ -1,8 +1,43 @@ import time +import numpy as np +import re import networks import lora_patches from modules import extra_networks, shared +# from https://github.com/cheald/sd-webui-loractl/blob/master/loractl/lib/utils.py +def get_stepwise(param, step, steps): + def sorted_positions(raw_steps): + steps = [[float(s.strip()) for s in re.split("[@~]", x)] + for x in re.split("[,;]", str(raw_steps))] + # If we just got a single number, just return it + if len(steps[0]) == 1: + return steps[0][0] + + # Add implicit 1s to any steps which don't have a weight + steps = [[s[0], s[1] if len(s) == 2 else 1] for s in steps] + + # Sort by index + steps.sort(key=lambda k: k[1]) + + steps = [list(v) for v in zip(*steps)] + return steps + + def calculate_weight(m, step, max_steps, step_offset=2): + if isinstance(m, list): + if m[1][-1] <= 1.0: + if max_steps > 0: + step = (step) / (max_steps - step_offset) + else: + step = 1.0 + else: + step = step + v = np.interp(step, m[1], m[0]) + return v + else: + return m + return calculate_weight(sorted_positions(param), step, steps) + class ExtraNetworkLora(extra_networks.ExtraNetwork): @@ -14,7 +49,7 @@ def __init__(self): """mapping of network names to the number of errors the network had during operation""" - def activate(self, p, params_list): + def activate(self, p, params_list, step=0): t0 = time.time() self.errors.clear() if len(params_list) > 0: @@ -29,13 +64,21 @@ def activate(self, p, params_list): for params in params_list: assert params.items names.append(params.positional[0]) - te_multiplier = float(params.positional[1]) if len(params.positional) > 1 else 1.0 - te_multiplier = float(params.named.get("te", te_multiplier)) - unet_multiplier = [float(params.positional[2]) if len(params.positional) > 2 else te_multiplier] * 3 - unet_multiplier = [float(params.named.get("unet", unet_multiplier[0]))] * 3 - unet_multiplier[0] = float(params.named.get("in", unet_multiplier[0])) - unet_multiplier[1] = float(params.named.get("mid", unet_multiplier[1])) - unet_multiplier[2] = float(params.named.get("out", unet_multiplier[2])) + te_multiplier = params.named.get("te", params.positional[1] if len(params.positional) > 1 else 1.0) + if isinstance(te_multiplier, str) and "@" in te_multiplier: + te_multiplier = get_stepwise(te_multiplier, step, p.steps) + else: + te_multiplier = float(te_multiplier) + unet_multiplier = [params.positional[2] if len(params.positional) > 2 else te_multiplier] * 3 + unet_multiplier = [params.named.get("unet", unet_multiplier[0])] * 3 + unet_multiplier[0] = params.named.get("in", unet_multiplier[0]) + unet_multiplier[1] = params.named.get("mid", unet_multiplier[1]) + unet_multiplier[2] = params.named.get("out", unet_multiplier[2]) + for i in range(len(unet_multiplier)): + if 
isinstance(unet_multiplier[i], str) and "@" in unet_multiplier[i]: + unet_multiplier[i] = get_stepwise(unet_multiplier[i], step, p.steps) + else: + unet_multiplier[i] = float(unet_multiplier[i]) dyn_dim = int(params.positional[3]) if len(params.positional) > 3 else None dyn_dim = int(params.named["dyn"]) if "dyn" in params.named else dyn_dim te_multipliers.append(te_multiplier) @@ -57,7 +100,7 @@ def activate(self, p, params_list): network_hashes.append(f"{alias}: {shorthash}") if network_hashes: p.extra_generation_params["Lora hashes"] = ", ".join(network_hashes) - if len(names) > 0: + if len(names) > 0 and step == 0: shared.log.info(f'LoRA apply: {names} patch={t1-t0:.2f} load={t2-t1:.2f}') elif self.active: self.active = False diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index a3113f54b..5a50964ad 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -193,10 +193,13 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No try: if recompile_model: shared.compiled_model_state.lora_model.append(f"{name}:{te_multipliers[i] if te_multipliers else 1.0}") - if shared.backend == shared.Backend.DIFFUSERS and shared.opts.lora_force_diffusers: # OpenVINO only works with Diffusers LoRa loading. - # or getattr(network_on_disk, 'shorthash', '').lower() == 'aaebf6360f7d' # sd15-lcm - # or getattr(network_on_disk, 'shorthash', '').lower() == '3d18b05e4f56' # sdxl-lcm - # or getattr(network_on_disk, 'shorthash', '').lower() == '813ea5fb1c67' # turbo sdxl-turbo + shorthash = getattr(network_on_disk, 'shorthash', '').lower() + if shared.backend == shared.Backend.DIFFUSERS and (shared.opts.lora_force_diffusers # OpenVINO only works with Diffusers LoRa loading. 
+ or shorthash == 'aaebf6360f7d' # sd15-lcm + or shorthash == '3d18b05e4f56' # sdxl-lcm + or shorthash == 'b71dcb732467' # sdxl-tcd + or shorthash == '813ea5fb1c67' # sdxl-turbo + ): net = load_diffusers(name, network_on_disk, lora_scale=te_multipliers[i] if te_multipliers else 1.0) else: net = load_network(name, network_on_disk) @@ -226,7 +229,11 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No if recompile_model: shared.log.info("LoRA recompiling model") backup_lora_model = shared.compiled_model_state.lora_model - sd_models_compile.compile_diffusers(shared.sd_model) + if shared.opts.nncf_compress_weights and not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): + shared.sd_model = sd_models_compile.nncf_compress_weights(shared.sd_model) + if shared.opts.cuda_compile: + shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model) + shared.compiled_model_state.lora_model = backup_lora_model diff --git a/extensions-builtin/sd-webui-agent-scheduler b/extensions-builtin/sd-webui-agent-scheduler index 39159f2d5..721a36f59 160000 --- a/extensions-builtin/sd-webui-agent-scheduler +++ b/extensions-builtin/sd-webui-agent-scheduler @@ -1 +1 @@ -Subproject commit 39159f2d52f53a5cf7ba0dfd1a0f085cff3e71e5 +Subproject commit 721a36f59507e625c9982397c22edd7c14a0f62a diff --git a/extensions-builtin/sd-webui-controlnet b/extensions-builtin/sd-webui-controlnet index ecd33eb82..2091b6fb2 160000 --- a/extensions-builtin/sd-webui-controlnet +++ b/extensions-builtin/sd-webui-controlnet @@ -1 +1 @@ -Subproject commit ecd33eb82b25c77cec3185cc9647db904fcb7c03 +Subproject commit 2091b6fb21d9c76becb2a8860c8d2975ad3e428a diff --git a/html/locale_en.json b/html/locale_en.json index 85fbd3570..48bf19e90 100644 --- a/html/locale_en.json +++ b/html/locale_en.json @@ -165,7 +165,7 @@ {"id":"","label":"Separate Init Image","localized":"","hint":"Creates an additional window next to Control input labeled Init input, so you can have a separate image for both Control operations and an init source."} ], "process tab": [ - {"id":"","label":"Single Image","localized":"","hint":"Process single image"}, + {"id":"","label":"Process Image","localized":"","hint":"Process single image"}, {"id":"","label":"Process Batch","localized":"","hint":"Process batch of images"}, {"id":"","label":"Process Folder","localized":"","hint":"Process all images in a folder"}, {"id":"","label":"Scale by","localized":"","hint":"Use this tab to resize the source image(s) by a chosen factor"}, diff --git a/html/logo-wm.png b/html/logo-wm.png new file mode 100644 index 000000000..1bfdbf53a Binary files /dev/null and b/html/logo-wm.png differ diff --git a/html/reference.json b/html/reference.json index e67014603..eca499bc8 100644 --- a/html/reference.json +++ b/html/reference.json @@ -1,206 +1,229 @@ { - "DreamShaper SD 1.5 v8": { + + "DreamShaper SD v8": { + "original": true, "path": "dreamshaper_8.safetensors@https://civitai.com/api/download/models/128713", - "desc": "Showcase finetuned model based on Stable diffusion 1.5", "preview": "dreamshaper_8.jpg", - "original": true + "desc": "Showcase finetuned model based on Stable diffusion 1.5", + "extras": "width: 512, height: 512, sampler: DEIS, steps: 20, cfg_scale: 6.0" }, - "DreamShaper SD XL Turbo": { - "path": "dreamshaperXL_turboDpmppSDE.safetensors@https://civitai.com/api/download/models/251662", + "Dreamshaper SD v7 LCM": { + "path": "SimianLuo/LCM_Dreamshaper_v7", + "preview": "SimianLuo--LCM_Dreamshaper_v7.jpg", + "desc": 
"Latent Consistencey Models enable swift inference with minimal steps on any pre-trained LDMs, including Stable Diffusion. By distilling classifier-free guidance into the model's input, LCM can generate high-quality images in very short inference time. LCM can generate quality images in as few as 3-4 steps, making it blazingly fast.", + "extras": "width: 512, height: 512, sampler: LCM, steps: 4, cfg_scale: 0.0" + }, + "DreamShaper SD-XL Turbo": { + "path": "dreamshaperXL_v21TurboDPMSDE.safetensors@https://civitai.com/api/download/models/351306", + "preview": "dreamshaperXL_v21TurboDPMSDE.jpg", "desc": "Showcase finetuned model based on Stable diffusion XL", - "preview": "dreamshaperXL_turboDpmppSDE.jpg" + "extras": "width: 1024, height: 1024, sampler: DPM SDE, steps: 8, cfg_scale: 2.0" }, - "Juggernaut Reborn": { + "Juggernaut SD Reborn": { + "original": true, "path": "juggernaut_reborn.safetensors@https://civitai.com/api/download/models/274039", - "desc": "Showcase finetuned model based on Stable diffusion 1.5", "preview": "juggernaut_reborn.jpg", - "original": true + "desc": "Showcase finetuned model based on Stable diffusion 1.5", + "extras": "width: 512, height: 512, sampler: DEIS, steps: 20, cfg_scale: 6.0" }, - "Juggernaut XL v7 RunDiffusion": { - "path": "juggernautXL_v7Rundiffusion.safetensors@https://civitai.com/api/download/models/240840", + "Juggernaut SD-XL v9": { + "path": "juggernautXL_v9Rundiffusionphoto2.safetensors@https://civitai.com/api/download/models/348913", + "preview": "juggernautXL_v9Rundiffusionphoto2.jpg", "desc": "Showcase finetuned model based on Stable diffusion XL", - "preview": "juggernautXL_v7Rundiffusion.jpg" + "extras": "width: 1024, height: 1024, sampler: DEIS, steps: 20, cfg_scale: 6.0" }, + "Juggernaut SD-XL v9 Lightning": { + "path": "juggernautXL_v9Rdphoto2Lightning.safetensors@https://civitai.com/api/download/models/357609", + "preview": "juggernautXL_v9Rdphoto2Lightning.jpg", + "desc": "Showcase finetuned model based on Stable diffusion XL", + "extras": "width: 1024, height: 1024, sampler: DPM SDE, steps: 6, cfg_scale: 2.0" + }, + "Tempest SD-XL v0.1": { + "path": "TempestV0.1-Artistic.safetensors@https://huggingface.co/dataautogpt3/TempestV0.1/resolve/main/TempestV0.1-Artistic.safetensors?download=true", + "preview": "TempestV0.1-Artistic.jpg", + "desc": "The TempestV0.1 Initiative is a powerhouse in image generation, leveraging an unparalleled dataset of over 6 million images. The collection's vast scale, with resolutions from 1400x2100 to 4800x7200, encompasses 200GB of high-quality content.", + "extras": "width: 2048, height: 1024, sampler: DEIS, steps: 40, cfg_scale: 6.0" + }, + "RunwayML SD 1.5": { - "path": "runwayml/stable-diffusion-v1-5", - "alt": "v1-5-pruned-emaonly.safetensors@https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors?download=true", + "original": true, + "path": "v1-5-pruned-fp16-emaonly.safetensors@https://huggingface.co/Aptronym/SDNext/resolve/main/Reference/v1-5-pruned-fp16-emaonly.safetensors?download=true", + "preview": "v1-5-pruned-fp16-emaonly.jpg", "desc": "Stable Diffusion 1.5 is the base model all other 1.5 checkpoint were trained from. It's a latent text-to-image diffusion model capable of generating photo-realistic images given any text input. 
The Stable-Diffusion-v1-5 checkpoint was initialized with the weights of the Stable-Diffusion-v1-2 checkpoint and subsequently fine-tuned on 595k steps at resolution 512x512.", - "preview": "runwayml--stable-diffusion-v1-5.jpg", - "original": true + "extras": "width: 512, height: 512, sampler: DEIS, steps: 20, cfg_scale: 6.0" }, - "StabilityAI SD 2.1 EMA": { - "path": "stabilityai/stable-diffusion-2-1-base", - "alt": "v2-1_512-ema-pruned.safetensors@https://huggingface.co/stabilityai/stable-diffusion-2-1-base/resolve/main/v2-1_512-ema-pruned.safetensors?download=true", + "StabilityAI SD 2.1": { + "path": "huggingface/stabilityai/stable-diffusion-2-1-base", + "preview": "stabilityai--stable-diffusion-2-1-base.jpg", + "skip": true, + "variant": "fp16", "desc": "This stable-diffusion-2-1-base model fine-tunes stable-diffusion-2-base (512-base-ema.ckpt) with 220k extra steps taken", - "preview": "stabilityai--stable-diffusion-2.1-base.jpg", - "original": true + "extras": "width: 512, height: 512, sampler: DEIS, steps: 20, cfg_scale: 6.0" }, "StabilityAI SD 2.1 V": { - "path": "stabilityai/stable-diffusion-2-1-base", - "alt": "v2-1_768-ema-pruned.safetensors@https://huggingface.co/stabilityai/stable-diffusion-2-1/resolve/main/v2-1_768-ema-pruned.safetensors?download=true", + "path": "huggingface/stabilityai/stable-diffusion-2-1", + "preview": "stabilityai--stable-diffusion-2-1.jpg", + "skip": true, + "variant": "fp16", "desc": "This stable-diffusion-2 model is resumed from stable-diffusion-2-base (512-base-ema.ckpt) and trained for 150k steps using a v-objective on the same dataset. Resumed for another 140k steps on 768x768 images", - "preview": "stabilityai--stable-diffusion-2.1-base.jpg", - "original": true + "extras": "width: 768, height: 768, sampler: DEIS, steps: 20, cfg_scale: 6.0" }, "StabilityAI SD-XL 1.0 Base": { - "path": "stabilityai/stable-diffusion-xl-base-1.0", - "variant": "fp16", + "path": "sd_xl_base_1.0.safetensors@https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors?download=true", + "preview": "sd_xl_base_1.0.jpg", "desc": "Stable Diffusion XL (SDXL) is the latest AI image generation model that is tailored towards more photorealistic outputs with more detailed imagery and composition compared to previous SD models, including SD 2.1. It can make realistic faces, legible text within the images, and better image composition, all while using shorter and simpler prompts at a greatly increased base resolution of 1024x1024. Just like its predecessors, SDXL has the ability to generate image variations using image-to-image prompting, inpainting (reimagining of the selected parts of an image), and outpainting (creating new parts that lie outside the image borders).", - "preview": "stabilityai--stable-diffusion-xl-base-1.0.jpg" - }, - "StabilityAI SD 2.1 Turbo": { - "_path": "stabilityai/sd-turbo", - "path": "sd_turbo.safetensors@https://huggingface.co/stabilityai/sd-turbo/resolve/main/sd_turbo.safetensors?download=true", - "variant": "fp16", - "desc": "SD-Turbo is a distilled version of Stable Diffusion 2.1, trained for real-time synthesis. SD-Turbo is based on a novel training method called Adversarial Diffusion Distillation (ADD) (see the technical report), which allows sampling large-scale foundational image diffusion models in 1 to 4 steps at high image quality. 
This approach uses score distillation to leverage large-scale off-the-shelf image diffusion models as a teacher signal and combines this with an adversarial loss to ensure high image fidelity even in the low-step regime of one or two sampling steps.", - "preview": "stabilityai--sd-turbo.jpg", - "original": true - }, - "StabilityAI SD-XL Turbo": { - "_path": "stabilityai/sdxl-turbo", - "path": "sdxl_turbo.safetensors@https://huggingface.co/stabilityai/sdxl-turbo/resolve/main/sd_xl_turbo_1.0_fp16.safetensors?download=true", - "variant": "fp16", - "desc": "SDXL-Turbo is a distilled version of SDXL 1.0, trained for real-time synthesis. SDXL-Turbo is based on a novel training method called Adversarial Diffusion Distillation (ADD) (see the technical report), which allows sampling large-scale foundational image diffusion models in 1 to 4 steps at high image quality. This approach uses score distillation to leverage large-scale off-the-shelf image diffusion models as a teacher signal and combines this with an adversarial loss to ensure high image fidelity even in the low-step regime of one or two sampling steps.", - "preview": "stabilityai--sdxl-turbo.jpg" - }, - "StabilityAI Stable Video Diffusion": { - "path": "stabilityai/stable-video-diffusion-img2vid", - "desc": "(SVD) Image-to-Video is a latent diffusion model trained to generate short video clips from an image conditioning. This model was trained to generate 14 frames at resolution 576x1024 given a context frame of the same size. We also finetune the widely used f8-decoder for temporal consistency.", - "preview": "stabilityai--stable-video-diffusion-img2vid.jpg" - }, - "StabilityAI Stable Video Diffusion XT": { - "path": "stabilityai/stable-video-diffusion-img2vid-xt", - "desc": "(SVD) Image-to-Video is a latent diffusion model trained to generate short video clips from an image conditioning. This model was trained to generate 25 frames at resolution 576x1024 given a context frame of the same size, finetuned from SVD Image-to-Video [14 frames]. We also finetune the widely used f8-decoder for temporal consistency.", - "preview": "stabilityai--stable-video-diffusion-img2vid-xt.jpg" + "extras": "width: 1024, height: 1024, sampler: DEIS, steps: 20, cfg_scale: 6.0" }, + "StabilityAI Stable Cascade": { "path": "huggingface/stabilityai/stable-cascade", "skip": true, + "variant": "bf16", + "desc": "Stable Cascade is a diffusion model built upon the Würstchen architecture and its main difference to other models like Stable Diffusion is that it is working at a much smaller latent space. Why is this important? The smaller the latent space, the faster you can run inference and the cheaper the training becomes. How small is the latent space? Stable Diffusion uses a compression factor of 8, resulting in a 1024x1024 image being encoded to 128x128. Stable Cascade achieves a compression factor of 42, meaning that it is possible to encode a 1024x1024 image to 24x24, while maintaining crisp reconstructions. The text-conditional model is then trained in the highly compressed latent space. 
Previous versions of this architecture, achieved a 16x cost reduction over Stable Diffusion 1.5", + "preview": "stabilityai--stable-cascade.jpg", + "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 4.0, image_cfg_scale: 1.0" + }, + "StabilityAI Stable Cascade Lite": { + "path": "huggingface/stabilityai/stable-cascade-lite", + "skip": true, + "variant": "bf16", "desc": "Stable Cascade is a diffusion model built upon the Würstchen architecture and its main difference to other models like Stable Diffusion is that it is working at a much smaller latent space. Why is this important? The smaller the latent space, the faster you can run inference and the cheaper the training becomes. How small is the latent space? Stable Diffusion uses a compression factor of 8, resulting in a 1024x1024 image being encoded to 128x128. Stable Cascade achieves a compression factor of 42, meaning that it is possible to encode a 1024x1024 image to 24x24, while maintaining crisp reconstructions. The text-conditional model is then trained in the highly compressed latent space. Previous versions of this architecture, achieved a 16x cost reduction over Stable Diffusion 1.5", - "preview": "stabilityai--stable-cascade.jpg" + "preview": "stabilityai--stable-cascade.jpg", + "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 4.0, image_cfg_scale: 1.0" }, + "Segmind Vega": { - "path": "segmind/Segmind-Vega", + "path": "huggingface/segmind/Segmind-Vega", + "preview": "segmind--Segmind-Vega.jpg", "desc": "The Segmind-Vega Model is a distilled version of the Stable Diffusion XL (SDXL), offering a remarkable 70% reduction in size and an impressive 100% speedup while retaining high-quality text-to-image generation capabilities. Trained on diverse datasets, including Grit and Midjourney scrape data, it excels at creating a wide range of visual content based on textual prompts. Employing a knowledge distillation strategy, Segmind-Vega leverages the teachings of several expert models, including SDXL, ZavyChromaXL, and JuggernautXL, to combine their strengths and produce compelling visual outputs.", - "preview": "segmind--Segmind-Vega.jpg" + "variant": "fp16", + "skip": true, + "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 9.0" }, "Segmind SSD-1B": { - "path": "segmind/SSD-1B", + "path": "huggingface/segmind/SSD-1B", + "preview": "segmind--SSD-1B.jpg", "desc": "The Segmind Stable Diffusion Model (SSD-1B) offers a compact, efficient, and distilled version of the SDXL model. At 50% smaller and 60% faster than Stable Diffusion XL (SDXL), it provides quick and seamless performance without sacrificing image quality.", - "preview": "segmind--SSD-1B.jpg" + "variant": "fp16", + "skip": true, + "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 9.0" }, "Segmind Tiny": { "path": "segmind/tiny-sd", + "preview": "segmind--tiny-sd.jpg", "desc": "Segmind's Tiny-SD offers a compact, efficient, and distilled version of Realistic Vision 4.0 and is up to 80% faster than SD1.5", - "preview": "segmind--tiny-sd.jpg" + "extras": "width: 512, height: 512, sampler: Default, cfg_scale: 9.0" }, "Segmind SegMoE SD 4x2": { "path": "segmind/SegMoE-SD-4x2-v0", + "preview": "segmind--SegMoE-SD-4x2-v0.jpg", "desc": "SegMoE-SD-4x2-v0 is an untrained Segmind Mixture of Diffusion Experts Model generated using segmoe from 4 Expert SD1.5 models. 
SegMoE is a powerful framework for dynamically combining Stable Diffusion Models into a Mixture of Experts within minutes without training", - "preview": "segmind--SegMoE-SD-4x2-v0.jpg" - }, - "Segmind SegMoE XL 2x1": { - "path": "segmind/SegMoE-2x1-v0", - "desc": "SegMoE-2x1-v0 is an untrained Segmind Mixture of Diffusion Experts Model generated using segmoe from 2 Expert SDXL models. SegMoE is a powerful framework for dynamically combining Stable Diffusion Models into a Mixture of Experts within minutes without training", - "preview": "segmind--SegMoE-2x1-v0.jpg" + "extras": "width: 512, height: 512, sampler: Default" }, "Segmind SegMoE XL 4x2": { "path": "segmind/SegMoE-4x2-v0", + "preview": "segmind--SegMoE-4x2-v0.jpg", "desc": "SegMoE-4x2-v0 is an untrained Segmind Mixture of Diffusion Experts Model generated using segmoe from 4 Expert SDXL models. SegMoE is a powerful framework for dynamically combining Stable Diffusion Models into a Mixture of Experts within minutes without training", - "preview": "segmind--SegMoE-4x2-v0.jpg" + "extras": "width: 1024, height: 1024, sampler: Default" }, - "LCM SD-1.5 Dreamshaper 7": { - "path": "SimianLuo/LCM_Dreamshaper_v7", - "desc": "Latent Consistencey Models enable swift inference with minimal steps on any pre-trained LDMs, including Stable Diffusion. By distilling classifier-free guidance into the model's input, LCM can generate high-quality images in very short inference time. LCM can generate quality images in as few as 3-4 steps, making it blazingly fast.", - "preview": "SimianLuo--LCM_Dreamshaper_v7.jpg" - }, - "Pixart-α XL 2 Medium 512": { + + "Pixart-α XL 2 Medium": { "path": "PixArt-alpha/PixArt-XL-2-512x512", "desc": "PixArt-α is a Transformer-based T2I diffusion model whose image generation quality is competitive with state-of-the-art image generators (e.g., Imagen, SDXL, and even Midjourney), and the training speed markedly surpasses existing large-scale T2I models. Extensive experiments demonstrate that PIXART-α excels in image quality, artistry, and semantic control. It can directly generate 512px images from text prompts within a single sampling process.", - "preview": "PixArt-alpha--PixArt-XL-2-512x512.jpg" + "preview": "PixArt-alpha--PixArt-XL-2-512x512.jpg", + "extras": "width: 512, height: 512, sampler: Default, cfg_scale: 2.0" }, - "Pixart-α XL 2 Large 1024": { + "Pixart-α XL 2 Large": { "path": "PixArt-alpha/PixArt-XL-2-1024-MS", "desc": "PixArt-α is a Transformer-based T2I diffusion model whose image generation quality is competitive with state-of-the-art image generators (e.g., Imagen, SDXL, and even Midjourney), and the training speed markedly surpasses existing large-scale T2I models. Extensive experiments demonstrate that PIXART-α excels in image quality, artistry, and semantic control. It can directly generate 1024px images from text prompts within a single sampling process.", - "preview": "PixArt-alpha--PixArt-XL-2-1024-MS.jpg" - }, - "Pixart-α XL 2 Large LCM": { - "path": "PixArt-alpha/PixArt-LCM-XL-2-1024-MS", - "desc": "Pixart-α consists of pure transformer blocks for latent diffusion: It can directly generate 1024px images from text prompts within a single sampling process. LCMs is a diffusion distillation method which predict PF-ODE's solution directly in latent space, achieving super fast inference with few steps. Following LCM LoRA, we illustrative of the generation speed we achieve on various computers. 
Let us stress again how liberating it is to explore image generation so easily with PixArt-LCM.", - "preview": "PixArt-alpha--PixArt-XL-2-1024-MS.jpg" - }, - "Warp Wuerstchen": { - "path": "warp-ai/wuerstchen", - "desc": "Würstchen is a diffusion model whose text-conditional model works in a highly compressed latent space of images. Why is this important? Compressing data can reduce computational costs for both training and inference by magnitudes. Training on 1024x1024 images, is way more expensive than training at 32x32. Usually, other works make use of a relatively small compression, in the range of 4x - 8x spatial compression. Würstchen takes this to an extreme. Through its novel design, we achieve a 42x spatial compression. Würstchen employs a two-stage compression, what we call Stage A and Stage B. Stage A is a VQGAN, and Stage B is a Diffusion Autoencoder (more details can be found in the paper). A third model, Stage C, is learned in that highly compressed latent space. This training requires fractions of the compute used for current top-performing models, allowing also cheaper and faster inference.", - "preview": "warp-ai--wuerstchen.jpg" + "preview": "PixArt-alpha--PixArt-XL-2-1024-MS.jpg", + "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0" }, + "Kandinsky 2.1": { "path": "kandinsky-community/kandinsky-2-1", "desc": "Kandinsky 2.1 is a text-conditional diffusion model based on unCLIP and latent diffusion, composed of a transformer-based image prior model, a unet diffusion model, and a decoder. Kandinsky 2.1 inherits best practices from Dall-E 2 and Latent diffusion while introducing some new ideas. It uses the CLIP model as a text and image encoder, and diffusion image prior (mapping) between latent spaces of CLIP modalities. This approach increases the visual performance of the model and unveils new horizons in blending images and text-guided image manipulation.", - "preview": "kandinsky-community--kandinsky-2-1.jpg" + "preview": "kandinsky-community--kandinsky-2-1.jpg", + "extras": "width: 768, height: 768, sampler: Default" }, "Kandinsky 2.2": { "path": "kandinsky-community/kandinsky-2-2-decoder", "desc": "Kandinsky 2.2 is a text-conditional diffusion model (+0.1!) based on unCLIP and latent diffusion, composed of a transformer-based image prior model, a unet diffusion model, and a decoder. Kandinsky 2.1 inherits best practices from Dall-E 2 and Latent diffusion while introducing some new ideas. It uses the CLIP model as a text and image encoder, and diffusion image prior (mapping) between latent spaces of CLIP modalities. This approach increases the visual performance of the model and unveils new horizons in blending images and text-guided image manipulation.", - "preview": "kandinsky-community--kandinsky-2-2-decoder.jpg" + "preview": "kandinsky-community--kandinsky-2-2-decoder.jpg", + "extras": "width: 768, height: 768, sampler: Default" }, "Kandinsky 3": { "path": "kandinsky-community/kandinsky-3", "desc": "Kandinsky 3.0 is an open-source text-to-image diffusion model built upon the Kandinsky2-x model family. In comparison to its predecessors, Kandinsky 3.0 incorporates more data, specifically related to Russian culture, which allows it to generate pictures related to Russian culture. 
Furthermore, enhancements have been made to the text understanding and visual quality of the model, achieved by increasing the size of the text encoder and Diffusion U-Net models, respectively.", - "preview": "kandinsky-community--kandinsky-3.jpg" + "preview": "kandinsky-community--kandinsky-3.jpg", + "variant": "fp16", + "extras": "width: 1024, height: 1024, sampler: Default" }, + "Playground v1": { "path": "playgroundai/playground-v1", "desc": "Playground v1 is a latent diffusion model that improves the overall HDR quality to get more stunning images.", - "preview": "playgroundai--playground-v1.jpg" + "preview": "playgroundai--playground-v1.jpg", + "extras": "width: 512, height: 512, sampler: Default" }, - "Playground v2 256": { + "Playground v2 Small": { "path": "playgroundai/playground-v2-256px-base", "desc": "Playground v2 is a diffusion-based text-to-image generative model. The model was trained from scratch by the research team at Playground. Images generated by Playground v2 are favored 2.5 times more than those produced by Stable Diffusion XL, according to Playground’s user study.", - "preview": "playgroundai--playground-v2-256px-base.jpg" + "preview": "playgroundai--playground-v2-256px-base.jpg", + "extras": "width: 256, height: 256, sampler: Default" }, - "Playground v2 512": { + "Playground v2 Medium": { "path": "playgroundai/playground-v2-512px-base", "desc": "Playground v2 is a diffusion-based text-to-image generative model. The model was trained from scratch by the research team at Playground. Images generated by Playground v2 are favored 2.5 times more than those produced by Stable Diffusion XL, according to Playground’s user study.", - "preview": "playgroundai--playground-v2-512px-base.jpg" + "preview": "playgroundai--playground-v2-512px-base.jpg", + "extras": "width: 512, height: 512, sampler: Default" }, - "Playground v2 1024": { + "Playground v2 Large": { "path": "playgroundai/playground-v2-1024px-aesthetic", "desc": "Playground v2 is a diffusion-based text-to-image generative model. The model was trained from scratch by the research team at Playground. Images generated by Playground v2 are favored 2.5 times more than those produced by Stable Diffusion XL, according to Playground’s user study.", - "preview": "playgroundai--playground-v2-1024px-aesthetic.jpg" + "preview": "playgroundai--playground-v2-1024px-aesthetic.jpg", + "extras": "width: 1024, height: 1024, sampler: Default" }, "Playground v2.5": { "path": "playground-v2.5-1024px-aesthetic.fp16.safetensors@https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic/resolve/main/playground-v2.5-1024px-aesthetic.fp16.safetensors?download=true", "desc": "Playground v2.5 is a diffusion-based text-to-image generative model, and a successor to Playground v2. Playground v2.5 is the state-of-the-art open-source model in aesthetic quality. Our user studies demonstrate that our model outperforms SDXL, Playground v2, PixArt-α, DALL-E 3, and Midjourney 5.2.", - "preview": "playgroundai--playground-v2-1024px-aesthetic.jpg" - }, - "DeepFloyd IF Medium": { - "path": "DeepFloyd/IF-I-M-v1.0", - "desc": "DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model, that can generate pictures with new state-of-the-art for photorealism and language understanding. The result is a highly efficient model that outperforms current state-of-the-art models, achieving a zero-shot FID-30K score of 6.66 on the COCO dataset. 
It is modular and composed of frozen text mode and three pixel cascaded diffusion modules, each designed to generate images of increasing resolution: 64x64, 256x256, and 1024x1024.", - "preview": "DeepFloyd--IF-I-M-v1.0.jpg" + "preview": "playgroundai--playground-v2-1024px-aesthetic.jpg", + "extras": "width: 1024, height: 1024, sampler: DPM++ 2M EDM" }, + "aMUSEd 256": { - "path": "amused/amused-256", + "path": "huggingface/amused/amused-256", + "skip": true, "desc": "Amused is a lightweight text to image model based off of the muse architecture. Amused is particularly useful in applications that require a lightweight and fast model such as generating many images quickly at once.", - "preview": "amused--amused-256.jpg" + "preview": "amused--amused-256.jpg", + "extras": "width: 256, height: 256, sampler: Default" }, "aMUSEd 512": { "path": "amused/amused-512", "desc": "Amused is a lightweight text to image model based off of the muse architecture. Amused is particularly useful in applications that require a lightweight and fast model such as generating many images quickly at once.", - "preview": "amused--amused-512.jpg" + "preview": "amused--amused-512.jpg", + "extras": "width: 512, height: 512, sampler: Default" + }, + + "Warp Wuerstchen": { + "path": "warp-ai/wuerstchen", + "desc": "Würstchen is a diffusion model whose text-conditional model works in a highly compressed latent space of images. Why is this important? Compressing data can reduce computational costs for both training and inference by magnitudes. Training on 1024x1024 images is way more expensive than training at 32x32. Usually, other works make use of a relatively small compression, in the range of 4x - 8x spatial compression. Würstchen takes this to an extreme. Through its novel design, we achieve a 42x spatial compression. Würstchen employs a two-stage compression, which we call Stage A and Stage B. Stage A is a VQGAN, and Stage B is a Diffusion Autoencoder (more details can be found in the paper). A third model, Stage C, is learned in that highly compressed latent space. This training requires fractions of the compute used for current top-performing models, allowing also cheaper and faster inference.", + "preview": "warp-ai--wuerstchen.jpg", + "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 4.0, image_cfg_scale: 0.0" }, "KOALA 700M": { "path": "huggingface/etri-vilab/koala-700m-llava-cap", "variant": "fp16", "skip": true, "desc": "Fast text-to-image model, called KOALA, by compressing SDXL's U-Net and distilling knowledge from SDXL into our model. KOALA-700M can generate a 1024x1024 image in less than 1.5 seconds on an NVIDIA 4090 GPU, which is more than 2x faster than SDXL.", - "preview": "etri-vilab--koala-700m-llava-cap.jpg" + "preview": "etri-vilab--koala-700m-llava-cap.jpg", + "extras": "width: 1024, height: 1024, sampler: Default" }, - "Tsinghua UniDiffuser": { "path": "thu-ml/unidiffuser-v1", "desc": "UniDiffuser is a unified diffusion framework to fit all distributions relevant to a set of multi-modal data in one transformer. UniDiffuser is able to perform image, text, text-to-image, image-to-text, and image-text pair generation by setting proper timesteps without additional overhead.\nSpecifically, UniDiffuser employs a variation of transformer, called U-ViT, which parameterizes the joint noise prediction network. 
Other components perform as encoders and decoders of different modalities, including a pretrained image autoencoder from Stable Diffusion, a pretrained image ViT-B/32 CLIP encoder, a pretrained text ViT-L CLIP encoder, and a GPT-2 text decoder finetuned by ourselves.", - "preview": "thu-ml--unidiffuser-v1.jpg" + "preview": "thu-ml--unidiffuser-v1.jpg", + "extras": "width: 512, height: 512, sampler: Default" }, "SalesForce BLIP-Diffusion": { "path": "salesforce/blipdiffusion", @@ -211,5 +234,12 @@ "path": "XCLiu/instaflow_0_9B_from_sd_1_5", "desc": "InstaFlow is an ultra-fast, one-step image generator that achieves image quality close to Stable Diffusion. This efficiency is made possible through a recent Rectified Flow technique, which trains probability flows with straight trajectories, hence inherently requiring only a single step for fast inference.", "preview": "XCLiu--instaflow_0_9B_from_sd_1_5.jpg" + }, + "DeepFloyd IF Medium": { + "path": "DeepFloyd/IF-I-M-v1.0", + "desc": "DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model, that can generate pictures with new state-of-the-art for photorealism and language understanding. The result is a highly efficient model that outperforms current state-of-the-art models, achieving a zero-shot FID-30K score of 6.66 on the COCO dataset. It is modular and composed of frozen text mode and three pixel cascaded diffusion modules, each designed to generate images of increasing resolution: 64x64, 256x256, and 1024x1024.", + "preview": "DeepFloyd--IF-I-M-v1.0.jpg", + "extras": "width: 1024, height: 1024, sampler: Default" } -} \ No newline at end of file + +} diff --git a/html/screenshot-ledit.jpg b/html/screenshot-ledit.jpg new file mode 100644 index 000000000..f43189d81 Binary files /dev/null and b/html/screenshot-ledit.jpg differ diff --git a/installer.py b/installer.py index 8b7aa18cc..a01e5d4ca 100644 --- a/installer.py +++ b/installer.py @@ -67,6 +67,12 @@ def __init__(self, capacity): self.formatter = logging.Formatter('{ "asctime":"%(asctime)s", "created":%(created)f, "facility":"%(name)s", "pid":%(process)d, "tid":%(thread)d, "level":"%(levelname)s", "module":"%(module)s", "func":"%(funcName)s", "msg":"%(message)s" }') def emit(self, record): + if record.msg is not None and not isinstance(record.msg, str): + record.msg = str(record.msg) + try: + record.msg = record.msg.replace('"', "'") + except Exception: + pass msg = self.format(record) # self.buffer.append(json.loads(msg)) self.buffer.append(msg) @@ -425,9 +431,12 @@ def is_rocm_available(): log.info('nVidia CUDA toolkit detected: nvidia-smi present') if not args.use_xformers: torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision --index-url https://download.pytorch.org/whl/cu121') + xformers_package = os.environ.get('XFORMERS_PACKAGE', '--pre triton xformers --index-url https://download.pytorch.org/whl/cu121') else: torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision --index-url https://download.pytorch.org/whl/cu118') - xformers_package = os.environ.get('XFORMERS_PACKAGE', '--pre xformers' if opts.get('cross_attention_optimization', '') == 'xFormers' else 'none') + xformers_package = os.environ.get('XFORMERS_PACKAGE', '--pre triton xformers --index-url https://download.pytorch.org/whl/cu118') + if opts.get('cross_attention_optimization', '') != 'xFormers': + xformers_package = 'none' install('onnxruntime-gpu', 'onnxruntime-gpu', ignore=True) elif is_rocm_available(): is_windows = platform.system() == 'Windows' @@ -479,7 +488,6 @@ def 
is_rocm_available(): log.debug(f'ROCm hipconfig failed: {e}') rocm_ver = None if args.use_zluda: - torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.2.0 torchvision --index-url https://download.pytorch.org/whl/cu118') log.warning("ZLUDA support: experimental") zluda_need_dll_patch = is_windows and not installed('torch') zluda_path = find_zluda() @@ -493,15 +501,24 @@ def is_rocm_available(): import tarfile archive_type = tarfile.TarFile zluda_url = 'https://github.com/vosen/ZLUDA/releases/download/v3/zluda-3-linux.tar.gz' - urllib.request.urlretrieve(zluda_url, '_zluda') - with archive_type('_zluda', 'r') as f: - f.extractall('.zluda') - zluda_path = os.path.abspath('./.zluda') - os.remove('_zluda') - log.debug(f'Found ZLUDA in {zluda_path}') - paths = os.environ.get('PATH', '.') - if zluda_path not in paths: - os.environ['PATH'] = paths + ';' + zluda_path + try: + urllib.request.urlretrieve(zluda_url, '_zluda') + with archive_type('_zluda', 'r') as f: + f.extractall('.zluda') + zluda_path = os.path.abspath('./.zluda') + os.remove('_zluda') + except Exception as e: + log.warning(f'Failed to install ZLUDA: {e}') + if os.path.exists(os.path.join(zluda_path, 'nvcuda.dll')): + log.info(f'Using ZLUDA in {zluda_path}') + torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.2.1 torchvision --index-url https://download.pytorch.org/whl/cu118') + paths = os.environ.get('PATH', '.') + if zluda_path not in paths: + os.environ['PATH'] = paths + ';' + zluda_path + else: + log.info('Using CPU-only torch') + torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision') + zluda_need_dll_patch = False elif is_windows: # TODO TBD after ROCm for Windows is released log.warning("HIP SDK is detected, but no Torch release for Windows available") log.info("For ZLUDA support specify '--use-zluda'") @@ -515,7 +532,9 @@ def is_rocm_available(): else: torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision --index-url https://download.pytorch.org/whl/rocm5.5') # ROCm 5.5 is oldest for PyTorch 2.1 if rocm_ver is not None: - install(os.environ.get('ONNXRUNTIME_PACKAGE', get_onnxruntime_source_for_rocm(arr)), "onnxruntime-training built with ROCm", ignore=True) + ort_version = os.environ.get('ONNXRUNTIME_VERSION', None) + ort_package = os.environ.get('ONNXRUNTIME_PACKAGE', f"--pre onnxruntime-training{'' if ort_version is None else ('==' + ort_version)} --index-url https://pypi.lsh.sh/{rocm_ver[0]}{rocm_ver[2]} --extra-index-url https://pypi.org/simple") + install(ort_package, 'onnxruntime-training') xformers_package = os.environ.get('XFORMERS_PACKAGE', 'none') elif allow_ipex and (args.use_ipex or shutil.which('sycl-ls') is not None or shutil.which('sycl-ls.exe') is not None or os.environ.get('ONEAPI_ROOT') is not None or os.path.exists('/opt/intel/oneapi') or os.path.exists("C:/Program Files (x86)/Intel/oneAPI") or os.path.exists("C:/oneAPI")): args.use_ipex = True # pylint: disable=attribute-defined-outside-init @@ -618,9 +637,7 @@ def is_rocm_available(): if 'xformers' in xformers_package: install(f'--no-deps {xformers_package}', ignore=True) import torch - import xformers - if torch.__version__ != '2.0.1+cu118' and xformers.__version__ in ['0.0.22', '0.0.21', '0.0.20']: - log.warning(f'Likely incompatible torch with: xformers=={xformers.__version__} installed: torch=={torch.__version__} required: torch==2.1.0+cu118 - build xformers manually or downgrade torch') + import xformers # pylint: disable=unused-import elif not args.experimental and not args.use_xformers: uninstall('xformers') 
except Exception as e: @@ -910,6 +927,7 @@ def get_version(): 'app': 'sd.next', 'updated': updated, 'hash': githash, + 'branch': branch_name.replace('\n', ''), 'url': origin.replace('\n', '') + '/tree/' + branch_name.replace('\n', '') } except Exception: @@ -917,18 +935,6 @@ def get_version(): return version -def get_onnxruntime_source_for_rocm(rocm_ver): - ort_version = "1.16.3" # hardcoded - cp_str = f"{sys.version_info.major}{sys.version_info.minor}" - if rocm_ver is None: - command = subprocess.run('hipconfig --version', shell=True, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - rocm_ver = command.stdout.decode(encoding="utf8", errors="ignore").split('.') - if "linux" in sys.platform: - return f"https://download.onnxruntime.ai/onnxruntime_training-{ort_version}%2Brocm{rocm_ver[0]}{rocm_ver[1]}-cp{cp_str}-cp{cp_str}-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" - else: - return 'onnxruntime-gpu' - - def find_zluda(): zluda_path = os.environ.get('ZLUDA', None) if zluda_path is None: diff --git a/javascript/amethyst-nightfall.css b/javascript/amethyst-nightfall.css index 1fc112a62..ec69db97a 100644 --- a/javascript/amethyst-nightfall.css +++ b/javascript/amethyst-nightfall.css @@ -1,6 +1,6 @@ /* generic html tags */ :root { - --font: "Source Sans Pro", 'ui-sans-serif', 'system-ui', sans-serif; + --font: "Source Sans Pro", 'ui-sans-serif', 'system-ui', sans-serif, 'NotoSans'; --font-size: 16px; --highlight-color: #8a3df6; /* Purple color */ --inactive-color: #404040; /* Darker shade of gray */ @@ -93,7 +93,7 @@ svg.feather.feather-image, .feather .feather-image { display: none } #img2img_settings { min-width: calc(2 * var(--left-column)); max-width: calc(2 * var(--left-column)); background-color: #111111; padding-top: 16px; } #interrogate, #deepbooru { margin: 0 0px 10px 0px; max-width: 80px; max-height: 80px; font-weight: normal; font-size: 0.95em; } #quicksettings .gr-button-tool { font-size: 1.6rem; box-shadow: none; margin-left: -20px; margin-top: -2px; height: 2.4em; } -#open_folder_extras, #footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } +#footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } #save-animation { border-radius: var(--radius-sm) !important; margin-bottom: 16px; background-color: #111111; } #script_list { padding: 4px; margin-top: 16px; margin-bottom: 8px; } #tab_extensions table { background-color: #222222; } diff --git a/javascript/base.css b/javascript/base.css index 18ff4d42f..4aafcee4d 100644 --- a/javascript/base.css +++ b/javascript/base.css @@ -1,3 +1,5 @@ +@font-face { font-family: 'NotoSans'; font-display: swap; font-style: normal; font-weight: 100; src: local('NotoSans'), url('notosans-nerdfont-regular.ttf') } + /* toolbutton */ .gradio-button.tool { max-width: min-content; min-width: min-content !important; align-self: end; font-size: 1.4em; color: var(--body-text-color) !important; } @@ -26,7 +28,7 @@ /* fullpage image viewer */ #lightboxModal{ display: none; position: fixed; z-index: 1001; left: 0; top: 0; width: 100%; height: 100%; overflow: auto; background-color: rgba(20, 20, 20, 0.75); backdrop-filter: blur(6px); - user-select: none; -webkit-user-select: none; flex-direction: row; } + user-select: none; -webkit-user-select: none; flex-direction: row; font-family: 'NotoSans'; } 
.modalControls { display: flex; justify-content: space-evenly; background-color: transparent; position: absolute; width: 99%; z-index: 1; } .modalControls:hover { background-color: #50505050; } .modalControls span { color: white; font-size: 2em; font-weight: bold; cursor: pointer; filter: grayscale(100%); } diff --git a/javascript/black-gray.css b/javascript/black-gray.css index 03e07cb63..a78305032 100644 --- a/javascript/black-gray.css +++ b/javascript/black-gray.css @@ -111,7 +111,7 @@ svg.feather.feather-image, .feather .feather-image { display: none } #img2img_settings { min-width: calc(2 * var(--left-column)); max-width: calc(2 * var(--left-column)); background-color: var(--primary-950); padding-top: 16px; } #interrogate, #deepbooru { margin: 0 0px 10px 0px; max-width: 80px; max-height: 80px; font-weight: normal; font-size: 0.95em; } #quicksettings .gr-button-tool { font-size: 1.6rem; box-shadow: none; margin-left: -20px; margin-top: -2px; height: 2.4em; } -#open_folder_extras, #footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } +#footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } #save-animation { border-radius: var(--radius-sm) !important; margin-bottom: 16px; background-color: var(--primary-950); } #script_list { padding: 4px; margin-top: 16px; margin-bottom: 8px; } #settings > div.flex-wrap { width: 15em; } diff --git a/javascript/black-orange.css b/javascript/black-orange.css index 4b9845144..816394d0d 100644 --- a/javascript/black-orange.css +++ b/javascript/black-orange.css @@ -110,7 +110,7 @@ svg.feather.feather-image, .feather .feather-image { display: none } #img2img_settings { min-width: calc(2 * var(--left-column)); max-width: calc(2 * var(--left-column)); background-color: #111111; padding-top: 16px; } #interrogate, #deepbooru { margin: 0 0px 10px 0px; max-width: 80px; max-height: 80px; font-weight: normal; font-size: 0.95em; } #quicksettings .gr-button-tool { font-size: 1.6rem; box-shadow: none; margin-left: -20px; margin-top: -2px; height: 2.4em; } -#open_folder_extras, #footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } +#footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } #save-animation { border-radius: var(--radius-sm) !important; margin-bottom: 16px; background-color: #111111; } #script_list { padding: 4px; margin-top: 16px; margin-bottom: 8px; } #settings > div.flex-wrap { width: 15em; } diff --git a/javascript/black-teal.css b/javascript/black-teal.css index 34250a660..a0ea61b63 100644 --- a/javascript/black-teal.css +++ b/javascript/black-teal.css @@ -130,7 +130,7 @@ svg.feather.feather-image, .feather .feather-image { display: none } #img2img_settings { min-width: calc(2 * var(--left-column)); max-width: calc(2 * var(--left-column)); background-color: var(--neutral-950); padding-top: 16px; } #interrogate, #deepbooru { margin: 0 0px 10px 0px; max-width: 80px; max-height: 80px; font-weight: normal; font-size: 0.95em; } #quicksettings .gr-button-tool { font-size: 1.6rem; box-shadow: none; margin-left: -20px; margin-top: -2px; height: 2.4em; } -#open_folder_extras, 
#footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } +#footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } #save-animation { border-radius: var(--radius-sm) !important; margin-bottom: 16px; background-color: var(--neutral-950); } #script_list { padding: 4px; margin-top: 16px; margin-bottom: 8px; } #settings > div.flex-wrap { width: 15em; } @@ -144,6 +144,7 @@ textarea[rows="1"] { height: 33px !important; width: 99% !important; padding: 8p #txt2img_settings { min-width: var(--left-column); max-width: var(--left-column); background-color: var(--neutral-950); padding-top: 16px; } #pnginfo_html2_info { margin-top: -18px; background-color: var(--input-background-fill); padding: var(--input-padding) } #txt2img_styles_row, #img2img_styles_row, #control_styles_row { margin-top: -6px; } +.block > span { margin-bottom: 0 !important; margin-top: var(--spacing-lg); } /* based on gradio built-in dark theme */ :root, .light, .dark { @@ -187,7 +188,7 @@ textarea[rows="1"] { height: 33px !important; width: 99% !important; padding: 8p --checkbox-label-border-color: var(--border-color-primary); --checkbox-label-border-color-hover: var(--checkbox-label-border-color); --checkbox-label-border-width: var(--input-border-width); - --checkbox-label-text-color: var(--body-text-color); + --checkbox-label-text-color: var(--block-title-text-color); --checkbox-label-text-color-selected: var(--checkbox-label-text-color); --error-background-fill: var(--background-fill-primary); --error-border-color: var(--border-color-primary); diff --git a/javascript/emerald-paradise.css b/javascript/emerald-paradise.css index a3825e894..5b3bc419f 100644 --- a/javascript/emerald-paradise.css +++ b/javascript/emerald-paradise.css @@ -1,6 +1,6 @@ /* generic html tags */ :root, .light, .dark { - --font: 'system-ui', 'ui-sans-serif', 'system-ui', "Roboto", sans-serif; + --font: 'system-ui', 'ui-sans-serif', 'system-ui', "Roboto", sans-serif, 'NotoSans'; --font-mono: 'ui-monospace', 'Consolas', monospace; --font-size: 16px; --primary-100: #1e2223; /* bg color*/ @@ -107,7 +107,7 @@ button.selected {background: var(--button-primary-background-fill);} #interrogate, #deepbooru { margin: 0 0px 10px 0px; max-width: 80px; max-height: 80px; font-weight: normal; font-size: 0.95em; } #quicksettings .gr-button-tool { font-size: 1.6rem; box-shadow: none; margin-top: -2px; height: 2.4em; } #quicksettings button {padding: 0 0.5em 0.1em 0.5em;} -#open_folder_extras, #footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } +#footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } #save-animation { border-radius: var(--radius-sm) !important; margin-bottom: 16px; background-color: #111111; } #script_list { padding: 4px; margin-top: 16px; margin-bottom: 8px; } #settings > div.flex-wrap { width: 15em; } diff --git a/javascript/exifr.js b/javascript/exifr.js new file mode 100644 index 000000000..f97bca6ba --- /dev/null +++ b/javascript/exifr.js @@ -0,0 +1 @@ +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports):"function"==typeof 
define&&define.amd?define("exifr",["exports"],t):t((e="undefined"!=typeof globalThis?globalThis:e||self).exifr={})}(this,(function(e){"use strict";var t="undefined"!=typeof self?self:global;const i="undefined"!=typeof navigator,n=i&&"undefined"==typeof HTMLImageElement,s=!("undefined"==typeof global||"undefined"==typeof process||!process.versions||!process.versions.node),r=t.Buffer,a=t.BigInt,o=!!r,l=e=>e;function h(e,t=l){if(s)try{return"function"==typeof require?Promise.resolve(t(require(e))):import(/* webpackIgnore: true */ e).then(t)}catch(t){console.warn(`Couldn't load ${e}`)}}let u=t.fetch;const c=e=>u=e;if(!t.fetch){const e=h("http",(e=>e)),t=h("https",(e=>e)),i=(n,{headers:s}={})=>new Promise((async(r,a)=>{let{port:o,hostname:l,pathname:h,protocol:u,search:c}=new URL(n);const f={method:"GET",hostname:l,path:encodeURI(h)+c,headers:s};""!==o&&(f.port=Number(o));const d=("https:"===u?await t:await e).request(f,(e=>{if(301===e.statusCode||302===e.statusCode){let t=new URL(e.headers.location,n).toString();return i(t,{headers:s}).then(r).catch(a)}r({status:e.statusCode,arrayBuffer:()=>new Promise((t=>{let i=[];e.on("data",(e=>i.push(e))),e.on("end",(()=>t(Buffer.concat(i))))}))})}));d.on("error",a),d.end()}));c(i)}function f(e,t,i){return t in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}const d=e=>g(e)?void 0:e,p=e=>void 0!==e;function g(e){return void 0===e||(e instanceof Map?0===e.size:0===Object.values(e).filter(p).length)}function m(e){let t=new Error(e);throw delete t.stack,t}function S(e){return""===(e=function(e){for(;e.endsWith("\0");)e=e.slice(0,-1);return e}(e).trim())?void 0:e}function C(e){let t=function(e){let t=0;return e.ifd0.enabled&&(t+=1024),e.exif.enabled&&(t+=2048),e.makerNote&&(t+=2048),e.userComment&&(t+=1024),e.gps.enabled&&(t+=512),e.interop.enabled&&(t+=100),e.ifd1.enabled&&(t+=1024),t+2048}(e);return e.jfif.enabled&&(t+=50),e.xmp.enabled&&(t+=2e4),e.iptc.enabled&&(t+=14e3),e.icc.enabled&&(t+=6e3),t}const y=e=>String.fromCharCode.apply(null,e),b="undefined"!=typeof TextDecoder?new TextDecoder("utf-8"):void 0;function P(e){return b?b.decode(e):o?Buffer.from(e).toString("utf8"):decodeURIComponent(escape(y(e)))}class I{static from(e,t){return e instanceof this&&e.le===t?e:new I(e,void 0,void 0,t)}constructor(e,t=0,i,n){if("boolean"==typeof n&&(this.le=n),Array.isArray(e)&&(e=new Uint8Array(e)),0===e)this.byteOffset=0,this.byteLength=0;else if(e instanceof ArrayBuffer){void 0===i&&(i=e.byteLength-t);let n=new DataView(e,t,i);this._swapDataView(n)}else if(e instanceof Uint8Array||e instanceof DataView||e instanceof I){void 0===i&&(i=e.byteLength-t),(t+=e.byteOffset)+i>e.byteOffset+e.byteLength&&m("Creating view outside of available memory in ArrayBuffer");let n=new DataView(e.buffer,t,i);this._swapDataView(n)}else if("number"==typeof e){let t=new DataView(new ArrayBuffer(e));this._swapDataView(t)}else m("Invalid input argument for BufferView: "+e)}_swapArrayBuffer(e){this._swapDataView(new DataView(e))}_swapBuffer(e){this._swapDataView(new DataView(e.buffer,e.byteOffset,e.byteLength))}_swapDataView(e){this.dataView=e,this.buffer=e.buffer,this.byteOffset=e.byteOffset,this.byteLength=e.byteLength}_lengthToEnd(e){return this.byteLength-e}set(e,t,i=I){return e instanceof DataView||e instanceof I?e=new Uint8Array(e.buffer,e.byteOffset,e.byteLength):e instanceof ArrayBuffer&&(e=new Uint8Array(e)),e instanceof Uint8Array||m("BufferView.set(): Invalid data argument."),this.toUint8().set(e,t),new 
i(this,t,e.byteLength)}subarray(e,t){return t=t||this._lengthToEnd(e),new I(this,e,t)}toUint8(){return new Uint8Array(this.buffer,this.byteOffset,this.byteLength)}getUint8Array(e,t){return new Uint8Array(this.buffer,this.byteOffset+e,t)}getString(e=0,t=this.byteLength){return P(this.getUint8Array(e,t))}getLatin1String(e=0,t=this.byteLength){let i=this.getUint8Array(e,t);return y(i)}getUnicodeString(e=0,t=this.byteLength){const i=[];for(let n=0;n1e4?R(e,t,"base64"):s&&e.includes("://")?M(e,t,"url",L):s?R(e,t,"fs"):i?M(e,t,"url",L):void m(O);var n}async function M(e,t,i,n){return D.has(i)?R(e,t,i):n?async function(e,t){let i=await t(e);return new I(i)}(e,n):void m(`Parser ${i} is not loaded`)}async function R(e,t,i){let n=new(D.get(i))(e,t);return await n.read(),n}const L=e=>u(e).then((e=>e.arrayBuffer())),U=e=>new Promise(((t,i)=>{let n=new FileReader;n.onloadend=()=>t(n.result||new ArrayBuffer),n.onerror=i,n.readAsArrayBuffer(e)}));class F extends Map{get tagKeys(){return this.allKeys||(this.allKeys=Array.from(this.keys())),this.allKeys}get tagValues(){return this.allValues||(this.allValues=Array.from(this.values())),this.allValues}}function B(e,t,i){let n=new F;for(let[e,t]of i)n.set(e,t);if(Array.isArray(t))for(let i of t)e.set(i,n);else e.set(t,n);return n}function E(e,t,i){let n,s=e.get(t);for(n of i)s.set(n[0],n[1])}const N=new Map,G=new Map,V=new Map,z=37500,H=37510,j=700,W=33723,K=34675,X=34665,_=34853,Y=40965,$=["chunked","firstChunkSize","firstChunkSizeNode","firstChunkSizeBrowser","chunkSize","chunkLimit"],J=["jfif","xmp","icc","iptc","ihdr"],q=["tiff",...J],Q=["ifd0","ifd1","exif","gps","interop"],Z=[...q,...Q],ee=["makerNote","userComment"],te=["translateKeys","translateValues","reviveValues","multiSegment"],ie=[...te,"sanitize","mergeOutput","silentErrors"];class ne{get translate(){return this.translateKeys||this.translateValues||this.reviveValues}}class se extends ne{get needed(){return this.enabled||this.deps.size>0}constructor(e,t,i,n){if(super(),f(this,"enabled",!1),f(this,"skip",new Set),f(this,"pick",new Set),f(this,"deps",new Set),f(this,"translateKeys",!1),f(this,"translateValues",!1),f(this,"reviveValues",!1),this.key=e,this.enabled=t,this.parse=this.enabled,this.applyInheritables(n),this.canBeFiltered=Q.includes(e),this.canBeFiltered&&(this.dict=N.get(e)),void 0!==i)if(Array.isArray(i))this.parse=this.enabled=!0,this.canBeFiltered&&i.length>0&&this.translateTagSet(i,this.pick);else if("object"==typeof i){if(this.enabled=!0,this.parse=!1!==i.parse,this.canBeFiltered){let{pick:e,skip:t}=i;e&&e.length>0&&this.translateTagSet(e,this.pick),t&&t.length>0&&this.translateTagSet(t,this.skip)}this.applyInheritables(i)}else!0===i||!1===i?this.parse=this.enabled=i:m(`Invalid options argument: ${i}`)}applyInheritables(e){let t,i;for(t of te)i=e[t],void 0!==i&&(this[t]=i)}translateTagSet(e,t){if(this.dict){let i,n,{tagKeys:s,tagValues:r}=this.dict;for(i of e)"string"==typeof i?(n=r.indexOf(i),-1===n&&(n=s.indexOf(Number(i))),-1!==n&&t.add(Number(s[n]))):t.add(i)}else for(let i of e)t.add(i)}finalizeFilters(){!this.enabled&&this.deps.size>0?(this.enabled=!0,ue(this.pick,this.deps)):this.enabled&&this.pick.size>0&&ue(this.pick,this.deps)}}var re={jfif:!1,tiff:!0,xmp:!1,icc:!1,iptc:!1,ifd0:!0,ifd1:!1,exif:!0,gps:!0,interop:!1,ihdr:void 0,makerNote:!1,userComment:!1,multiSegment:!1,skip:[],pick:[],translateKeys:!0,translateValues:!0,reviveValues:!0,sanitize:!0,mergeOutput:!0,silentErrors:!0,chunked:!0,firstChunkSize:void 
0,firstChunkSizeNode:512,firstChunkSizeBrowser:65536,chunkSize:65536,chunkLimit:5},ae=new Map;class oe extends ne{static useCached(e){let t=ae.get(e);return void 0!==t||(t=new this(e),ae.set(e,t)),t}constructor(e){super(),!0===e?this.setupFromTrue():void 0===e?this.setupFromUndefined():Array.isArray(e)?this.setupFromArray(e):"object"==typeof e?this.setupFromObject(e):m(`Invalid options argument ${e}`),void 0===this.firstChunkSize&&(this.firstChunkSize=i?this.firstChunkSizeBrowser:this.firstChunkSizeNode),this.mergeOutput&&(this.ifd1.enabled=!1),this.filterNestedSegmentTags(),this.traverseTiffDependencyTree(),this.checkLoadedPlugins()}setupFromUndefined(){let e;for(e of $)this[e]=re[e];for(e of ie)this[e]=re[e];for(e of ee)this[e]=re[e];for(e of Z)this[e]=new se(e,re[e],void 0,this)}setupFromTrue(){let e;for(e of $)this[e]=re[e];for(e of ie)this[e]=re[e];for(e of ee)this[e]=!0;for(e of Z)this[e]=new se(e,!0,void 0,this)}setupFromArray(e){let t;for(t of $)this[t]=re[t];for(t of ie)this[t]=re[t];for(t of ee)this[t]=re[t];for(t of Z)this[t]=new se(t,!1,void 0,this);this.setupGlobalFilters(e,void 0,Q)}setupFromObject(e){let t;for(t of(Q.ifd0=Q.ifd0||Q.image,Q.ifd1=Q.ifd1||Q.thumbnail,Object.assign(this,e),$))this[t]=he(e[t],re[t]);for(t of ie)this[t]=he(e[t],re[t]);for(t of ee)this[t]=he(e[t],re[t]);for(t of q)this[t]=new se(t,re[t],e[t],this);for(t of Q)this[t]=new se(t,re[t],e[t],this.tiff);this.setupGlobalFilters(e.pick,e.skip,Q,Z),!0===e.tiff?this.batchEnableWithBool(Q,!0):!1===e.tiff?this.batchEnableWithUserValue(Q,e):Array.isArray(e.tiff)?this.setupGlobalFilters(e.tiff,void 0,Q):"object"==typeof e.tiff&&this.setupGlobalFilters(e.tiff.pick,e.tiff.skip,Q)}batchEnableWithBool(e,t){for(let i of e)this[i].enabled=t}batchEnableWithUserValue(e,t){for(let i of e){let e=t[i];this[i].enabled=!1!==e&&void 0!==e}}setupGlobalFilters(e,t,i,n=i){if(e&&e.length){for(let e of n)this[e].enabled=!1;let t=le(e,i);for(let[e,i]of t)ue(this[e].pick,i),this[e].enabled=!0}else if(t&&t.length){let e=le(t,i);for(let[t,i]of e)ue(this[t].skip,i)}}filterNestedSegmentTags(){let{ifd0:e,exif:t,xmp:i,iptc:n,icc:s}=this;this.makerNote?t.deps.add(z):t.skip.add(z),this.userComment?t.deps.add(H):t.skip.add(H),i.enabled||e.skip.add(j),n.enabled||e.skip.add(W),s.enabled||e.skip.add(K)}traverseTiffDependencyTree(){let{ifd0:e,exif:t,gps:i,interop:n}=this;n.needed&&(t.deps.add(Y),e.deps.add(Y)),t.needed&&e.deps.add(X),i.needed&&e.deps.add(_),this.tiff.enabled=Q.some((e=>!0===this[e].enabled))||this.makerNote||this.userComment;for(let e of Q)this[e].finalizeFilters()}get onlyTiff(){return!J.map((e=>this[e].enabled)).some((e=>!0===e))&&this.tiff.enabled}checkLoadedPlugins(){for(let e of q)this[e].enabled&&!A.has(e)&&k("segment parser",e)}}function le(e,t){let i,n,s,r,a=[];for(s of t){for(r of(i=N.get(s),n=[],i))(e.includes(r[0])||e.includes(r[1]))&&n.push(r[0]);n.length&&a.push([s,n])}return a}function he(e,t){return void 0!==e?e:void 0!==t?t:void 0}function ue(e,t){for(let i of t)e.add(i)}f(oe,"default",re);class ce{constructor(e){f(this,"parsers",{}),f(this,"output",{}),f(this,"errors",[]),f(this,"pushToErrors",(e=>this.errors.push(e))),this.options=oe.useCached(e)}async read(e){this.file=await x(e,this.options)}setup(){if(this.fileParser)return;let{file:e}=this,t=e.getUint16(0);for(let[i,n]of T)if(n.canHandle(e,t))return this.fileParser=new n(this.options,this.file,this.parsers),e[i]=!0;this.file.close&&this.file.close(),m("Unknown file format")}async parse(){let{output:e,errors:t}=this;return 
this.setup(),this.options.silentErrors?(await this.executeParsers().catch(this.pushToErrors),t.push(...this.fileParser.errors)):await this.executeParsers(),this.file.close&&this.file.close(),this.options.silentErrors&&t.length>0&&(e.errors=t),d(e)}async executeParsers(){let{output:e}=this;await this.fileParser.parse();let t=Object.values(this.parsers).map((async t=>{let i=await t.parse();t.assignToOutput(e,i)}));this.options.silentErrors&&(t=t.map((e=>e.catch(this.pushToErrors)))),await Promise.all(t)}async extractThumbnail(){this.setup();let{options:e,file:t}=this,i=A.get("tiff",e);var n;if(t.tiff?n={start:0,type:"tiff"}:t.jpeg&&(n=await this.fileParser.getOrFindSegment("tiff")),void 0===n)return;let s=await this.fileParser.ensureSegmentChunk(n),r=this.parsers.tiff=new i(s,e,t),a=await r.extractThumbnail();return t.close&&t.close(),a}}async function fe(e,t){let i=new ce(t);return await i.read(e),i.parse()}var de=Object.freeze({__proto__:null,parse:fe,Exifr:ce,fileParsers:T,segmentParsers:A,fileReaders:D,tagKeys:N,tagValues:G,tagRevivers:V,createDictionary:B,extendDictionary:E,fetchUrlAsArrayBuffer:L,readBlobAsArrayBuffer:U,chunkedProps:$,otherSegments:J,segments:q,tiffBlocks:Q,segmentsAndBlocks:Z,tiffExtractables:ee,inheritables:te,allFormatters:ie,Options:oe});class pe{constructor(e,t,i){f(this,"errors",[]),f(this,"ensureSegmentChunk",(async e=>{let t=e.start,i=e.size||65536;if(this.file.chunked)if(this.file.available(t,i))e.chunk=this.file.subarray(t,i);else try{e.chunk=await this.file.readChunk(t,i)}catch(t){m(`Couldn't read segment: ${JSON.stringify(e)}. ${t.message}`)}else this.file.byteLength>t+i?e.chunk=this.file.subarray(t,i):void 0===e.size?e.chunk=this.file.subarray(t):m("Segment unreachable: "+JSON.stringify(e));return e.chunk})),this.extendOptions&&this.extendOptions(e),this.options=e,this.file=t,this.parsers=i}injectSegment(e,t){this.options[e].enabled&&this.createParser(e,t)}createParser(e,t){let i=new(A.get(e))(t,this.options,this.file);return this.parsers[e]=i}createParsers(e){for(let t of e){let{type:e,chunk:i}=t,n=this.options[e];if(n&&n.enabled){let t=this.parsers[e];t&&t.append||t||this.createParser(e,i)}}}async readSegments(e){let t=e.map(this.ensureSegmentChunk);await Promise.all(t)}}class ge{static findPosition(e,t){let i=e.getUint16(t+2)+2,n="function"==typeof this.headerLength?this.headerLength(e,t,i):this.headerLength,s=t+n,r=i-n;return{offset:t,length:i,headerLength:n,start:s,size:r,end:s+r}}static parse(e,t={}){return new this(e,new oe({[this.type]:t}),e).parse()}normalizeInput(e){return e instanceof I?e:new I(e)}constructor(e,t={},i){f(this,"errors",[]),f(this,"raw",new Map),f(this,"handleError",(e=>{if(!this.options.silentErrors)throw e;this.errors.push(e.message)})),this.chunk=this.normalizeInput(e),this.file=i,this.type=this.constructor.type,this.globalOptions=this.options=t,this.localOptions=t[this.type],this.canTranslate=this.localOptions&&this.localOptions.translate}translate(){this.canTranslate&&(this.translated=this.translateBlock(this.raw,this.type))}get output(){return this.translated?this.translated:this.raw?Object.fromEntries(this.raw):void 0}translateBlock(e,t){let i=V.get(t),n=G.get(t),s=N.get(t),r=this.options[t],a=r.reviveValues&&!!i,o=r.translateValues&&!!n,l=r.translateKeys&&!!s,h={};for(let[t,r]of e)a&&i.has(t)?r=i.get(t)(r):o&&n.has(t)&&(r=this.translateValue(r,n.get(t))),l&&s.has(t)&&(t=s.get(t)||t),h[t]=r;return h}translateValue(e,t){return 
t[e]||t.DEFAULT||e}assignToOutput(e,t){this.assignObjectToOutput(e,this.constructor.type,t)}assignObjectToOutput(e,t,i){if(this.globalOptions.mergeOutput)return Object.assign(e,i);e[t]?Object.assign(e[t],i):e[t]=i}}f(ge,"headerLength",4),f(ge,"type",void 0),f(ge,"multiSegment",!1),f(ge,"canHandle",(()=>!1));function me(e){return 192===e||194===e||196===e||219===e||221===e||218===e||254===e}function Se(e){return e>=224&&e<=239}function Ce(e,t,i){for(let[n,s]of A)if(s.canHandle(e,t,i))return n}class ye extends pe{constructor(...e){super(...e),f(this,"appSegments",[]),f(this,"jpegSegments",[]),f(this,"unknownSegments",[])}static canHandle(e,t){return 65496===t}async parse(){await this.findAppSegments(),await this.readSegments(this.appSegments),this.mergeMultiSegments(),this.createParsers(this.mergedAppSegments||this.appSegments)}setupSegmentFinderArgs(e){!0===e?(this.findAll=!0,this.wanted=new Set(A.keyList())):(e=void 0===e?A.keyList().filter((e=>this.options[e].enabled)):e.filter((e=>this.options[e].enabled&&A.has(e))),this.findAll=!1,this.remaining=new Set(e),this.wanted=new Set(e)),this.unfinishedMultiSegment=!1}async findAppSegments(e=0,t){this.setupSegmentFinderArgs(t);let{file:i,findAll:n,wanted:s,remaining:r}=this;if(!n&&this.file.chunked&&(n=Array.from(s).some((e=>{let t=A.get(e),i=this.options[e];return t.multiSegment&&i.multiSegment})),n&&await this.file.readWhole()),e=this.findAppSegmentsInRange(e,i.byteLength),!this.options.onlyTiff&&i.chunked){let t=!1;for(;r.size>0&&!t&&(i.canReadNextChunk||this.unfinishedMultiSegment);){let{nextChunkOffset:n}=i,s=this.appSegments.some((e=>!this.file.available(e.offset||e.start,e.length||e.size)));if(t=e>n&&!s?!await i.readNextChunk(e):!await i.readNextChunk(n),void 0===(e=this.findAppSegmentsInRange(e,i.byteLength)))return}}}findAppSegmentsInRange(e,t){t-=2;let i,n,s,r,a,o,{file:l,findAll:h,wanted:u,remaining:c,options:f}=this;for(;ee.multiSegment)))return;let e=function(e,t){let i,n,s,r=new Map;for(let a=0;a{let i=A.get(e,this.options);if(i.handleMultiSegments){return{type:e,chunk:i.handleMultiSegments(t)}}return t[0]}))}getSegment(e){return this.appSegments.find((t=>t.type===e))}async getOrFindSegment(e){let t=this.getSegment(e);return void 0===t&&(await this.findAppSegments(0,[e]),t=this.getSegment(e)),t}}f(ye,"type","jpeg"),T.set("jpeg",ye);const be=[void 0,1,1,2,4,8,1,1,2,4,8,4,8,4];class Pe extends ge{parseHeader(){var e=this.chunk.getUint16();18761===e?this.le=!0:19789===e&&(this.le=!1),this.chunk.le=this.le,this.headerParsed=!0}parseTags(e,t,i=new Map){let{pick:n,skip:s}=this.options[t];n=new Set(n);let r=n.size>0,a=0===s.size,o=this.chunk.getUint16(e);e+=2;for(let l=0;l13)&&m(`Invalid TIFF value type. block: ${i.toUpperCase()}, tag: ${t.toString(16)}, type: ${s}, offset ${e}`),e>n.byteLength&&m(`Invalid TIFF value offset. 
block: ${i.toUpperCase()}, tag: ${t.toString(16)}, type: ${s}, offset ${e} is outside of chunk size ${n.byteLength}`),1===s)return n.getUint8Array(e,r);if(2===s)return S(n.getString(e,r));if(7===s)return n.getUint8Array(e,r);if(1===r)return this.parseTagValue(s,e);{let t=new(function(e){switch(e){case 1:return Uint8Array;case 3:return Uint16Array;case 4:return Uint32Array;case 5:return Array;case 6:return Int8Array;case 8:return Int16Array;case 9:return Int32Array;case 10:return Array;case 11:return Float32Array;case 12:return Float64Array;default:return Array}}(s))(r),i=a;for(let n=0;ne.byteLength&&m(`IFD0 offset points to outside of file.\nthis.ifd0Offset: ${this.ifd0Offset}, file.byteLength: ${e.byteLength}`),e.tiff&&await e.ensureChunk(this.ifd0Offset,C(this.options));let t=this.parseBlock(this.ifd0Offset,"ifd0");return 0!==t.size?(this.exifOffset=t.get(X),this.interopOffset=t.get(Y),this.gpsOffset=t.get(_),this.xmp=t.get(j),this.iptc=t.get(W),this.icc=t.get(K),this.options.sanitize&&(t.delete(X),t.delete(Y),t.delete(_),t.delete(j),t.delete(W),t.delete(K)),t):void 0}async parseExifBlock(){if(this.exif)return;if(this.ifd0||await this.parseIfd0Block(),void 0===this.exifOffset)return;this.file.tiff&&await this.file.ensureChunk(this.exifOffset,C(this.options));let e=this.parseBlock(this.exifOffset,"exif");return this.interopOffset||(this.interopOffset=e.get(Y)),this.makerNote=e.get(z),this.userComment=e.get(H),this.options.sanitize&&(e.delete(Y),e.delete(z),e.delete(H)),this.unpack(e,41728),this.unpack(e,41729),e}unpack(e,t){let i=e.get(t);i&&1===i.length&&e.set(t,i[0])}async parseGpsBlock(){if(this.gps)return;if(this.ifd0||await this.parseIfd0Block(),void 0===this.gpsOffset)return;let e=this.parseBlock(this.gpsOffset,"gps");return e&&e.has(2)&&e.has(4)&&(e.set("latitude",ke(...e.get(2),e.get(1))),e.set("longitude",ke(...e.get(4),e.get(3)))),e}async parseInteropBlock(){if(!this.interop&&(this.ifd0||await this.parseIfd0Block(),void 0!==this.interopOffset||this.exif||await this.parseExifBlock(),void 0!==this.interopOffset))return this.parseBlock(this.interopOffset,"interop")}async parseThumbnailBlock(e=!1){if(!this.ifd1&&!this.ifd1Parsed&&(!this.options.mergeOutput||e))return this.findIfd1Offset(),this.ifd1Offset>0&&(this.parseBlock(this.ifd1Offset,"ifd1"),this.ifd1Parsed=!0),this.ifd1}async extractThumbnail(){if(this.headerParsed||this.parseHeader(),this.ifd1Parsed||await this.parseThumbnailBlock(!0),void 0===this.ifd1)return;let e=this.ifd1.get(513),t=this.ifd1.get(514);return this.chunk.getUint8Array(e,t)}get image(){return this.ifd0}get thumbnail(){return this.ifd1}createOutput(){let e,t,i,n={};for(t of Q)if(e=this[t],!g(e))if(i=this.canTranslate?this.translateBlock(e,t):Object.fromEntries(e),this.options.mergeOutput){if("ifd1"===t)continue;Object.assign(n,i)}else n[t]=i;return this.makerNote&&(n.makerNote=this.makerNote),this.userComment&&(n.userComment=this.userComment),n}assignToOutput(e,t){if(this.globalOptions.mergeOutput)Object.assign(e,t);else for(let[i,n]of Object.entries(t))this.assignObjectToOutput(e,i,n)}}function ke(e,t,i,n){var s=e+t/60+i/3600;return"S"!==n&&"W"!==n||(s*=-1),s}f(Ie,"type","tiff"),f(Ie,"headerLength",10),A.set("tiff",Ie);var 
we=Object.freeze({__proto__:null,default:de,Exifr:ce,fileParsers:T,segmentParsers:A,fileReaders:D,tagKeys:N,tagValues:G,tagRevivers:V,createDictionary:B,extendDictionary:E,fetchUrlAsArrayBuffer:L,readBlobAsArrayBuffer:U,chunkedProps:$,otherSegments:J,segments:q,tiffBlocks:Q,segmentsAndBlocks:Z,tiffExtractables:ee,inheritables:te,allFormatters:ie,Options:oe,parse:fe});const Te={ifd0:!1,ifd1:!1,exif:!1,gps:!1,interop:!1,sanitize:!1,reviveValues:!0,translateKeys:!1,translateValues:!1,mergeOutput:!1},Ae=Object.assign({},Te,{firstChunkSize:4e4,gps:[1,2,3,4]});async function De(e){let t=new ce(Ae);await t.read(e);let i=await t.parse();if(i&&i.gps){let{latitude:e,longitude:t}=i.gps;return{latitude:e,longitude:t}}}const Oe=Object.assign({},Te,{tiff:!1,ifd1:!0,mergeOutput:!1});async function xe(e){let t=new ce(Oe);await t.read(e);let i=await t.extractThumbnail();return i&&o?r.from(i):i}async function ve(e){let t=await this.thumbnail(e);if(void 0!==t){let e=new Blob([t]);return URL.createObjectURL(e)}}const Me=Object.assign({},Te,{firstChunkSize:4e4,ifd0:[274]});async function Re(e){let t=new ce(Me);await t.read(e);let i=await t.parse();if(i&&i.ifd0)return i.ifd0[274]}const Le=Object.freeze({1:{dimensionSwapped:!1,scaleX:1,scaleY:1,deg:0,rad:0},2:{dimensionSwapped:!1,scaleX:-1,scaleY:1,deg:0,rad:0},3:{dimensionSwapped:!1,scaleX:1,scaleY:1,deg:180,rad:180*Math.PI/180},4:{dimensionSwapped:!1,scaleX:-1,scaleY:1,deg:180,rad:180*Math.PI/180},5:{dimensionSwapped:!0,scaleX:1,scaleY:-1,deg:90,rad:90*Math.PI/180},6:{dimensionSwapped:!0,scaleX:1,scaleY:1,deg:90,rad:90*Math.PI/180},7:{dimensionSwapped:!0,scaleX:1,scaleY:-1,deg:270,rad:270*Math.PI/180},8:{dimensionSwapped:!0,scaleX:1,scaleY:1,deg:270,rad:270*Math.PI/180}});if(e.rotateCanvas=!0,e.rotateCss=!0,"object"==typeof navigator){let t=navigator.userAgent;if(t.includes("iPad")||t.includes("iPhone")){let i=t.match(/OS (\d+)_(\d+)/);if(i){let[,t,n]=i,s=Number(t)+.1*Number(n);e.rotateCanvas=s<13.4,e.rotateCss=!1}}else if(t.includes("OS X 10")){let[,i]=t.match(/OS X 10[_.](\d+)/);e.rotateCanvas=e.rotateCss=Number(i)<15}if(t.includes("Chrome/")){let[,i]=t.match(/Chrome\/(\d+)/);e.rotateCanvas=e.rotateCss=Number(i)<81}else if(t.includes("Firefox/")){let[,i]=t.match(/Firefox\/(\d+)/);e.rotateCanvas=e.rotateCss=Number(i)<77}}async function Ue(t){let i=await Re(t);return Object.assign({canvas:e.rotateCanvas,css:e.rotateCss},Le[i])}class Fe extends I{constructor(...e){super(...e),f(this,"ranges",new Be),0!==this.byteLength&&this.ranges.add(0,this.byteLength)}_tryExtend(e,t,i){if(0===e&&0===this.byteLength&&i){let e=new DataView(i.buffer||i,i.byteOffset,i.byteLength);this._swapDataView(e)}else{let i=e+t;if(i>this.byteLength){let{dataView:e}=this._extend(i);this._swapDataView(e)}}}_extend(e){let t;t=o?r.allocUnsafe(e):new Uint8Array(e);let i=new DataView(t.buffer,t.byteOffset,t.byteLength);return t.set(new Uint8Array(this.buffer,this.byteOffset,this.byteLength),0),{uintView:t,dataView:i}}subarray(e,t,i=!1){return t=t||this._lengthToEnd(e),i&&this._tryExtend(e,t),this.ranges.add(e,t),super.subarray(e,t)}set(e,t,i=!1){i&&this._tryExtend(t,e.byteLength,e);let n=super.set(e,t);return this.ranges.add(t,n.byteLength),n}async ensureChunk(e,t){this.chunked&&(this.ranges.available(e,t)||await this.readChunk(e,t))}available(e,t){return this.ranges.available(e,t)}}class Be{constructor(){f(this,"list",[])}get length(){return this.list.length}add(e,t,i=0){let 
n=e+t,s=this.list.filter((t=>Ee(e,t.offset,n)||Ee(e,t.end,n)));if(s.length>0){e=Math.min(e,...s.map((e=>e.offset))),n=Math.max(n,...s.map((e=>e.end))),t=n-e;let i=s.shift();i.offset=e,i.length=t,i.end=n,this.list=this.list.filter((e=>!s.includes(e)))}else this.list.push({offset:e,length:t,end:n})}available(e,t){let i=e+t;return this.list.some((t=>t.offset<=e&&i<=t.end))}}function Ee(e,t,i){return e<=t&&t<=i}class Ne extends Fe{constructor(e,t){super(0),f(this,"chunksRead",0),this.input=e,this.options=t}async readWhole(){this.chunked=!1,await this.readChunk(this.nextChunkOffset)}async readChunked(){this.chunked=!0,await this.readChunk(0,this.options.firstChunkSize)}async readNextChunk(e=this.nextChunkOffset){if(this.fullyRead)return this.chunksRead++,!1;let t=this.options.chunkSize,i=await this.readChunk(e,t);return!!i&&i.byteLength===t}async readChunk(e,t){if(this.chunksRead++,0!==(t=this.safeWrapAddress(e,t)))return this._readChunk(e,t)}safeWrapAddress(e,t){return void 0!==this.size&&e+t>this.size?Math.max(0,this.size-e):t}get nextChunkOffset(){if(0!==this.ranges.list.length)return this.ranges.list[0].length}get canReadNextChunk(){return this.chunksReade.kind===t))}parseBoxHead(e){let t=this.file.getUint32(e),i=this.file.getString(e+4,4),n=e+8;return 1===t&&(t=this.file.getUint64(e+8),n+=8),{offset:e,length:t,kind:i,start:n}}parseBoxFullHead(e){if(void 0!==e.version)return;let t=this.file.getUint32(e.start);e.version=t>>24,e.start+=4}}class ze extends Ve{static canHandle(e,t){if(0!==t)return!1;let i=e.getUint16(2);if(i>50)return!1;let n=16,s=[];for(;n=2&&(n=3===t.version?4:2,s=this.file.getString(i+n+2,4),"Exif"===s))return this.file.getUintBytes(i,n);r+=t.length}}get8bits(e){let t=this.file.getUint8(e);return[t>>4,15&t]}findExtentInIloc(e,t){this.parseBoxFullHead(e);let i=e.start,[n,s]=this.get8bits(i++),[r,a]=this.get8bits(i++),o=2===e.version?4:2,l=1===e.version||2===e.version?2:0,h=a+n+s,u=2===e.version?4:2,c=this.file.getUintBytes(i,u);for(i+=u;c--;){let e=this.file.getUintBytes(i,o);i+=o+l+2+r;let u=this.file.getUint16(i);if(i+=2,e===t)return u>1&&console.warn("ILOC box has more than one extent but we're only processing one\nPlease create an issue at https://github.com/MikeKovarik/exifr with this file"),[this.file.getUintBytes(i+a,n),this.file.getUintBytes(i+a+n,s)];i+=u*h}}}class He extends ze{}f(He,"type","heic");class je extends 
[minified bundle of the exifr image-metadata library — EXIF/TIFF, GPS, interop, JFIF, IHDR, ICC and IPTC tag dictionaries plus JPEG/PNG/HEIC/AVIF segment parsers and an XMP reader (machine-generated vendor code)]
 btnClose.click(), 100);
  const btnRefresh = gradioApp().getElementById(`${tabname}_extra_refresh`);
  if (btnRefresh && enDirty) setTimeout(() => btnRefresh.click(), 100);
-  return args;
+  return [...args];
 }

 function refeshDetailsEN(args) {
diff --git a/javascript/imageViewer.js b/javascript/imageViewer.js
index 3d2ac2675..2e1b60c7f 100644
--- a/javascript/imageViewer.js
+++ b/javascript/imageViewer.js
@@ -24,7 +24,6 @@ function modalImageSwitch(offset) {
   nextButton.click();
   const modalImage = gradioApp().getElementById('modalImage');
   const modal = gradioApp().getElementById('lightboxModal');
-  modalImage.onload = () => modalPreviewZone.focus();
   modalImage.src = nextButton.children[0].src;
   if (modalImage.style.display === 'none') modal.style.setProperty('background-image', `url(${modalImage.src})`);
 }
@@ -55,6 +54,24 @@ function modalKeyHandler(event) {
   event.stopPropagation();
 }

+async function displayExif(el) {
+  const modalExif = gradioApp().getElementById('modalExif');
+  modalExif.innerHTML = '';
+  const exif = await window.exifr.parse(el);
+  if (!exif) return;
+  // log('exif', exif);
+  try {
+    let html = `
+      Image ${el.src} Size ${el.naturalWidth}x${el.naturalHeight}
+      Prompt ${exif.parameters || ''}
+    `;
+    html = html.replace('\n', '
');
+    html = html.replace('Negative prompt:', '
Negative');
+    html = html.replace('Steps:', '
Params Steps:');
+    modalExif.innerHTML = html;
+  } catch (e) { }
+}
+
 function showModal(event) {
   const source = event.target || event.srcElement;
   const modalImage = gradioApp().getElementById('modalImage');
@@ -63,6 +80,7 @@ function showModal(event) {
   modalImage.onload = () => {
     previewInstance.moveTo(0, 0);
     modalPreviewZone.focus();
+    displayExif(modalImage);
   };
   modalImage.src = source.src;
   if (modalImage.style.display === 'none') lb.style.setProperty('background-image', `url(${source.src})`);
@@ -165,45 +183,50 @@ async function initImageViewer() {
   const modalZoom = document.createElement('span');
   modalZoom.id = 'modal_zoom';
   modalZoom.className = 'cursor';
-  modalZoom.innerHTML = '🔍';
+  modalZoom.innerHTML = '\uf531';
   modalZoom.title = 'Toggle zoomed view';
   modalZoom.addEventListener('click', modalZoomToggle, true);
   const modalReset = document.createElement('span');
   modalReset.id = 'modal_reset';
   modalReset.className = 'cursor';
-  modalReset.innerHTML = '♻️';
+  modalReset.innerHTML = '\uf532';
   modalReset.title = 'Reset zoomed view';
   modalReset.addEventListener('click', modalResetInstance, true);
   const modalTile = document.createElement('span');
   modalTile.id = 'modal_tile';
   modalTile.className = 'cursor';
-  modalTile.innerHTML = '🖽';
+  modalTile.innerHTML = '\udb81\udd70';
   modalTile.title = 'Preview tiling';
   modalTile.addEventListener('click', modalTileToggle, true);
   const modalSave = document.createElement('span');
   modalSave.id = 'modal_save';
   modalSave.className = 'cursor';
-  modalSave.innerHTML = '💾';
+  modalSave.innerHTML = '\udb80\udd93';
   modalSave.title = 'Save Image';
   modalSave.addEventListener('click', modalSaveImage, true);
   const modalDownload = document.createElement('span');
   modalDownload.id = 'modal_download';
   modalDownload.className = 'cursor';
-  modalDownload.innerHTML = '📷';
+  modalDownload.innerHTML = '\udb85\udc62';
   modalDownload.title = 'Download Image';
   modalDownload.addEventListener('click', modalDownloadImage, true);
   const modalClose = document.createElement('span');
   modalClose.id = 'modal_close';
   modalClose.className = 'cursor';
-  modalClose.innerHTML = '🗙';
+  modalClose.innerHTML = '\udb80\udd57';
   modalClose.title = 'Close';
   modalClose.addEventListener('click', (evt) => closeModal(evt, true), true);
+  // exif
+  const modalExif = document.createElement('div');
+  modalExif.id = 'modalExif';
+  modalExif.style = 'position: absolute; bottom: 0px; width: 98%; background-color: rgba(0, 0, 0, 0.5); color: var(--neutral-300); padding: 1em; font-size: small;';
+  // handlers
   modalPreviewZone.addEventListener('mousedown', () => { previewDrag = false; });
   modalPreviewZone.addEventListener('touchstart', () => { previewDrag = false; }, { passive: true });
@@ -233,6 +256,7 @@ async function initImageViewer() {
   modal.appendChild(modalPreviewZone);
   modal.appendChild(modalNext);
   modal.append(modalControls);
+  modal.append(modalExif);
   modalControls.appendChild(modalZoom);
   modalControls.appendChild(modalReset);
   modalControls.appendChild(modalTile);
diff --git a/javascript/invoked.css b/javascript/invoked.css
index 391db821c..72e78d31a 100644
--- a/javascript/invoked.css
+++ b/javascript/invoked.css
@@ -1,6 +1,6 @@
 /* generic html tags */
 :root, .light, .dark {
-  --font: 'system-ui', 'ui-sans-serif', 'system-ui', "Roboto", sans-serif;
+  --font: 'system-ui', 'ui-sans-serif', 'system-ui', "Roboto", sans-serif, 'NotoSans';
  --font-mono: 'ui-monospace', 'Consolas', monospace;
  --font-size: 16px;
  --primary-100: #2b303b;
@@ -107,7 +107,7 @@ button.selected {background:
var(--button-primary-background-fill);} #interrogate, #deepbooru { margin: 0 0px 10px 0px; max-width: 80px; max-height: 80px; font-weight: normal; font-size: 0.95em; } #quicksettings .gr-button-tool { font-size: 1.6rem; box-shadow: none; margin-top: -2px; height: 2.4em; } #quicksettings button {padding: 0 0.5em 0.1em 0.5em;} -#open_folder_extras, #footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } +#footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } #save-animation { border-radius: var(--radius-sm) !important; margin-bottom: 16px; background-color: #111111; } #script_list { padding: 4px; margin-top: 16px; margin-bottom: 8px; } #settings > div.flex-wrap { width: 15em; } diff --git a/javascript/light-teal.css b/javascript/light-teal.css index cccf07a62..28bf03e6f 100644 --- a/javascript/light-teal.css +++ b/javascript/light-teal.css @@ -108,7 +108,7 @@ svg.feather.feather-image, .feather .feather-image { display: none } #img2img_settings { min-width: calc(2 * var(--left-column)); max-width: calc(2 * var(--left-column)); background-color: #111111; padding-top: 16px; } #interrogate, #deepbooru { margin: 0 0px 10px 0px; max-width: 80px; max-height: 80px; font-weight: normal; font-size: 0.95em; } #quicksettings .gr-button-tool { font-size: 1.6rem; box-shadow: none; margin-left: -20px; margin-top: -2px; height: 2.4em; } -#open_folder_extras, #footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } +#footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } #save-animation { border-radius: var(--radius-sm) !important; margin-bottom: 16px; background-color: #111111; } #script_list { padding: 4px; margin-top: 16px; margin-bottom: 8px; } #settings > div.flex-wrap { width: 15em; } diff --git a/javascript/logMonitor.js b/javascript/logMonitor.js index af744aee1..d55794a58 100644 --- a/javascript/logMonitor.js +++ b/javascript/logMonitor.js @@ -1,5 +1,20 @@ let logMonitorEl = null; let logMonitorStatus = true; +let logWarnings = 0; +let logErrors = 0; + +function dateToStr(ts) { + const dt = new Date(1000 * ts); + const year = dt.getFullYear(); + const mo = String(dt.getMonth() + 1).padStart(2, '0'); + const day = String(dt.getDate()).padStart(2, '0'); + const hour = String(dt.getHours()).padStart(2, '0'); + const min = String(dt.getMinutes()).padStart(2, '0'); + const sec = String(dt.getSeconds()).padStart(2, '0'); + const ms = String(dt.getMilliseconds()).padStart(3, '0'); + const s = `${year}-${mo}-${day} ${hour}:${min}:${sec}.${ms}`; + return s; +} async function logMonitor() { if (logMonitorStatus) setTimeout(logMonitor, opts.logmonitor_refresh_period); @@ -24,14 +39,23 @@ async function logMonitor() { try { const l = JSON.parse(line); const row = document.createElement('tr'); - row.style = 'padding: 10px; margin: 0;'; - row.innerHTML = `${new Date(1000 * l.created).toISOString()}${l.level}${l.facility}${l.module}${l.msg}`; + // row.style = 'padding: 10px; margin: 0;'; + const level = `${l.level}`; + if (l.level === 'WARNING') logWarnings++; + if (l.level === 'ERROR') logErrors++; + const module = `${l.module}`; 
+ row.innerHTML = `${dateToStr(l.created)}${level}${l.facility}${module}${l.msg}`; logMonitorEl.appendChild(row); - } catch {} + } catch (e) { + console.log('logMonitor', e); + console.error('logMonitor line', line); + } } while (logMonitorEl.childElementCount > 100) logMonitorEl.removeChild(logMonitorEl.firstChild); if (at_bottom) logMonitorEl.scrollTop = logMonitorEl.scrollHeight; else if (lines?.length > 0) logMonitorEl.parentElement.style = 'border-bottom: 2px solid var(--highlight-color);'; + document.getElementById('logWarnings').innerText = logWarnings; + document.getElementById('logErrors').innerText = logErrors; } } @@ -47,14 +71,16 @@ async function initLogMonitor() { - + - - + + + + - +
TimeTime LevelFacilityModuleModule MessageWarnings 0Errors 0
`; diff --git a/javascript/midnight-barbie.css b/javascript/midnight-barbie.css index 1e53b934e..ea78e5cab 100644 --- a/javascript/midnight-barbie.css +++ b/javascript/midnight-barbie.css @@ -1,6 +1,6 @@ /* generic html tags */ :root, .light, .dark { - --font: "Source Sans Pro", 'ui-sans-serif', 'system-ui', "Roboto", sans-serif; + --font: "Source Sans Pro", 'ui-sans-serif', 'system-ui', "Roboto", sans-serif, 'NotoSans'; --font-mono: 'IBM Plex Mono', 'ui-monospace', 'Consolas', monospace; --font-size: 14px; --highlight-color: #ff00f2; @@ -99,7 +99,7 @@ svg.feather.feather-image, .feather .feather-image { display: none } #img2img_settings { min-width: calc(2 * var(--left-column)); max-width: calc(2 * var(--left-column)); background-color: #111111; padding-top: 16px; } #interrogate, #deepbooru { margin: 0 0px 10px 0px; max-width: 80px; max-height: 80px; font-weight: normal; font-size: 0.95em; } #quicksettings .gr-button-tool { font-size: 1.6rem; box-shadow: none; margin-left: -20px; margin-top: -2px; height: 2.4em; } -#open_folder_extras, #footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } +#footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } #save-animation { border-radius: var(--radius-sm) !important; margin-bottom: 16px; background-color: #111111; } #script_list { padding: 4px; margin-top: 16px; margin-bottom: 8px; } #settings > div.flex-wrap { width: 15em; } diff --git a/javascript/orchid-dreams.css b/javascript/orchid-dreams.css index 4a80eeed5..c290240c1 100644 --- a/javascript/orchid-dreams.css +++ b/javascript/orchid-dreams.css @@ -1,6 +1,6 @@ /* generic html tags */ :root, .light, .dark { - --font: 'system-ui', 'ui-sans-serif', 'system-ui', "Roboto", sans-serif; + --font: 'system-ui', 'ui-sans-serif', 'system-ui', "Roboto", sans-serif, 'NotoSans'; --font-mono: 'ui-monospace', 'Consolas', monospace; --font-size: 16px; --primary-100: #2a2a34; /* bg color*/ @@ -107,7 +107,7 @@ button.selected {background: var(--button-primary-background-fill);} #interrogate, #deepbooru { margin: 0 0px 10px 0px; max-width: 80px; max-height: 80px; font-weight: normal; font-size: 0.95em; } #quicksettings .gr-button-tool { font-size: 1.6rem; box-shadow: none; margin-top: -2px; height: 2.4em; } #quicksettings button {padding: 0 0.5em 0.1em 0.5em;} -#open_folder_extras, #footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } +#footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } #save-animation { border-radius: var(--radius-sm) !important; margin-bottom: 16px; background-color: #111111; } #script_list { padding: 4px; margin-top: 16px; margin-bottom: 8px; } #settings > div.flex-wrap { width: 15em; } diff --git a/javascript/sdnext.css b/javascript/sdnext.css index f752c707f..8d9e57ef9 100644 --- a/javascript/sdnext.css +++ b/javascript/sdnext.css @@ -1,11 +1,18 @@ @font-face { font-family: 'NotoSans'; font-display: swap; font-style: normal; font-weight: 100; src: local('NotoSans'), url('notosans-nerdfont-regular.ttf') } -:root { --left-column: 520px; } +:root { + --left-column: 520px; + --color-trace: #666666; + 
--color-debug: #7F7F7F; + --color-info: #D4D4D4; + --color-warning: #FF9900; + --color-error: #BE0000 +} a { font-weight: bold; cursor: pointer; } h2 { margin-top: 1em !important; font-size: var(--text-xxl) !important; } footer { display: none; margin-top: 0 !important;} table { overflow-x: auto !important; overflow-y: auto !important; } -td { border-bottom: none !important; padding: 0.1em 0.5em !important; } -tr { border-bottom: none !important; padding: 0.1em 0.5em !important; } +td { border-bottom: none !important; padding: 0 0.5em !important; } +tr { border-bottom: none !important; padding: 0 0.5em !important; } textarea { overflow-y: auto !important; } span { font-size: var(--text-md) !important; } button { font-size: var(--text-lg) !important; } @@ -21,6 +28,9 @@ input[type='color'] { width: 64px; height: 32px; } .hidden { display: none; } .tabitem { padding: 0 !important; } +/* color elements */ + + .gradio-dropdown, .block.gradio-slider, .block.gradio-checkbox, .block.gradio-textbox, .block.gradio-radio, .block.gradio-checkboxgroup, .block.gradio-number, .block.gradio-colorpicker { border-width: 0 !important; box-shadow: none !important;} .gradio-accordion { padding-top: var(--spacing-md) !important; padding-right: 0 !important; padding-bottom: 0 !important; color: var(--body-text-color); } .gradio-accordion .label-wrap .icon { color: var(--button-primary-border-color); } @@ -149,10 +159,10 @@ div#extras_scale_to_tab div.form{ flex-direction: row; } /* fullpage image viewer */ #lightboxModal{ display: none; position: fixed; z-index: 1001; left: 0; top: 0; width: 100%; height: 100%; overflow: auto; background-color: rgba(20, 20, 20, 0.75); backdrop-filter: blur(6px); - user-select: none; -webkit-user-select: none; flex-direction: row; } + user-select: none; -webkit-user-select: none; flex-direction: row; font-family: 'NotoSans';} .modalControls { display: flex; justify-content: space-evenly; background-color: transparent; position: absolute; width: 99%; z-index: 1; } .modalControls:hover { background-color: #50505050; } -.modalControls span { color: white; font-size: 2em; font-weight: bold; cursor: pointer; filter: grayscale(100%); } +.modalControls span { color: white; font-size: 2em !important; font-weight: bold; cursor: pointer; filter: grayscale(100%); } .modalControls span:hover, .modalControls span:focus { color: var(--highlight-color); filter: none; } .lightboxModalPreviewZone { display: flex; width: 100%; height: 100%; } .lightboxModalPreviewZone:focus-visible { outline: none; } @@ -218,14 +228,15 @@ table.settings-value-table td { padding: 0.4em; border: 1px solid #ccc; max-widt .extra-network-cards .card-list { display: flex; margin: 0.3em; padding: 0.3em; background: var(--input-background-fill); cursor: pointer; border-radius: var(--button-large-radius); } .extra-network-cards .card-list .tag { color: var(--primary-500); margin-left: 0.8em; } .extra-details-close { position: fixed; top: 0.2em; right: 0.2em; z-index: 99; background: var(--button-secondary-background-fill) !important; } -#txt2img_description, #img2img_description, #control_description { max-height: 63px; overflow-y: auto !important; } -#txt2img_description>label>textarea, #img2img_description>label>textarea, #control_description>label>textarea { font-size: var(--text-xs); height: 6em; } - -#txt2img_extra_details>div, #img2img_extra_details>div { overflow-y: auto; min-height: 40vh; max-height: 80vh; align-self: flex-start; } -#txt2img_extra_details, #img2img_extra_details { position: fixed; bottom: 50%; 
left: 50%; transform: translate(-50%, 50%); padding: 0.8em; border: var(--block-border-width) solid var(--highlight-color) !important; +.extra-details-tabs textarea, .extra-details-tabs .gradio-json { overflow-y: scroll !important; scrollbar-width: unset !important; max-height: 15vh; } +.extra-details-text .form { overflow-x: hidden; overflow-y: scroll; display: block; } +.extra-description { max-height: 63px; overflow-y: auto !important; } +.extra-description > label > textarea { font-size: var(--text-xs); height: 6em; } +.extra-details { position: fixed; bottom: 50%; left: 50%; transform: translate(-50%, 50%); padding: 0.8em; border: var(--block-border-width) solid var(--highlight-color) !important; z-index: 100; box-shadow: var(--button-shadow); } -#txt2img_extra_details td:first-child, #img2img_extra_details td:first-child { font-weight: bold; vertical-align: top; } -#txt2img_extra_details .gradio-image, #img2img_extra_details .gradio-image { max-height: 70vh; } +.extra-details > div { overflow-y: auto; min-height: 40vh; max-height: 80vh; align-self: flex-start; } +.extra-details td:first-child { font-weight: bold; vertical-align: top; } +.extra-details .gradio-image { max-height: 50vh; } /* specific elements */ @@ -240,6 +251,8 @@ table.settings-value-table td { padding: 0.4em; border: 1px solid #ccc; max-widt #models_tab { flex-flow: row-reverse; } #swap_axes>button { min-width: 100px; font-size: var(--text-md); } #ui_defaults_review { margin: 1em; } +.ar-dropdown { font-size: 0.9em; min-width: 5.5em !important; max-width: 5.5em !important; margin: 0 !important; padding: 0 !important; align-content: center; } +.ar-dropdown div { margin: 0; background: var(--background-color)} /* extras */ .extras { gap: 0.2em 1em !important } @@ -248,7 +261,7 @@ table.settings-value-table td { padding: 0.4em; border: 1px solid #ccc; max-widt #pnginfo_html_info .gradio-html > div { margin: 0.5em; } /* log monitor */ -.log-monitor { display: none; justify-content: unset !important; overflow: hidden; padding: 0; margin-top: auto; font-family: monospace; font-size: var(--text-xs); } +.log-monitor { display: none; justify-content: unset !important; overflow: hidden; padding: 0; margin-top: auto; font-family: monospace; font-size: var(--text-xxs); } .log-monitor td, .log-monitor th { padding-left: 1em; } /* changelog */ @@ -270,8 +283,9 @@ table.settings-value-table td { padding: 0.4em; border: 1px solid #ccc; max-widt .control-tabs > .tab-nav { margin-bottom: 0; margin-top: 0; } .control-unit { max-width: 1200px; padding: 0 !important; margin-top: -10px !important; } .control-unit > .label-wrap { margin-bottom: 0 !important; } +.control-settings { border-width: var(--block-border-width) !important; border-top: var(--button-primary-border-color) !important; border-style: solid !important; margin-top: 1em !important; } .processor-settings { padding: 0 !important; max-width: 300px; } -.processor-group>div { flex-flow: wrap;gap: 1em; } +.processor-group > div { flex-flow: wrap;gap: 1em; } /* main info */ .main-info { font-weight: var(--section-header-text-weight); color: var(--body-text-color-subdued); padding: 1em !important; margin-top: 2em !important; line-height: var(--line-lg) !important; } diff --git a/javascript/timeless-beige.css b/javascript/timeless-beige.css index e453851c8..fbd9ec1a7 100644 --- a/javascript/timeless-beige.css +++ b/javascript/timeless-beige.css @@ -1,6 +1,6 @@ /* generic html tags */ :root, .light, .dark { - --font: 'system-ui', 'ui-sans-serif', 'system-ui', "Roboto", sans-serif; 
+ --font: 'system-ui', 'ui-sans-serif', 'system-ui', "Roboto", sans-serif, 'NotoSans'; --font-mono: 'ui-monospace', 'Consolas', monospace; --font-size: 16px; --primary-100: #212226; /* bg color*/ @@ -107,7 +107,7 @@ button.selected {background: var(--button-primary-background-fill);} #interrogate, #deepbooru { margin: 0 0px 10px 0px; max-width: 80px; max-height: 80px; font-weight: normal; font-size: 0.95em; } #quicksettings .gr-button-tool { font-size: 1.6rem; box-shadow: none; margin-top: -2px; height: 2.4em; } #quicksettings button {padding: 0 0.5em 0.1em 0.5em;} -#open_folder_extras, #footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } +#footer, #style_pos_col, #style_neg_col, #roll_col, #extras_upscaler_2, #extras_upscaler_2_visibility, #txt2img_seed_resize_from_w, #txt2img_seed_resize_from_h { display: none; } #save-animation { border-radius: var(--radius-sm) !important; margin-bottom: 16px; background-color: #111111; } #script_list { padding: 4px; margin-top: 16px; margin-bottom: 8px; } #settings > div.flex-wrap { width: 15em; } diff --git a/javascript/ui.js b/javascript/ui.js index 4d61d7566..782487f41 100644 --- a/javascript/ui.js +++ b/javascript/ui.js @@ -67,7 +67,13 @@ function extract_image_from_gallery(gallery) { async function setTheme(val, old) { if (!old || val === old) return; + old = old.replace('modern/', ''); + val = val.replace('modern/', ''); const links = Array.from(document.getElementsByTagName('link')).filter((l) => l.href.includes(old)); + if (links.length === 0) { + log('setTheme: current theme not matched', old); + return; + } for (const link of links) { const href = link.href.replace(old, val); const res = await fetch(href); diff --git a/models/Reference/TempestV0.1-Artistic.jpg b/models/Reference/TempestV0.1-Artistic.jpg new file mode 100644 index 000000000..ab045a5ca Binary files /dev/null and b/models/Reference/TempestV0.1-Artistic.jpg differ diff --git a/models/Reference/dreamshaperXL_turboDpmppSDE.jpg b/models/Reference/dreamshaperXL_v21TurboDPMSDE.jpg similarity index 100% rename from models/Reference/dreamshaperXL_turboDpmppSDE.jpg rename to models/Reference/dreamshaperXL_v21TurboDPMSDE.jpg diff --git a/models/Reference/juggernautXL_v7Rundiffusion.jpg b/models/Reference/juggernautXL_v9Rdphoto2Lightning.jpg similarity index 100% rename from models/Reference/juggernautXL_v7Rundiffusion.jpg rename to models/Reference/juggernautXL_v9Rdphoto2Lightning.jpg diff --git a/models/Reference/juggernautXL_v9Rundiffusionphoto2.jpg b/models/Reference/juggernautXL_v9Rundiffusionphoto2.jpg new file mode 100644 index 000000000..cbce7cb32 Binary files /dev/null and b/models/Reference/juggernautXL_v9Rundiffusionphoto2.jpg differ diff --git a/models/Reference/stabilityai--stable-diffusion-xl-base-1.0.jpg b/models/Reference/sd_xl_base_1.0.jpg similarity index 100% rename from models/Reference/stabilityai--stable-diffusion-xl-base-1.0.jpg rename to models/Reference/sd_xl_base_1.0.jpg diff --git a/models/Reference/stabilityai--sdxl-turbo.jpg b/models/Reference/sdxl_turbo.jpg similarity index 100% rename from models/Reference/stabilityai--sdxl-turbo.jpg rename to models/Reference/sdxl_turbo.jpg diff --git a/models/Reference/stabilityai--sd-turbo.jpg b/models/Reference/stabilityai--stable-diffusion-2-1-base.jpg similarity index 100% rename from models/Reference/stabilityai--sd-turbo.jpg rename to models/Reference/stabilityai--stable-diffusion-2-1-base.jpg 
diff --git a/models/Reference/stabilityai--stable-diffusion-2.1-base.jpg b/models/Reference/stabilityai--stable-diffusion-2-1.jpg
similarity index 100%
rename from models/Reference/stabilityai--stable-diffusion-2.1-base.jpg
rename to models/Reference/stabilityai--stable-diffusion-2-1.jpg
diff --git a/models/Reference/runwayml--stable-diffusion-v1-5.jpg b/models/Reference/v1-5-pruned-fp16-emaonly.jpg
similarity index 100%
rename from models/Reference/runwayml--stable-diffusion-v1-5.jpg
rename to models/Reference/v1-5-pruned-fp16-emaonly.jpg
diff --git a/modules/api/api.py b/modules/api/api.py
index 4224f78d3..398719aa3 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -5,7 +5,7 @@
 from fastapi.security import HTTPBasic, HTTPBasicCredentials
 from fastapi.exceptions import HTTPException
 from modules import errors, shared, postprocessing
-from modules.api import models, endpoints, script, helpers, server, nvml, generate
+from modules.api import models, endpoints, script, helpers, server, nvml, generate, process, control

 errors.install()

@@ -28,6 +28,8 @@ def __init__(self, app: FastAPI, queue_lock: Lock):
         self.app = app
         self.queue_lock = queue_lock
         self.generate = generate.APIGenerate(queue_lock)
+        self.process = process.APIProcess(queue_lock)
+        self.control = control.APIControl(queue_lock)

         # server api
         self.add_api_route("/sdapi/v1/motd", server.get_motd, methods=["GET"], response_model=str)
@@ -43,20 +45,24 @@ def __init__(self, app: FastAPI, queue_lock: Lock):
         self.add_api_route("/sdapi/v1/options", server.get_config, methods=["GET"], response_model=models.OptionsModel)
         self.add_api_route("/sdapi/v1/options", server.set_config, methods=["POST"])
         self.add_api_route("/sdapi/v1/cmd-flags", server.get_cmd_flags, methods=["GET"], response_model=models.FlagsModel)
-        app.add_api_route("/sdapi/v1/nvml", nvml.get_nvml, methods=["GET"], response_model=List[models.ResNVML])
-
+        self.add_api_route("/sdapi/v1/nvml", nvml.get_nvml, methods=["GET"], response_model=List[models.ResNVML])
         # core api using locking
         self.add_api_route("/sdapi/v1/txt2img", self.generate.post_text2img, methods=["POST"], response_model=models.ResTxt2Img)
         self.add_api_route("/sdapi/v1/img2img", self.generate.post_img2img, methods=["POST"], response_model=models.ResImg2Img)
+        self.add_api_route("/sdapi/v1/control", self.control.post_control, methods=["POST"], response_model=control.ResControl)
         self.add_api_route("/sdapi/v1/extra-single-image", self.extras_single_image_api, methods=["POST"], response_model=models.ResProcessImage)
         self.add_api_route("/sdapi/v1/extra-batch-images", self.extras_batch_images_api, methods=["POST"], response_model=models.ResProcessBatch)
+        self.add_api_route("/sdapi/v1/preprocess", self.process.post_preprocess, methods=["POST"])
+        self.add_api_route("/sdapi/v1/mask", self.process.post_mask, methods=["POST"])
         # api dealing with optional scripts
         self.add_api_route("/sdapi/v1/scripts", script.get_scripts_list, methods=["GET"], response_model=models.ResScripts)
         self.add_api_route("/sdapi/v1/script-info", script.get_script_info, methods=["GET"], response_model=List[models.ItemScript])
         # enumerator api
+        self.add_api_route("/sdapi/v1/preprocessors", self.process.get_preprocess, methods=["GET"], response_model=List[process.ItemPreprocess])
+        self.add_api_route("/sdapi/v1/masking", self.process.get_mask, methods=["GET"], response_model=process.ItemMask)
         self.add_api_route("/sdapi/v1/interrogate", endpoints.get_interrogate, methods=["GET"], response_model=List[str])
         self.add_api_route("/sdapi/v1/samplers", endpoints.get_samplers, methods=["GET"], response_model=List[models.ItemSampler])
         self.add_api_route("/sdapi/v1/upscalers", endpoints.get_upscalers, methods=["GET"], response_model=List[models.ItemUpscaler])
diff --git a/modules/api/control.py b/modules/api/control.py
new file mode 100644
index 000000000..519f68378
--- /dev/null
+++ b/modules/api/control.py
@@ -0,0 +1,114 @@
+from typing import Optional, List
+from threading import Lock
+from pydantic import BaseModel, Field # pylint: disable=no-name-in-module
+from modules import errors, shared, scripts, ui
+from modules.api import script, helpers
+from modules.processing import StableDiffusionProcessingControl
+from modules.control import run as run_control
+
+# TODO control api
+# should use control.run, not process_images directly
+
+errors.install()
+
+
+class ReqControl(BaseModel):
+    pass
+
+class ResControl(BaseModel):
+    images: List[str] = Field(default=None, title="Image", description="The generated images in base64 format.")
+    params: dict = Field(default={}, title="Settings", description="Process settings")
+    info: str = Field(default="", title="Info", description="Process info")
+
+
+class APIControl():
+    def __init__(self, queue_lock: Lock):
+        self.queue_lock = queue_lock
+        self.default_script_arg = []
+
+    def sanitize_args(self, args: dict):
+        args = vars(args)
+        args.pop('include_init_images', None) # this is meant to be done by "exclude": True in model
+        args.pop('script_name', None)
+        args.pop('script_args', None) # will refeed them to the pipeline directly after initializing them
+        args.pop('alwayson_scripts', None)
+        args.pop('face', None)
+        args.pop('face_id', None)
+        args.pop('ip_adapter', None)
+        args.pop('save_images', None)
+        return args
+
+    def sanitize_b64(self, request):
+        def sanitize_str(args: list):
+            for idx in range(0, len(args)):
+                if isinstance(args[idx], str) and len(args[idx]) >= 1000:
+                    args[idx] = f""
+
+        if hasattr(request, "alwayson_scripts") and request.alwayson_scripts:
+            for script_name in request.alwayson_scripts.keys():
+                script_obj = request.alwayson_scripts[script_name]
+                if script_obj and "args" in script_obj and script_obj["args"]:
+                    sanitize_str(script_obj["args"])
+        if hasattr(request, "script_args") and request.script_args:
+            sanitize_str(request.script_args)
+
+    def prepare_face_module(self, request):
+        if hasattr(request, "face") and request.face and not request.script_name and (not request.alwayson_scripts or "face" not in request.alwayson_scripts.keys()):
+            request.script_name = "face"
+            request.script_args = [
+                request.face.mode,
+                request.face.source_images,
+                request.face.ip_model,
+                request.face.ip_override_sampler,
+                request.face.ip_cache_model,
+                request.face.ip_strength,
+                request.face.ip_structure,
+                request.face.id_strength,
+                request.face.id_conditioning,
+                request.face.id_cache,
+                request.face.pm_trigger,
+                request.face.pm_strength,
+                request.face.pm_start,
+                request.face.fs_cache
+            ]
+            del request.face
+
+    def post_control(self, req: ReqControl):
+        self.prepare_face_module(req)
+
+        # prepare script
+        script_runner = scripts.scripts_control
+        if not script_runner.scripts:
+            script_runner.initialize_scripts(False)
+            ui.create_ui(None)
+        if not self.default_script_arg:
+            self.default_script_arg = script.init_default_script_args(script_runner)
+
+        # prepare args
+        args = req.copy(update={ # Override __init__ params
+            "sampler_name": helpers.validate_sampler_name(req.sampler_name or req.sampler_index),
+            "sampler_index": None,
+            "do_not_save_samples": not req.save_images,
+            "do_not_save_grid": not req.save_images,
+            "init_images": [helpers.decode_base64_to_image(x) for x in req.init_images] if req.init_images else None,
+            "mask": helpers.decode_base64_to_image(req.mask) if req.mask else None,
+        })
+        args = self.sanitize_args(args)
+        send_images = args.pop('send_images', True)
+
+        # run
+        with self.queue_lock:
+            shared.state.begin('api-control', api=True)
+
+            # selectable_scripts, selectable_script_idx = script.get_selectable_script(req.script_name, script_runner)
+            # script_args = script.init_script_args(p, req, self.default_script_arg, selectable_scripts, selectable_script_idx, script_runner)
+            # output_images, _processed_images, output_info = run_control(**args, **script_args)
+            output_images = None
+            output_info = None
+
+            shared.state.end(api=False)
+
+        # return
+        b64images = list(map(helpers.encode_pil_to_base64, output_images)) if send_images else []
+        self.sanitize_b64(req)
+        return ResControl(images=b64images, params=vars(req), info=output_info)
diff --git a/modules/api/endpoints.py b/modules/api/endpoints.py
index 1337ccc8d..9f7efe078 100644
--- a/modules/api/endpoints.py
+++ b/modules/api/endpoints.py
@@ -68,8 +68,8 @@ def get_extra_networks(page: Optional[str] = None, name: Optional[str] = None, f
     return res

 def get_interrogate():
-    from modules.ui_interrogate import get_models
-    return ['clip', 'deepdanbooru'] + get_models()
+    from modules.interrogate import get_clip_models
+    return ['clip', 'deepdanbooru'] + get_clip_models()

 def post_interrogate(req: models.ReqInterrogate):
     if req.image is None or len(req.image) < 64:
@@ -87,8 +87,8 @@ def post_interrogate(req: models.ReqInterrogate):
         caption = deepbooru.model.tag(image)
         return models.ResInterrogate(caption=caption)
     else:
-        from modules.ui_interrogate import interrogate_image, analyze_image, get_models
-        if req.model not in get_models():
+        from modules.interrogate import interrogate_image, analyze_image, get_clip_models
+        if req.model not in get_clip_models():
             raise HTTPException(status_code=404, detail="Model not found")
         try:
             caption = interrogate_image(image, model=req.model, mode=req.mode)
diff --git a/modules/api/generate.py b/modules/api/generate.py
index 4f674d716..dda3fe98c 100644
--- a/modules/api/generate.py
+++ b/modules/api/generate.py
@@ -1,5 +1,5 @@
 from threading import Lock
-from fastapi.exceptions import HTTPException
+from fastapi.responses import JSONResponse
 from modules import errors, shared, scripts, ui
 from modules.api import models, script, helpers
 from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images
@@ -100,7 +100,7 @@ def post_img2img(self, img2imgreq: models.ReqImg2Img):
         self.prepare_face_module(img2imgreq)
         init_images = img2imgreq.init_images
         if init_images is None:
-            raise HTTPException(status_code=404, detail="Init image not found")
+            return JSONResponse(status_code=400, content={"error": "Init image is none"})
         mask = img2imgreq.mask
         if mask:
             mask = helpers.decode_base64_to_image(mask)
diff --git a/modules/api/models.py b/modules/api/models.py
index f234b0d7e..b0e56d8a2 100644
--- a/modules/api/models.py
+++ b/modules/api/models.py
@@ -208,7 +208,7 @@ class ItemExtension(BaseModel):
 StableDiffusionTxt2ImgProcessingAPI = ReqTxt2Img

 class ResTxt2Img(BaseModel):
-    images: List[str] = Field(default=None, title="Image", description="The generated image in base64 format.")
+    images: List[str] = Field(default=None, title="Image", description="The generated images in
base64 format.") parameters: dict info: str @@ -233,7 +233,7 @@ class ResTxt2Img(BaseModel): StableDiffusionImg2ImgProcessingAPI = ReqImg2Img class ResImg2Img(BaseModel): - images: List[str] = Field(default=None, title="Image", description="The generated image in base64 format.") + images: List[str] = Field(default=None, title="Image", description="The generated images in base64 format.") parameters: dict info: str @@ -272,7 +272,7 @@ class ResProcessBatch(ResProcess): images: List[str] = Field(title="Images", description="The generated images in base64 format.") class ReqImageInfo(BaseModel): - image: str = Field(title="Image", description="The base64 encoded PNG image") + image: str = Field(title="Image", description="The base64 encoded image") class ResImageInfo(BaseModel): info: str = Field(title="Image info", description="A string with the parameters used to generate the image") diff --git a/modules/api/process.py b/modules/api/process.py new file mode 100644 index 000000000..343b6efb4 --- /dev/null +++ b/modules/api/process.py @@ -0,0 +1,100 @@ +from typing import Optional, List +from threading import Lock +from pydantic import BaseModel, Field # pylint: disable=no-name-in-module +from fastapi.responses import JSONResponse +from modules.api.helpers import decode_base64_to_image, encode_pil_to_base64 +from modules import errors, shared + + +processor = None # cached instance of processor +errors.install() + + +class ReqPreprocess(BaseModel): + image: str = Field(title="Image", description="The base64 encoded image") + model: str = Field(title="Model", description="The model to use for preprocessing") + params: Optional[dict] = Field(default={}, title="Settings", description="Preprocessor settings") + +class ResPreprocess(BaseModel): + model: str = Field(default='', title="Model", description="The processor model used") + image: str = Field(default='', title="Image", description="The processed image in base64 format") + +class ReqMask(BaseModel): + image: str = Field(title="Image", description="The base64 encoded image") + type: str = Field(title="Mask type", description="Type of masking image to return") + mask: Optional[str] = Field(title="Mask", description="If optional maks image is not provided auto-masking will be performed") + model: Optional[str] = Field(title="Model", description="The model to use for preprocessing") + params: Optional[dict] = Field(default={}, title="Settings", description="Preprocessor settings") + +class ResMask(BaseModel): + mask: str = Field(default='', title="Image", description="The processed image in base64 format") + +class ItemPreprocess(BaseModel): + name: str = Field(title="Name") + params: dict = Field(title="Params") + +class ItemMask(BaseModel): + models: List[str] = Field(title="Models") + colormaps: List[str] = Field(title="Color maps") + params: dict = Field(title="Params") + types: List[str] = Field(title="Types") + + +class APIProcess(): + def __init__(self, queue_lock: Lock): + self.queue_lock = queue_lock + + def get_preprocess(self): + from modules.control import processors + items = [] + for k, v in processors.config.items(): + items.append(ItemPreprocess(name=k, params=v.get('params', {}))) + return items + + def post_preprocess(self, req: ReqPreprocess): + global processor # pylint: disable=global-statement + from modules.control import processors + models = list(processors.config) + if req.model not in models: + return JSONResponse(status_code=400, content={"error": f"Processor model not found: id={req.model}"}) + image = 
decode_base64_to_image(req.image) + if processor is None or processor.processor_id != req.model: + with self.queue_lock: + processor = processors.Processor(req.model) + for k, v in req.params.items(): + if k not in processors.config[processor.processor_id]['params']: + return JSONResponse(status_code=400, content={"error": f"Processor invalid parameter: id={req.model} {k}={v}"}) + shared.state.begin('api-preprocess', api=True) + processed = processor(image, local_config=req.params) + image = encode_pil_to_base64(processed) + shared.state.end(api=False) + return ResPreprocess(model=processor.processor_id, image=image) + + def get_mask(self): + from modules import masking + return ItemMask(models=list(masking.MODELS), colormaps=masking.COLORMAP, params=vars(masking.opts), types=masking.TYPES) + + def post_mask(self, req: ReqMask): + from modules import masking + if req.model: + if req.model not in masking.MODELS: + return JSONResponse(status_code=400, content={"error": f"Mask model not found: id={req.model}"}) + else: + masking.init_model(req.model) + if req.type not in masking.TYPES: + return JSONResponse(status_code=400, content={"error": f"Mask type not found: id={req.type}"}) + image = decode_base64_to_image(req.image) + mask = decode_base64_to_image(req.mask) if req.mask else None + for k, v in req.params.items(): + if not hasattr(masking.opts, k): + return JSONResponse(status_code=400, content={"error": f"Mask invalid parameter: {k}={v}"}) + else: + setattr(masking.opts, k, v) + shared.state.begin('api-mask', api=True) + with self.queue_lock: + processed = masking.run_mask(input_image=image, input_mask=mask, return_type=req.type) + shared.state.end(api=False) + if processed is None: + return JSONResponse(status_code=400, content={"error": "Mask is none"}) + image = encode_pil_to_base64(processed) + return ResMask(mask=image) diff --git a/modules/control/processors.py b/modules/control/processors.py index c5ff7003a..f7bafb81d 100644 --- a/modules/control/processors.py +++ b/modules/control/processors.py @@ -206,9 +206,9 @@ def load(self, processor_id: str = None) -> str: display(e, 'Control Processor load') return f'Processor load filed: {processor_id}' - def __call__(self, image_input: Image, mode: str = 'RGB', resize_mode: int = 0, resize_name: str = 'None', scale_tab: int = 1, scale_by: float = 1.0): + def __call__(self, image_input: Image, mode: str = 'RGB', resize_mode: int = 0, resize_name: str = 'None', scale_tab: int = 1, scale_by: float = 1.0, local_config: dict = {}): if self.processor_id is None or self.processor_id == 'None': - return image_input + return self.override if self.override is not None else image_input if self.override is not None: debug(f'Control Processor: id="{self.processor_id}" override={self.override}') image_input = self.override @@ -232,6 +232,8 @@ def __call__(self, image_input: Image, mode: str = 'RGB', resize_mode: int = 0, try: t0 = time.time() kwargs = config.get(self.processor_id, {}).get('params', None) + if kwargs: + kwargs.update(local_config) if self.resize: image_resized = image_input.resize((512, 512), Image.Resampling.LANCZOS) else: diff --git a/modules/control/run.py b/modules/control/run.py index 94e38a8fa..f0e54c5b2 100644 --- a/modules/control/run.py +++ b/modules/control/run.py @@ -27,14 +27,20 @@ def restore_pipeline(): global pipe, instance # pylint: disable=global-statement if instance is not None and hasattr(instance, 'restore'): instance.restore() - if original_pipeline is not None: + if original_pipeline is not None and 
(original_pipeline.__class__.__name__ != shared.sd_model.__class__.__name__): + shared.log.debug(f'Control restored pipeline: class={shared.sd_model.__class__.__name__} to={original_pipeline.__class__.__name__}') shared.sd_model = original_pipeline - shared.log.debug(f'Control restored pipeline: class={shared.sd_model.__class__.__name__}') pipe = None instance = None devices.torch_gc() +def terminate(msg): + restore_pipeline() + shared.log.error(f'Control terminated: {msg}') + return msg + + def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_generator: bool, input_type: int, prompt, negative, styles, steps, sampler_index, seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, @@ -44,6 +50,8 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ resize_mode_after, resize_name_after, width_after, height_after, scale_by_after, selected_scale_tab_after, resize_mode_mask, resize_name_mask, width_mask, height_mask, scale_by_mask, selected_scale_tab_mask, denoising_strength, batch_count, batch_size, + enable_hr, hr_sampler_index, hr_denoising_strength, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, + refiner_start, refiner_prompt, refiner_negative, video_skip_frames, video_type, video_duration, video_loop, video_pad, video_interpolate, *input_script_args # pylint: disable=unused-argument ): @@ -66,13 +74,15 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ negative_prompt = negative, styles = styles, steps = steps, + n_iter = batch_count, + batch_size = batch_size, sampler_name = processing.get_sampler_name(sampler_index), - hr_sampler_name = processing.get_sampler_name(sampler_index), seed = seed, subseed = subseed, subseed_strength = subseed_strength, seed_resize_from_h = seed_resize_from_h, seed_resize_from_w = seed_resize_from_w, + # advanced cfg_scale = cfg_scale, clip_skip = clip_skip, image_cfg_scale = image_cfg_scale, @@ -81,29 +91,46 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ full_quality = full_quality, restore_faces = restore_faces, tiling = tiling, + # resize resize_mode = resize_mode_before if resize_name_before != 'None' else 0, resize_name = resize_name_before, scale_by = scale_by_before, selected_scale_tab = selected_scale_tab_before, denoising_strength = denoising_strength, - n_iter = batch_count, - batch_size = batch_size, + # inpaint inpaint_full_res = masking.opts.mask_only, - # inpaint_full_res_padding = masking.opts.mask_padding, inpainting_mask_invert = 1 if masking.opts.invert else 0, inpainting_fill = 1, + # hdr hdr_mode=hdr_mode, hdr_brightness=hdr_brightness, hdr_color=hdr_color, hdr_sharpen=hdr_sharpen, hdr_clamp=hdr_clamp, hdr_boundary=hdr_boundary, hdr_threshold=hdr_threshold, hdr_maximize=hdr_maximize, hdr_max_center=hdr_max_center, hdr_max_boundry=hdr_max_boundry, hdr_color_picker=hdr_color_picker, hdr_tint_ratio=hdr_tint_ratio, + # path outpath_samples=shared.opts.outdir_samples or shared.opts.outdir_control_samples, outpath_grids=shared.opts.outdir_grids or shared.opts.outdir_control_grids, ) processing.process_init(p) - + # set initial resolution if resize_mode_before != 0 or inputs is None or inputs == [None]: p.width, p.height = width_before, height_before # pylint: disable=attribute-defined-outside-init else: del p.width del p.height + # hires/refine defined outside of main init + p.enable_hr = enable_hr + p.hr_sampler_name = 
processing.get_sampler_name(hr_sampler_index) + p.hr_denoising_strength = hr_denoising_strength + p.hr_upscaler = hr_upscaler + p.hr_force = hr_force + p.hr_second_pass_steps = hr_second_pass_steps + p.hr_scale = hr_scale + p.hr_resize_x = hr_resize_x + p.hr_resize_y = hr_resize_y + p.refiner_steps = refiner_steps + p.refiner_start = refiner_start + p.refiner_prompt = refiner_prompt + p.refiner_negative = refiner_negative + if p.enable_hr and (p.hr_resize_x == 0 or p.hr_resize_y == 0): + p.hr_upscale_to_x, p.hr_upscale_to_y = 8 * int(p.width * p.hr_scale / 8), 8 * int(p.height * p.hr_scale / 8) t0 = time.time() num_units = 0 @@ -249,13 +276,16 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ frames = 0 # set pipeline - original_pipeline = shared.sd_model - shared.sd_model = pipe - sd_models.move_model(shared.sd_model, shared.device) - shared.sd_model.to(dtype=devices.dtype) - debug(f'Control device={devices.device} dtype={devices.dtype}') - sd_models.copy_diffuser_options(shared.sd_model, original_pipeline) # copy options from original pipeline - sd_models.set_diffuser_options(shared.sd_model) + if pipe.__class__.__name__ != shared.sd_model.__class__.__name__: + original_pipeline = shared.sd_model + shared.sd_model = pipe + sd_models.move_model(shared.sd_model, shared.device) + shared.sd_model.to(dtype=devices.dtype) + debug(f'Control device={devices.device} dtype={devices.dtype}') + sd_models.copy_diffuser_options(shared.sd_model, original_pipeline) # copy options from original pipeline + sd_models.set_diffuser_options(shared.sd_model) + else: + original_pipeline = None try: with devices.inference_context(): @@ -267,10 +297,8 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ try: video = cv2.VideoCapture(inputs) if not video.isOpened(): - msg = f'Control: video open failed: path={inputs}' - shared.log.error(msg) - restore_pipeline() - return msg + yield terminate(f'Control: video open failed: path={inputs}') + return frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) fps = int(video.get(cv2.CAP_PROP_FPS)) w, h = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) @@ -280,10 +308,8 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) shared.log.debug(f'Control: input video: path={inputs} frames={frames} fps={fps} size={w}x{h} codec={codec}') except Exception as e: - msg = f'Control: video open failed: path={inputs} {e}' - shared.log.error(msg) - restore_pipeline() - return msg + yield terminate(f'Control: video open failed: path={inputs} {e}') + return while status: processed_image = None @@ -296,8 +322,8 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ continue if shared.state.interrupted: shared.state.interrupted = False - restore_pipeline() - return 'Control interrupted' + yield terminate('Control interrupted') + return # get input if isinstance(input_image, str): try: @@ -335,7 +361,7 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ debug(f'Control resize: op=before image={input_image} width={width_before} height={height_before} mode={resize_mode_before} name={resize_name_before}') input_image = images.resize_image(resize_mode_before, input_image, width_before, height_before, resize_name_before) if input_image is not None and init_image is not None and init_image.size != input_image.size: - debug(f'Control resize init: image={p.override} 
target={input_image}') + debug(f'Control resize init: image={init_image} target={input_image}') init_image = images.resize_image(resize_mode=1, im=init_image, width=input_image.width, height=input_image.height) if input_image is not None and p.override is not None and p.override.size != input_image.size: debug(f'Control resize override: image={p.override} target={input_image}') @@ -358,11 +384,10 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ else: masked_image = input_image for i, process in enumerate(active_process): # list[image] - image_mode = 'L' if unit_type == 't2i adapter' and len(active_model) > i and ('Canny' in active_model[i].model_id or 'Sketch' in active_model[i].model_id) else 'RGB' # t2iadapter canny and sketch work in grayscale only debug(f'Control: i={i+1} process="{process.processor_id}" input={masked_image} override={process.override}') processed_image = process( image_input=masked_image, - mode=image_mode, + mode='RGB', resize_mode=resize_mode_before, resize_name=resize_name_before, scale_tab=selected_scale_tab_before, @@ -380,10 +405,8 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ if len(p.extra_generation_params["Control process"]) == 0: p.extra_generation_params["Control process"] = None if any(img is None for img in processed_images): - msg = 'Control: attempting process but output is none' - shared.log.error(f'{msg}: {processed_images}') - restore_pipeline() - return msg + yield terminate('Control: attempting process but output is none') + return if len(processed_images) > 1: processed_image = [np.array(i) for i in processed_images] processed_image = util.blend(processed_image) # blend all processed images into one @@ -394,10 +417,8 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ debug(f'Control: inputs match: input={len(processed_images)} models={len(selected_models)}') p.init_images = processed_images elif isinstance(selected_models, list) and len(processed_images) != len(selected_models): - msg = f'Control: number of inputs does not match: input={len(processed_images)} models={len(selected_models)}' - shared.log.error(msg) - restore_pipeline() - return msg + yield terminate(f'Control: number of inputs does not match: input={len(processed_images)} models={len(selected_models)}') + return elif selected_models is not None: if len(processed_images) > 1: debug('Control: using blended image for single model') @@ -412,21 +433,19 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ p.task_args['ref_image'] = p.ref_image debug(f'Control: process=None image={p.ref_image}') if p.ref_image is None: - msg = 'Control: attempting reference mode but image is none' - shared.log.error(msg) - restore_pipeline() - return msg + yield terminate('Control: attempting reference mode but image is none') + return elif unit_type == 'controlnet' and input_type == 1: # Init image same as control p.task_args['control_image'] = p.init_images # switch image and control_image - p.init_images = [p.override or input_image] * len(active_model) p.task_args['strength'] = p.denoising_strength + p.init_images = [p.override or input_image] * len(active_model) elif unit_type == 'controlnet' and input_type == 2: # Separate init image if init_image is None: shared.log.warning('Control: separate init image not provided') init_image = input_image p.task_args['control_image'] = p.init_images # switch image and control_image - p.init_images = [init_image] * 
len(active_model) p.task_args['strength'] = p.denoising_strength + p.init_images = [init_image] * len(active_model) if is_generator: image_txt = f'{processed_image.width}x{processed_image.height}' if processed_image is not None else 'None' @@ -449,7 +468,7 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ p.init_images = [processed_image] shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE) else: - p.init_hr() + p.init_hr(p.scale_by, p.resize_name, force=True) shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE) elif has_models: # actual control p.is_control = True @@ -463,11 +482,19 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE) if hasattr(p, 'init_images') and p.init_images is not None: p.task_args['image'] = p.init_images # need to set explicitly for txt2img + del p.init_images if unit_type == 'lite': - instance.apply(selected_models, p.init_images, control_conditioning) - if hasattr(p, 'init_images') and p.init_images is None: # delete as its set via task_args + p.init_image = [input_image] + instance.apply(selected_models, processed_image, control_conditioning) + if hasattr(p, 'init_images') and p.init_images is None: # delete empty del p.init_images + # final check + if has_models: + if unit_type in ['controlnet', 't2i adapter', 'lite', 'xs'] and p.task_args.get('image', None) is None and getattr(p, 'init_images', None) is None: + yield terminate(f'Control: mode={p.extra_generation_params.get("Control mode", None)} input image is none') + return + # resize mask if mask is not None and resize_mode_mask != 0 and resize_name_mask != 'None': if selected_scale_tab_mask == 1: @@ -544,6 +571,7 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ else: image_str = [f'{image.width}x{image.height}' for image in output_images] image_txt = f'| Images {len(output_images)} | Size {" ".join(image_str)}' + p.init_images = output_images # may be used for hires if video_type != 'None' and isinstance(output_images, list): p.do_not_save_grid = True # pylint: disable=attribute-defined-outside-init diff --git a/modules/control/units/xs_model.py b/modules/control/units/xs_model.py index 3a6721766..43d992575 100644 --- a/modules/control/units/xs_model.py +++ b/modules/control/units/xs_model.py @@ -22,12 +22,13 @@ from torch.nn.modules.normalization import GroupNorm from diffusers.configuration_utils import ConfigMixin, register_to_config -from diffusers.models.attention_processor import USE_PEFT_BACKEND, AttentionProcessor +from diffusers.models.attention_processor import AttentionProcessor from diffusers.models.autoencoders import AutoencoderKL from diffusers.models.lora import LoRACompatibleConv from diffusers.models.modeling_utils import ModelMixin + try: - from diffusers.models.unet_2d_blocks import CrossAttnDownBlock2D, CrossAttnUpBlock2D, DownBlock2D, Downsample2D, ResnetBlock2D, Transformer2DModel, UpBlock2D, Upsample2D + from diffusers.models.unet_2d_blocks import CrossAttnDownBlock2D, CrossAttnUpBlock2D, DownBlock2D, Downsample2D, ResnetBlock2D, Transformer2DModel, UpBlock2D, Upsample2D # pylint: disable=no-name-in-module except Exception: pass try: @@ -36,7 +37,7 @@ pass from diffusers.models.unet_2d_condition import UNet2DConditionModel -from diffusers.utils import BaseOutput, logging +from 
diffusers.utils import BaseOutput, logging, USE_PEFT_BACKEND logger = logging.get_logger(__name__) # pylint: disable=invalid-name diff --git a/modules/devices.py b/modules/devices.py index ff8b5f80d..7fe12e8ea 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -12,6 +12,7 @@ previous_oom = 0 +backup_sdpa = None debug = os.environ.get('SD_DEVICE_DEBUG', None) is not None @@ -229,10 +230,29 @@ def set_cuda_params(): except Exception: pass try: - if shared.opts.cross_attention_optimization == "Scaled-Dot-Product": + if shared.opts.cross_attention_optimization == "Scaled-Dot-Product" or shared.opts.cross_attention_optimization == "Dynamic Attention SDP": torch.backends.cuda.enable_flash_sdp('Flash attention' in shared.opts.sdp_options) torch.backends.cuda.enable_mem_efficient_sdp('Memory attention' in shared.opts.sdp_options) torch.backends.cuda.enable_math_sdp('Math attention' in shared.opts.sdp_options) + if backend == "rocm": + global backup_sdpa # pylint: disable=global-statement + if 'Flash attention' in shared.opts.sdp_options: + try: + # https://github.com/huggingface/diffusers/discussions/7172 + from flash_attn import flash_attn_func + if backup_sdpa is None: + backup_sdpa = torch.nn.functional.scaled_dot_product_attention + def sdpa_hijack(query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False, scale=None): + if query.shape[3] <= 128 and attn_mask is None: + return flash_attn_func(q=query.transpose(1, 2), k=key.transpose(1, 2), v=value.transpose(1, 2), dropout_p=dropout_p, causal=is_causal, softmax_scale=scale).transpose(1, 2) + else: + return backup_sdpa(query=query, key=key, value=value, attn_mask=attn_mask, dropout_p=dropout_p, is_causal=is_causal, scale=scale) + torch.nn.functional.scaled_dot_product_attention = sdpa_hijack + shared.log.debug('ROCm Flash Attention Hijacked') + except Exception as err: + log.error(f'ROCm Flash Attention failed: {err}') + elif backup_sdpa is not None: # Restore original SDPA + torch.nn.functional.scaled_dot_product_attention = backup_sdpa except Exception: pass if shared.cmd_opts.profile: @@ -290,14 +310,14 @@ def set_cuda_params(): from modules.intel.ipex import ipex_init ok, e = ipex_init() if not ok: - log.error('IPEX initialization failed: {e}') + log.error(f'IPEX initialization failed: {e}') backend = 'cpu' elif args.use_directml: backend = 'directml' from modules.dml import directml_init ok, e = directml_init() if not ok: - log.error('DirectML initialization failed: {e}') + log.error(f'DirectML initialization failed: {e}') backend = 'cpu' elif torch.cuda.is_available() and torch.version.cuda: backend = 'cuda' diff --git a/modules/extra_networks.py b/modules/extra_networks.py index 4bca0fe33..291771907 100644 --- a/modules/extra_networks.py +++ b/modules/extra_networks.py @@ -1,6 +1,6 @@ import re from collections import defaultdict -from modules import errors +from modules import errors, shared extra_network_registry = {} @@ -15,7 +15,7 @@ def register_extra_network(extra_network): def register_default_extra_networks(): - from modules.extra_networks_hypernet import ExtraNetworkHypernet + from modules.ui_extra_networks_hypernet import ExtraNetworkHypernet register_extra_network(ExtraNetworkHypernet()) from modules.ui_extra_networks_styles import ExtraNetworkStyles register_extra_network(ExtraNetworkStyles()) @@ -62,17 +62,33 @@ def deactivate(self, p): raise NotImplementedError -def activate(p, extra_network_data): +def is_stepwise(en_obj): + all_args = [] + for en in en_obj: + all_args.extend(en.positional[1:]) + 
all_args.extend(en.named.values()) + return any([len(str(x).split("@")) > 1 for x in all_args]) # noqa C419 + + +def activate(p, extra_network_data, step=0): """call activate for extra networks in extra_network_data in specified order, then call activate for all remaining registered networks with an empty argument list""" if extra_network_data is None: return + stepwise = False + for extra_network_args in extra_network_data.values(): + stepwise = stepwise or is_stepwise(extra_network_args) + functional = shared.opts.lora_functional + if shared.opts.lora_force_diffusers and stepwise: + shared.log.warning("Composable LoRA not compatible with 'lora_force_diffusers'") + stepwise = False + shared.opts.data['lora_functional'] = stepwise or functional for extra_network_name, extra_network_args in extra_network_data.items(): extra_network = extra_network_registry.get(extra_network_name, None) if extra_network is None: errors.log.warning(f"Skipping unknown extra network: {extra_network_name}") continue try: - extra_network.activate(p, extra_network_args) + extra_network.activate(p, extra_network_args, step=step) except Exception as e: errors.display(e, f"activating extra network: name={extra_network_name} args:{extra_network_args}") @@ -84,6 +100,9 @@ def activate(p, extra_network_data): extra_network.activate(p, []) except Exception as e: errors.display(e, f"activating extra network: name={extra_network_name}") + if stepwise: + p.extra_network_data = extra_network_data + shared.opts.data['lora_functional'] = functional def deactivate(p, extra_network_data): diff --git a/modules/face/__init__.py b/modules/face/__init__.py index e0d76b689..5b4c3a31c 100644 --- a/modules/face/__init__.py +++ b/modules/face/__init__.py @@ -91,7 +91,9 @@ def ui(self, _is_img2img): def run(self, p: processing.StableDiffusionProcessing, mode, input_images, ip_model, ip_override, ip_cache, ip_strength, ip_structure, id_strength, id_conditioning, id_cache, pm_trigger, pm_strength, pm_start, fs_cache): # pylint: disable=arguments-differ, unused-argument if shared.backend != shared.Backend.DIFFUSERS: - return + return None + if mode == 'None': + return None if input_images is None or len(input_images) == 0: shared.log.error('Face: no init images') return None diff --git a/modules/face_restoration.py b/modules/face_restoration.py index d7fc5d1e9..d17191fdf 100644 --- a/modules/face_restoration.py +++ b/modules/face_restoration.py @@ -9,9 +9,9 @@ def restore(self, np_image): return np_image -def restore_faces(np_image): +def restore_faces(np_image, p=None): face_restorers = [x for x in shared.face_restorers if x.name() == shared.opts.face_restoration_model or shared.opts.face_restoration_model is None] if len(face_restorers) == 0: return np_image face_restorer = face_restorers[0] - return face_restorer.restore(np_image) + return face_restorer.restore(np_image, p) diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py index 49aa581b3..bdc9ace4c 100644 --- a/modules/generation_parameters_copypaste.py +++ b/modules/generation_parameters_copypaste.py @@ -9,9 +9,6 @@ from modules import shared, gr_tempdir, script_callbacks, images -re_param_code = r'\s*([\w ]+):\s*("(?:\\"[^,]|\\"|\\|[^\"])+"|[^,]*)(?:,|$)' -re_param = re.compile(re_param_code) -re_imagesize = re.compile(r"^(\d+)x(\d+)$") type_of_gr_update = type(gr.update()) paste_fields = {} registered_param_bindings = [] @@ -187,40 +184,48 @@ def send_image_and_dimensions(x): return img, w, h -def parse_generation_parameters(x: str): 
- res = {} - if x is None: - return res - remaining = x.replace('\n', ' ').strip() - if len(remaining) == 0: - return res - remaining = x[7:] if x.startswith('Prompt: ') else x - remaining = x[11:] if x.startswith('parameters: ') else x - if 'Steps: ' in remaining and 'Negative prompt: ' not in remaining: - remaining = remaining.replace('Steps: ', 'Negative prompt: Steps: ') - prompt, remaining = remaining.strip().split('Negative prompt: ', maxsplit=1) if 'Negative prompt: ' in remaining else (remaining, '') - res["Prompt"] = prompt.strip() - negative, remaining = remaining.strip().split('Steps: ', maxsplit=1) if 'Steps: ' in remaining else (remaining, None) - res["Negative prompt"] = negative.strip() - if remaining is None: - return res - remaining = f'Steps: {remaining}' - for k, v in re_param.findall(remaining.strip()): - try: - if v[0] == '"' and v[-1] == '"': - v = unquote(v) - m = re_imagesize.match(v) - if m is not None: - res[f"{k}-1"] = m.group(1) - res[f"{k}-2"] = m.group(2) - else: - res[k] = v - except Exception: - pass - if res.get('VAE', None) == 'TAESD': - res["Full quality"] = False - debug(f"Parse prompt: {res}") - return res +def parse_generation_parameters(infotext): + if not isinstance(infotext, str): + return {} + debug(f'Parse infotext: {infotext}') + re_param = re.compile(r'\s*([\w ]+):\s*("(?:\\"[^,]|\\"|\\|[^\"])+"|[^,]*)(?:,|$)') # multi-word: value + re_size = re.compile(r"^(\d+)x(\d+)$") # int x int + sanitized = infotext.replace('prompt:', 'Prompt:').replace('negative prompt:', 'Negative prompt:').replace('Negative Prompt', 'Negative prompt') # cleanup everything in brackets so re_params can work + sanitized = re.sub(r'<[^>]*>', lambda match: ' ' * len(match.group()), sanitized) + sanitized = re.sub(r'\([^)]*\)', lambda match: ' ' * len(match.group()), sanitized) + sanitized = re.sub(r'\{[^}]*\}', lambda match: ' ' * len(match.group()), sanitized) + + params = dict(re_param.findall(sanitized)) + debug(f"Parse params: {params}") + params = { k.strip():params[k].strip() for k in params if k.lower() not in ['hashes', 'lora', 'embeddings', 'prompt', 'negative prompt']} # remove some keys + first_param = next(iter(params)) if params else None + params_idx = sanitized.find(f'{first_param}:') if first_param else -1 + negative_idx = infotext.find("Negative prompt:") + + prompt = infotext[:params_idx] if negative_idx == -1 else infotext[:negative_idx] # prompt can be with or without negative prompt + negative = infotext[negative_idx:params_idx] if negative_idx >= 0 else '' + + for k, v in params.copy().items(): # avoid dict-has-changed + if len(v) > 0 and v[0] == '"' and v[-1] == '"': + v = unquote(v) + m = re_size.match(v) + if v.replace('.', '', 1).isdigit(): + params[k] = float(v) if '.' 
in v else int(v) + elif v == "True": + params[k] = True + elif v == "False": + params[k] = False + elif m is not None: + params[f"{k}-1"] = int(m.group(1)) + params[f"{k}-2"] = int(m.group(2)) + elif k == 'VAE' and v == 'TAESD': + params["Full quality"] = False + else: + params[k] = v + params["Prompt"] = prompt.replace('Prompt:', '').strip() + params["Negative prompt"] = negative.replace('Negative prompt:', '').strip() + debug(f"Parse: {params}") + return params settings_map = {} diff --git a/modules/images.py b/modules/images.py index a9060da06..81c04e5f4 100644 --- a/modules/images.py +++ b/modules/images.py @@ -7,6 +7,7 @@ import uuid import queue import string +import random import hashlib import datetime import threading @@ -122,13 +123,9 @@ def __init__(self, text='', is_active=True): def get_font(fontsize): try: - return ImageFont.truetype( - shared.opts.font or "javascript/notosans-nerdfont-regular.ttf", fontsize - ) + return ImageFont.truetype(shared.opts.font or "javascript/notosans-nerdfont-regular.ttf", fontsize) except Exception: - return ImageFont.truetype( - "javascript/notosans-nerdfont-regular.ttf", fontsize - ) + return ImageFont.truetype("javascript/notosans-nerdfont-regular.ttf", fontsize) def draw_grid_annotations(im, width, height, hor_texts, ver_texts, margin=0, title=None): @@ -215,31 +212,21 @@ def draw_prompt_matrix(im, width, height, all_prompts, margin=0): def resize_image(resize_mode, im, width, height, upscaler_name=None, output_type='image'): - shared.log.debug(f'Image resize: input={im} mode={resize_mode} target={width}x{height} upscaler={upscaler_name} function={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access - """ - Resizes an image with the specified resize_mode, width, and height. - Args: - resize_mode: The mode to use when resizing the image. - 0: No resize - 1: Resize the image to the specified width and height. - 2: Resize the image to fill the specified width and height, maintaining the aspect ratio, and then center the image within the dimensions, cropping the excess. - 3: Resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center the image within the dimensions, filling empty with data from image. - im: The image to resize. - width: The width to resize the image to. - height: The height to resize the image to. - upscaler_name: The name of the upscaler to use. If not provided, defaults to opts.upscaler_for_img2img. 
- """ + if im.width == width and im.height == height: + shared.log.debug(f'Image resize: input={im} target={width}x{height} mode={shared.resize_modes[resize_mode]} upscaler="{upscaler_name}" fn={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access upscaler_name = upscaler_name or shared.opts.upscaler_for_img2img def latent(im, w, h, upscaler): from modules.processing_vae import vae_encode, vae_decode import torch latents = vae_encode(im, shared.sd_model, full_quality=False) # TODO enable full VAE mode - latents = torch.nn.functional.interpolate(latents, size=(h // 8, w // 8), mode=upscaler["mode"], antialias=upscaler["antialias"]) + latents = torch.nn.functional.interpolate(latents, size=(int(h // 8), int(w // 8)), mode=upscaler["mode"], antialias=upscaler["antialias"]) im = vae_decode(latents, shared.sd_model, output_type='pil', full_quality=False)[0] return im def resize(im, w, h): + w = int(w) + h = int(h) if upscaler_name is None or upscaler_name == "None" or im.mode == 'L': return im.resize((w, h), resample=Image.Resampling.LANCZOS) # force for mask scale = max(w / im.width, h / im.height) @@ -253,16 +240,13 @@ def resize(im, w, h): if upscaler is not None: im = latent(im, w, h, upscaler) else: + upscaler = upscalers[0] shared.log.warning(f"Resize upscaler: invalid={upscaler_name} fallback={upscaler.name}") if im.width != w or im.height != h: # probably downsample after upscaler created larger image im = im.resize((w, h), resample=Image.Resampling.LANCZOS) return im - if resize_mode == 0 or (im.width == width and im.height == height): - res = im.copy() - elif resize_mode == 1: - res = resize(im, width, height) - elif resize_mode == 2: + def crop(im): ratio = width / height src_ratio = im.width / im.height src_w = width if ratio > src_ratio else im.width * height // im.height @@ -270,7 +254,11 @@ def resize(im, w, h): resized = resize(im, src_w, src_h) res = Image.new(im.mode, (width, height)) res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2)) - else: + return res + + def fill(im, color=None): + color = color or shared.opts.image_background + """ ratio = round(width / height, 1) src_ratio = round(im.width / im.height, 1) src_w = width if ratio < src_ratio else im.width * height // im.height @@ -288,6 +276,26 @@ def resize(im, w, h): if height > 0 and fill_width > 0: res.paste(resized.resize((fill_width, height), box=(0, 0, 0, height)), box=(0, 0)) res.paste(resized.resize((fill_width, height), box=(resized.width, 0, resized.width, height)), box=(fill_width + src_w, 0)) + return res + """ + ratio = min(width / im.width, height / im.height) + im = resize(im, int(im.width * ratio), int(im.height * ratio)) + res = Image.new(im.mode, (width, height), color=color) + res.paste(im, box=((width - im.width)//2, (height - im.height)//2)) + return res + + if resize_mode == 0 or (im.width == width and im.height == height): # none + res = im.copy() + elif resize_mode == 1: # fixed + res = resize(im, width, height) + elif resize_mode == 2: # crop + res = crop(im) + elif resize_mode == 3: # fill + res = fill(im) + elif resize_mode == 4: # edge + from modules import masking + res = fill(im, color=0) + res, _mask = masking.outpaint(res) if output_type == 'np': return np.array(res) return res @@ -534,9 +542,9 @@ def atomically_save_image(): try: image_format = Image.registered_extensions()[extension] except Exception: - shared.log.warning(f'Unknown image format: {extension}') + shared.log.warning(f'Saving: unknown image format: {extension}') image_format = 'JPEG' 
- if shared.opts.image_watermark_enabled: + if shared.opts.image_watermark_enabled or (shared.opts.image_watermark_position != 'none' and shared.opts.image_watermark_image != ''): image = set_watermark(image, shared.opts.image_watermark) size = os.path.getsize(fn) if os.path.exists(fn) else 0 shared.log.info(f'Saving: image="{fn}" type={image_format} resolution={image.width}x{image.height} size={size}') @@ -547,42 +555,33 @@ def atomically_save_image(): file.write(f"{exifinfo}\n") shared.log.info(f'Saving: text="{filename_txt}" len={len(exifinfo)}') except Exception as e: - shared.log.warning(f'Image description save failed: {filename_txt} {e}') + shared.log.warning(f'Saving failed: description={filename_txt} {e}') # actual save exifinfo = (exifinfo or "") if shared.opts.image_metadata else "" if image_format == 'PNG': pnginfo_data = PngImagePlugin.PngInfo() for k, v in params.pnginfo.items(): pnginfo_data.add_text(k, str(v)) - try: - image.save(fn, format=image_format, compress_level=6, pnginfo=pnginfo_data if shared.opts.image_metadata else None) - except Exception as e: - shared.log.error(f'Image save failed: file="{fn}" {e}') + save_args = { 'compress_level': 6, 'pnginfo': pnginfo_data if shared.opts.image_metadata else None } elif image_format == 'JPEG': if image.mode == 'RGBA': - shared.log.warning('Saving RGBA image as JPEG: Alpha channel will be lost') + shared.log.warning('Saving: removing alpha channel') image = image.convert("RGB") elif image.mode == 'I;16': image = image.point(lambda p: p * 0.0038910505836576).convert("L") exif_bytes = piexif.dump({ "Exif": { piexif.ExifIFD.UserComment: piexif.helper.UserComment.dump(exifinfo, encoding="unicode") } }) - try: - image.save(fn, format=image_format, optimize=True, quality=shared.opts.jpeg_quality, exif=exif_bytes) - except Exception as e: - shared.log.error(f'Image save failed: file="{fn}" {e}') + save_args = { 'optimize': True, 'quality': shared.opts.jpeg_quality, 'exif': exif_bytes if shared.opts.image_metadata else None } elif image_format == 'WEBP': if image.mode == 'I;16': image = image.point(lambda p: p * 0.0038910505836576).convert("RGB") exif_bytes = piexif.dump({ "Exif": { piexif.ExifIFD.UserComment: piexif.helper.UserComment.dump(exifinfo, encoding="unicode") } }) - try: - image.save(fn, format=image_format, quality=shared.opts.jpeg_quality, lossless=shared.opts.webp_lossless, exif=exif_bytes) - except Exception as e: - shared.log.error(f'Image save failed: file="{fn}" {e}') + save_args = { 'optimize': True, 'quality': shared.opts.jpeg_quality, 'exif': exif_bytes if shared.opts.image_metadata else None, 'lossless': shared.opts.webp_lossless } else: - # shared.log.warning(f'Unrecognized image format: {extension} attempting save as {image_format}') - try: - image.save(fn, format=image_format, quality=shared.opts.jpeg_quality) - except Exception as e: - shared.log.error(f'Image save failed: file="{fn}" {e}') + save_args = { 'quality': shared.opts.jpeg_quality } + try: + image.save(fn, format=image_format, **save_args) + except Exception as e: + shared.log.error(f'Saving failed: file="{fn}" format={image_format} {e}') if shared.opts.save_log_fn != '' and len(exifinfo) > 0: fn = os.path.join(paths.data_path, shared.opts.save_log_fn) if not fn.endswith('.json'): @@ -604,7 +603,7 @@ def atomically_save_image(): def save_image(image, path, basename='', seed=None, prompt=None, extension=shared.opts.samples_format, info=None, short_filename=False, no_prompt=False, grid=False, pnginfo_section_name='parameters', p=None, 
existing_info=None, forced_filename=None, suffix='', save_to_dirs=None): # pylint: disable=unused-argument - debug(f'Save from function={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access + debug(f'Save: fn={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access if image is None: shared.log.warning('Image is none') return None, None @@ -839,24 +838,55 @@ def flatten(img, bgcolor): def set_watermark(image, watermark): - from imwatermark import WatermarkEncoder - wm_type = 'bytes' - wm_method = 'dwtDctSvd' - wm_length = 32 - length = wm_length // 8 - info = image.info - data = np.asarray(image) - encoder = WatermarkEncoder() - text = f"{watermark:<{length}}"[:length] - bytearr = text.encode(encoding='ascii', errors='ignore') - try: - encoder.set_watermark(wm_type, bytearr) - encoded = encoder.encode(data, wm_method) - image = Image.fromarray(encoded) - image.info = info - shared.log.debug(f'Set watermark: {watermark} method={wm_method} bits={wm_length}') - except Exception as e: - shared.log.warning(f'Set watermark error: {watermark} method={wm_method} bits={wm_length} {e}') + if shared.opts.image_watermark_position != 'none': # visible watermark + wm_image = None + try: + wm_image = Image.open(shared.opts.image_watermark_image) + except Exception as e: + shared.log.warning(f'Set image watermark: fn="{shared.opts.image_watermark_image}" {e}') + if wm_image is not None: + if shared.opts.image_watermark_position == 'top/left': + position = (0, 0) + elif shared.opts.image_watermark_position == 'top/right': + position = (image.width - wm_image.width, 0) + elif shared.opts.image_watermark_position == 'bottom/left': + position = (0, image.height - wm_image.height) + elif shared.opts.image_watermark_position == 'bottom/right': + position = (image.width - wm_image.width, image.height - wm_image.height) + elif shared.opts.image_watermark_position == 'center': + position = ((image.width - wm_image.width) // 2, (image.height - wm_image.height) // 2) + else: + position = (random.randint(0, image.width - wm_image.width), random.randint(0, image.height - wm_image.height)) + try: + for x in range(wm_image.width): + for y in range(wm_image.height): + r, g, b, _a = wm_image.getpixel((x, y)) + if not (r == 0 and g == 0 and b == 0): + image.putpixel((x+position[0], y+position[1]), (r, g, b)) + shared.log.debug(f'Set image watermark: fn="{shared.opts.image_watermark_image}" image={wm_image} position={position}') + except Exception as e: + shared.log.warning(f'Set image watermark: image={wm_image} {e}') + + if shared.opts.image_watermark_enabled: # invisible watermark + from imwatermark import WatermarkEncoder + wm_type = 'bytes' + wm_method = 'dwtDctSvd' + wm_length = 32 + length = wm_length // 8 + info = image.info + data = np.asarray(image) + encoder = WatermarkEncoder() + text = f"{watermark:<{length}}"[:length] + bytearr = text.encode(encoding='ascii', errors='ignore') + try: + encoder.set_watermark(wm_type, bytearr) + encoded = encoder.encode(data, wm_method) + image = Image.fromarray(encoded) + image.info = info + shared.log.debug(f'Set invisible watermark: {watermark} method={wm_method} bits={wm_length}') + except Exception as e: + shared.log.warning(f'Set invisible watermark error: {watermark} method={wm_method} bits={wm_length} {e}') + return image diff --git a/modules/interrogate.py b/modules/interrogate.py index 3932489fd..c795cb7dc 100644 --- a/modules/interrogate.py +++ b/modules/interrogate.py @@ -79,7 +79,7 @@ def checkpoint_wrapper(self): def 
load_blip_model(self): self.create_fake_fairscale() - from repositories.blip import models + from repositories.blip import models # pylint: disable=unused-import from repositories.blip.models import blip import modules.modelloader as modelloader model_path = os.path.join(paths.models_path, "BLIP") @@ -195,3 +195,162 @@ def interrogate(self, pil_image): self.unload() shared.state.end() return res + +# --------- interrrogate ui + +ci = None +low_vram = False + + +class BatchWriter: + def __init__(self, folder): + self.folder = folder + self.csv, self.file = None, None + + def add(self, file, prompt): + txt_file = os.path.splitext(file)[0] + ".txt" + with open(os.path.join(self.folder, txt_file), 'w', encoding='utf-8') as f: + f.write(prompt) + + def close(self): + if self.file is not None: + self.file.close() + + +def get_clip_models(): + import open_clip + return ['/'.join(x) for x in open_clip.list_pretrained()] + + +def load_interrogator(model): + from clip_interrogator import Config, Interrogator + global ci # pylint: disable=global-statement + if ci is None: + config = Config(device=devices.get_optimal_device(), cache_path=os.path.join(paths.models_path, 'Interrogator'), clip_model_name=model, quiet=True) + if low_vram: + config.apply_low_vram_defaults() + shared.log.info(f'Interrogate load: config={config}') + ci = Interrogator(config) + elif model != ci.config.clip_model_name: + ci.config.clip_model_name = model + shared.log.info(f'Interrogate load: config={ci.config}') + ci.load_clip_model() + + +def unload_clip_model(): + if ci is not None: + shared.log.debug('Interrogate offload') + ci.caption_model = ci.caption_model.to(devices.cpu) + ci.clip_model = ci.clip_model.to(devices.cpu) + ci.caption_offloaded = True + ci.clip_offloaded = True + devices.torch_gc() + + +def interrogate(image, mode, caption=None): + shared.log.info(f'Interrogate: image={image} mode={mode} config={ci.config}') + if mode == 'best': + prompt = ci.interrogate(image, caption=caption) + elif mode == 'caption': + prompt = ci.generate_caption(image) if caption is None else caption + elif mode == 'classic': + prompt = ci.interrogate_classic(image, caption=caption) + elif mode == 'fast': + prompt = ci.interrogate_fast(image, caption=caption) + elif mode == 'negative': + prompt = ci.interrogate_negative(image) + else: + raise RuntimeError(f"Unknown mode {mode}") + return prompt + + +def interrogate_image(image, model, mode): + shared.state.begin() + shared.state.job = 'interrogate' + try: + if shared.backend == shared.Backend.ORIGINAL and (shared.cmd_opts.lowvram or shared.cmd_opts.medvram): + lowvram.send_everything_to_cpu() + devices.torch_gc() + load_interrogator(model) + image = image.convert('RGB') + shared.log.info(f'Interrogate: image={image} mode={mode} config={ci.config}') + prompt = interrogate(image, mode) + except Exception as e: + prompt = f"Exception {type(e)}" + shared.log.error(f'Interrogate: {e}') + shared.state.end() + return prompt + + +def interrogate_batch(batch_files, batch_folder, batch_str, model, mode, write): + files = [] + if batch_files is not None: + files += [f.name for f in batch_files] + if batch_folder is not None: + files += [f.name for f in batch_folder] + if batch_str is not None and len(batch_str) > 0 and os.path.exists(batch_str) and os.path.isdir(batch_str): + files += [os.path.join(batch_str, f) for f in os.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))] + if len(files) == 0: + shared.log.error('Interrogate batch no images') + return '' + 
shared.state.begin() + shared.state.job = 'batch interrogate' + prompts = [] + try: + if shared.backend == shared.Backend.ORIGINAL and (shared.cmd_opts.lowvram or shared.cmd_opts.medvram): + lowvram.send_everything_to_cpu() + devices.torch_gc() + load_interrogator(model) + shared.log.info(f'Interrogate batch: images={len(files)} mode={mode} config={ci.config}') + captions = [] + # first pass: generate captions + for file in files: + caption = "" + try: + if shared.state.interrupted: + break + image = Image.open(file).convert('RGB') + caption = ci.generate_caption(image) + except Exception as e: + shared.log.error(f'Interrogate caption: {e}') + finally: + captions.append(caption) + # second pass: interrogate + if write: + writer = BatchWriter(os.path.dirname(files[0])) + for idx, file in enumerate(files): + try: + if shared.state.interrupted: + break + image = Image.open(file).convert('RGB') + prompt = interrogate(image, mode, caption=captions[idx]) + prompts.append(prompt) + if write: + writer.add(file, prompt) + except OSError as e: + shared.log.error(f'Interrogate batch: {e}') + if write: + writer.close() + ci.config.quiet = False + unload_clip_model() + except Exception as e: + shared.log.error(f'Interrogate batch: {e}') + shared.state.end() + return '\n\n'.join(prompts) + + +def analyze_image(image, model): + load_interrogator(model) + image = image.convert('RGB') + image_features = ci.image_to_features(image) + top_mediums = ci.mediums.rank(image_features, 5) + top_artists = ci.artists.rank(image_features, 5) + top_movements = ci.movements.rank(image_features, 5) + top_trendings = ci.trendings.rank(image_features, 5) + top_flavors = ci.flavors.rank(image_features, 5) + medium_ranks = dict(zip(top_mediums, ci.similarities(image_features, top_mediums))) + artist_ranks = dict(zip(top_artists, ci.similarities(image_features, top_artists))) + movement_ranks = dict(zip(top_movements, ci.similarities(image_features, top_movements))) + trending_ranks = dict(zip(top_trendings, ci.similarities(image_features, top_trendings))) + flavor_ranks = dict(zip(top_flavors, ci.similarities(image_features, top_flavors))) + return medium_ranks, artist_ranks, movement_ranks, trending_ranks, flavor_ranks diff --git a/modules/ipadapter.py b/modules/ipadapter.py index 32cfdb486..190d9dedf 100644 --- a/modules/ipadapter.py +++ b/modules/ipadapter.py @@ -23,10 +23,10 @@ 'Plus': 'ip-adapter-plus_sd15.safetensors', 'Plus Face': 'ip-adapter-plus-face_sd15.safetensors', 'Full Face': 'ip-adapter-full-face_sd15.safetensors', - 'Base SXDL': 'ip-adapter_sdxl.safetensors', - 'Base ViT-H SXDL': 'ip-adapter_sdxl_vit-h.safetensors', - 'Plus ViT-H SXDL': 'ip-adapter-plus_sdxl_vit-h.safetensors', - 'Plus Face ViT-H SXDL': 'ip-adapter-plus-face_sdxl_vit-h.safetensors', + 'Base SDXL': 'ip-adapter_sdxl.safetensors', + 'Base ViT-H SDXL': 'ip-adapter_sdxl_vit-h.safetensors', + 'Plus ViT-H SDXL': 'ip-adapter-plus_sdxl_vit-h.safetensors', + 'Plus Face ViT-H SDXL': 'ip-adapter-plus-face_sdxl_vit-h.safetensors', } @@ -96,6 +96,8 @@ def apply(pipe, p: processing.StableDiffusionProcessing, adapter_names=[], adapt adapters = [adapter for adapter in adapters if adapter is not None and adapter.lower() != 'none'] if len(adapters) == 0: unapply(pipe) + if hasattr(p, 'ip_adapter_images'): + del p.ip_adapter_images return False if hasattr(p, 'ip_adapter_scales'): adapter_scales = p.ip_adapter_scales @@ -125,6 +127,8 @@ def apply(pipe, p: processing.StableDiffusionProcessing, adapter_names=[], adapt adapters = [] # unload adapter if previously 
loaded as it will cause runtime errors if len(adapters) == 0: unapply(pipe) + if hasattr(p, 'ip_adapter_images'): + del p.ip_adapter_images return False if not hasattr(pipe, 'load_ip_adapter'): shared.log.error(f'IP adapter: pipeline not supported: {pipe.__class__.__name__}') diff --git a/modules/masking.py b/modules/masking.py index 34d9f174e..6eb1abf35 100644 --- a/modules/masking.py +++ b/modules/masking.py @@ -141,6 +141,7 @@ def fill(image, mask): # "isnet-anime", } COLORMAP = ['autumn', 'bone', 'jet', 'winter', 'rainbow', 'ocean', 'summer', 'spring', 'cool', 'hsv', 'pink', 'hot', 'parula', 'magma', 'inferno', 'plasma', 'viridis', 'cividis', 'twilight', 'shifted', 'turbo', 'deepgreen'] +TYPES = ['None', 'Opaque', 'Binary', 'Masked', 'Grayscale', 'Color', 'Composite'] cache_dir = 'models/control/segment' generator: MaskGenerationPipeline = None busy = False @@ -371,7 +372,7 @@ def outpaint(input_image: Image.Image, outpaint_type: str = 'Edge'): def run_mask(input_image: Image.Image, input_mask: Image.Image = None, return_type: str = None, mask_blur: int = None, mask_padding: int = None, segment_enable=True, invert=None): - debug(f'Run mask: function={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access + debug(f'Run mask: fn={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access if input_image is None: return input_mask @@ -396,7 +397,7 @@ def run_mask(input_image: Image.Image, input_mask: Image.Image = None, return_ty if mask_blur is not None: # compatibility with old img2img values which uses px values opts.mask_blur = round(4 * mask_blur / size, 3) if mask_padding is not None: # compatibility with old img2img values which uses px values - opts.mask_erode = 4 * mask_padding / size + opts.mask_dilate = 4 * mask_padding / size if opts.model is None or not segment_enable: mask = input_mask diff --git a/modules/modelloader.py b/modules/modelloader.py index 91cedbf83..50f5623dc 100644 --- a/modules/modelloader.py +++ b/modules/modelloader.py @@ -1,5 +1,6 @@ import os import time +import json import shutil import importlib from typing import Dict @@ -71,7 +72,7 @@ def download_civit_preview(model_path: str, preview_url: str): download_pbar = None -def download_civit_model_thread(model_name, model_url, model_path, model_type, preview, token): +def download_civit_model_thread(model_name, model_url, model_path, model_type, token): import hashlib sha256 = hashlib.sha256() sha256.update(model_name.encode('utf-8')) @@ -87,7 +88,7 @@ def download_civit_model_thread(model_name, model_url, model_path, model_type, p model_file = os.path.join(shared.opts.ckpt_dir, model_path, model_name) temp_file = os.path.join(shared.opts.ckpt_dir, model_path, temp_file) - res = f'CivitAI download: name="{model_name}" url="{model_url}" path="{model_path}" temp="{temp_file}"' + res = f'Model download: name="{model_name}" url="{model_url}" path="{model_path}" temp="{temp_file}"' if os.path.isfile(model_file): res += ' already exists' shared.log.warning(res) @@ -117,16 +118,24 @@ def download_civit_model_thread(model_name, model_url, model_path, model_type, p try: with open(temp_file, 'ab') as f: for data in r.iter_content(block_size): + if written == 0: + try: # check if response is JSON message instead of bytes + shared.log.error(f'Model download: response={json.loads(data.decode("utf-8"))}') + raise ValueError('response: type=json expected=bytes') + except Exception: # this is good + pass written = written + len(data) f.write(data) download_pbar.update(task, description="Download", 
completed=written) if written < 1024: # min threshold os.remove(temp_file) raise ValueError(f'removed invalid download: bytes={written}') + """ if preview is not None: preview_file = os.path.splitext(model_file)[0] + '.jpg' preview.save(preview_file) res += f' preview={preview_file}' + """ except Exception as e: shared.log.error(f'{res} {e}') finally: @@ -134,17 +143,18 @@ def download_civit_model_thread(model_name, model_url, model_path, model_type, p download_pbar.remove_task(task) if starting_pos+total_size != written: shared.log.warning(f'{res} written={round(written/1024/1024)}Mb incomplete download') - else: + elif os.path.exists(temp_file): + shared.log.debug(f'Model download complete: temp="{temp_file}" path="{model_file}"') os.rename(temp_file, model_file) shared.state.end() return res -def download_civit_model(model_url: str, model_name: str, model_path: str, model_type: str, preview, token: str = None): +def download_civit_model(model_url: str, model_name: str, model_path: str, model_type: str, token: str = None): import threading - thread = threading.Thread(target=download_civit_model_thread, args=(model_name, model_url, model_path, model_type, preview, token)) + thread = threading.Thread(target=download_civit_model_thread, args=(model_name, model_url, model_path, model_type, token)) thread.start() - return f'CivitAI download: name={model_name} url={model_url} path={model_path}' + return f'Model download: name={model_name} url={model_url} path={model_path}' def download_diffusers_model(hub_id: str, cache_dir: str = None, download_config: Dict[str, str] = None, token = None, variant = None, revision = None, mirror = None, custom_pipeline = None): @@ -215,60 +225,47 @@ def download_diffusers_model(hub_id: str, cache_dir: str = None, download_config return pipeline_dir -def load_diffusers_models(model_path: str, command_path: str = None, clear=True): +def load_diffusers_models(clear=True): excluded_models = [ 'PhotoMaker', 'inswapper_128', 'IP-Adapter' ] t0 = time.time() - places = [] - places.append(model_path) - if command_path is not None and command_path != model_path: - places.append(command_path) + place = shared.opts.diffusers_dir + if place is None or len(place) == 0 or not os.path.isdir(place): + place = os.path.join(models_path, 'Diffusers') if clear: diffuser_repos.clear() output = [] - for place in places: - if not os.path.isdir(place): - continue - try: - """ - import huggingface_hub as hf - res = hf.scan_cache_dir(cache_dir=place) - for r in list(res.repos): - cache_path = os.path.join(r.repo_path, "snapshots", list(r.revisions)[-1].commit_hash) - diffuser_repos.append({ 'name': r.repo_id, 'filename': r.repo_id, 'path': cache_path, 'size': r.size_on_disk, 'mtime': r.last_modified, 'hash': list(r.revisions)[-1].commit_hash, 'model_info': str(os.path.join(cache_path, "model_info.json")) }) - if not os.path.isfile(os.path.join(cache_path, "hidden")): - output.append(str(r.repo_id)) - """ - for folder in os.listdir(place): - try: - if any([x in folder for x in excluded_models]): # noqa:C419 - continue - if "--" not in folder: - continue - if folder.endswith("-prior"): - continue - _, name = folder.split("--", maxsplit=1) - name = name.replace("--", "/") - folder = os.path.join(place, folder) - friendly = os.path.join(place, name) - snapshots = os.listdir(os.path.join(folder, "snapshots")) - if len(snapshots) == 0: - shared.log.warning(f"Diffusers folder has no snapshots: location={place} folder={folder} name={name}") - continue - commit = os.path.join(folder, 
'snapshots', snapshots[-1]) - mtime = os.path.getmtime(commit) - info = os.path.join(commit, "model_info.json") - diffuser_repos.append({ 'name': name, 'filename': name, 'friendly': friendly, 'folder': folder, 'path': commit, 'hash': commit, 'mtime': mtime, 'model_info': info }) - if os.path.exists(os.path.join(folder, 'hidden')): - continue - output.append(name) - except Exception: - # shared.log.error(f"Error analyzing diffusers model: {folder} {e}") - pass - except Exception as e: - shared.log.error(f"Error listing diffusers: {place} {e}") - shared.log.debug(f'Scanning diffusers cache: {places} items={len(output)} time={time.time()-t0:.2f}') + try: + for folder in os.listdir(place): + try: + if any([x in folder for x in excluded_models]): # noqa:C419 + continue + if "--" not in folder: + continue + if folder.endswith("-prior"): + continue + _, name = folder.split("--", maxsplit=1) + name = name.replace("--", "/") + folder = os.path.join(place, folder) + friendly = os.path.join(place, name) + snapshots = os.listdir(os.path.join(folder, "snapshots")) + if len(snapshots) == 0: + shared.log.warning(f"Diffusers folder has no snapshots: location={place} folder={folder} name={name}") + continue + commit = os.path.join(folder, 'snapshots', snapshots[-1]) + mtime = os.path.getmtime(commit) + info = os.path.join(commit, "model_info.json") + diffuser_repos.append({ 'name': name, 'filename': name, 'friendly': friendly, 'folder': folder, 'path': commit, 'hash': commit, 'mtime': mtime, 'model_info': info }) + if os.path.exists(os.path.join(folder, 'hidden')): + continue + output.append(name) + except Exception: + # shared.log.error(f"Error analyzing diffusers model: {folder} {e}") + pass + except Exception as e: + shared.log.error(f"Error listing diffusers: {place} {e}") + shared.log.debug(f'Scanning diffusers cache: folder={place} items={len(output)} time={time.time()-t0:.2f}') return output @@ -290,33 +287,44 @@ def find_diffuser(name: str): return None -def load_reference(name: str): - found = [r for r in diffuser_repos if name == r['name'] or name == r['friendly'] or name == r['path']] - if len(found) > 0: # already downloaded - shared.log.debug(f'Reference model: {found[0]}') - return True - shared.log.debug(f'Reference download: {name}') - reference_models = shared.readfile(os.path.join('html', 'reference.json'), silent=False) +def get_reference_opts(name: str): model_opts = {} - for v in reference_models.values(): - if v.get('path', '') == name: + for k, v in shared.reference_models.items(): + model_name = os.path.splitext(v.get('path', '').split('@')[0])[0] + if k == name or model_name == name: model_opts = v break + if not model_opts: + # shared.log.error(f'Reference: model="{name}" not found') + return {} + shared.log.debug(f'Reference: model="{name}" {model_opts.get("extras", None)}') + return model_opts + + +def load_reference(name: str, variant: str = None, revision: str = None, mirror: str = None, custom_pipeline: str = None): + found = [r for r in diffuser_repos if name == r['name'] or name == r['friendly'] or name == r['path']] + if len(found) > 0: # already downloaded + model_opts = get_reference_opts(found[0]['name']) + return True + else: + model_opts = get_reference_opts(name) if model_opts.get('skip', False): return True + shared.log.debug(f'Reference: download="{name}"') model_dir = download_diffusers_model( hub_id=name, cache_dir=shared.opts.diffusers_dir, - variant=model_opts.get('variant', None), - revision=model_opts.get('revision', None), - mirror=model_opts.get('mirror', 
None), - custom_pipeline=model_opts.get('custom_pipeline', None) + variant=variant or model_opts.get('variant', None), + revision=revision or model_opts.get('revision', None), + mirror=mirror or model_opts.get('mirror', None), + custom_pipeline=custom_pipeline or model_opts.get('custom_pipeline', None) ) if model_dir is None: - shared.log.debug(f'Reference download failed: {name}') + shared.log.error(f'Reference download: model="{name}"') return False else: - shared.log.debug(f'Reference download complete: {name}') + shared.log.debug(f'Reference download complete: model="{name}"') + model_opts = get_reference_opts(name) from modules import sd_models sd_models.list_models() return True @@ -327,19 +335,19 @@ def load_civitai(model: str, url: str): name, _ext = os.path.splitext(model) info = sd_models.get_closet_checkpoint_match(name) if info is not None: - shared.log.debug(f'Reference model: {name}') + _model_opts = get_reference_opts(info.model_name) return name # already downloaded else: - shared.log.debug(f'Reference model: {name} download start') - download_civit_model_thread(model_name=model, model_url=url, model_path='', model_type='safetensors', preview=None, token=None) - shared.log.debug(f'Reference model: {name} download complete') + shared.log.debug(f'Reference download start: model="{name}"') + download_civit_model_thread(model_name=model, model_url=url, model_path='', model_type='safetensors', token=None) + shared.log.debug(f'Reference download complete: model="{name}"') sd_models.list_models() info = sd_models.get_closet_checkpoint_match(name) if info is not None: - shared.log.debug(f'Reference model: {name}') + shared.log.debug(f'Reference: model="{name}"') return name # already downloaded else: - shared.log.debug(f'Reference model: {name} not found') + shared.log.error(f'Reference model="{name}" not found') return None @@ -349,7 +357,6 @@ def download_url_to_file(url: str, dst: str): import tempfile from urllib.request import urlopen, Request from rich.progress import Progress, TextColumn, BarColumn, TaskProgressColumn, TimeRemainingColumn, TimeElapsedColumn - file_size = None req = Request(url, headers={"User-Agent": "sdnext"}) u = urlopen(req) # pylint: disable=R1732 @@ -390,6 +397,8 @@ def download_url_to_file(url: str, dst: str): def load_file_from_url(url: str, *, model_dir: str, progress: bool = True, file_name = None): # pylint: disable=unused-argument """Download a file from url into model_dir, using the file present if possible. 
Returns the path to the downloaded file.""" + if model_dir is None: + shared.log.error('Download folder is none') os.makedirs(model_dir, exist_ok=True) if not file_name: parts = urlparse(url) @@ -398,7 +407,10 @@ def load_file_from_url(url: str, *, model_dir: str, progress: bool = True, file_ if not os.path.exists(cached_file): shared.log.info(f'Downloading: url="{url}" file={cached_file}') download_url_to_file(url, cached_file) - return cached_file + if os.path.exists(cached_file): + return cached_file + else: + return None def load_models(model_path: str, model_url: str = None, command_path: str = None, ext_filter=None, download_name=None, ext_blacklist=None) -> list: @@ -411,14 +423,15 @@ def load_models(model_path: str, model_url: str = None, command_path: str = None @param ext_filter: An optional list of filename extensions to filter by @return: A list of paths containing the desired model(s) """ - places = list(set([model_path, command_path])) # noqa:C405 + places = [x for x in list(set([model_path, command_path])) if x is not None] # noqa:C405 output = [] try: output:list = [*files_cache.list_files(*places, ext_filter=ext_filter, ext_blacklist=ext_blacklist)] if model_url is not None and len(output) == 0: if download_name is not None: dl = load_file_from_url(model_url, model_dir=places[0], progress=True, file_name=download_name) - output.append(dl) + if dl is not None: + output.append(dl) else: output.append(model_url) except Exception as e: diff --git a/modules/onnx_impl/__init__.py b/modules/onnx_impl/__init__.py index e48a355b6..61ff7c76b 100644 --- a/modules/onnx_impl/__init__.py +++ b/modules/onnx_impl/__init__.py @@ -194,9 +194,11 @@ def initialize_onnx(): global initialized # pylint: disable=global-statement if initialized: return - from installer import log + from installer import log, installed from modules import devices from modules.shared import opts + if not installed('onnx', quiet=True): + return try: # may fail on onnx import import onnx # pylint: disable=unused-import from .execution_providers import ExecutionProvider, TORCH_DEVICE_TO_EP, available_execution_providers @@ -243,7 +245,7 @@ def initialize_onnx(): def initialize_olive(): global run_olive_workflow # pylint: disable=global-statement from installer import installed, log - if not installed("olive-ai"): + if not installed('olive-ai', quiet=True) or not installed('onnx', quiet=True): return import sys import importlib @@ -268,6 +270,7 @@ def install_olive(): return try: log.info('Installing Olive') + install('onnx', 'onnx', ignore=True) install('olive-ai', 'olive-ai', ignore=True) import olive.workflows # pylint: disable=unused-import except Exception as e: diff --git a/modules/postprocess/codeformer_model.py b/modules/postprocess/codeformer_model.py index 4a812fdb7..26dec124f 100644 --- a/modules/postprocess/codeformer_model.py +++ b/modules/postprocess/codeformer_model.py @@ -66,7 +66,7 @@ def send_model_to(self, device): self.face_helper.face_det.to(device) # pylint: disable=no-member self.face_helper.face_parse.to(device) - def restore(self, np_image, w=None): + def restore(self, np_image, p=None, w=None): # pylint: disable=unused-argument from torchvision.transforms.functional import normalize from basicsr.utils import img2tensor, tensor2img np_image = np_image[:, :, ::-1] @@ -90,7 +90,7 @@ def restore(self, np_image, w=None): del output devices.torch_gc() except Exception as e: - shared.log.error(f'CodeForomer error: {e}') + shared.log.error(f'CodeFormer error: {e}') restored_face = 
tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1)) restored_face = restored_face.astype('uint8') self.face_helper.add_restored_face(restored_face) diff --git a/modules/postprocess/gfpgan_model.py b/modules/postprocess/gfpgan_model.py index ee412012d..fb7ff0f5d 100644 --- a/modules/postprocess/gfpgan_model.py +++ b/modules/postprocess/gfpgan_model.py @@ -105,7 +105,7 @@ class FaceRestorerGFPGAN(modules.face_restoration.FaceRestoration): def name(self): return "GFPGAN" - def restore(self, np_image): + def restore(self, np_image, p=None): # pylint: disable=unused-argument return gfpgan_fix_faces(np_image) shared.face_restorers.append(FaceRestorerGFPGAN()) diff --git a/modules/postprocess/realesrgan_model_arch.py b/modules/postprocess/realesrgan_model_arch.py index 30b8e65ac..a4f4bd682 100644 --- a/modules/postprocess/realesrgan_model_arch.py +++ b/modules/postprocess/realesrgan_model_arch.py @@ -55,7 +55,7 @@ def __init__(self, self.device = torch.device( f'cuda:{gpu_id}' if torch.cuda.is_available() else 'cpu') if device is None else device else: - self.device = devices.device if device is None else device + self.device = devices.device_esrgan if device is None else device if isinstance(model_path, list): # dni diff --git a/modules/processing.py b/modules/processing.py index 521d4502b..185077483 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -6,7 +6,7 @@ from PIL import Image from modules import shared, devices, errors, images, scripts, memstats, lowvram, script_callbacks, extra_networks, face_restoration, sd_hijack_freeu, sd_models, sd_vae, processing_helpers from modules.sd_hijack_hypertile import context_hypertile_vae, context_hypertile_unet -from modules.processing_class import StableDiffusionProcessing, StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img # pylint: disable=unused-import +from modules.processing_class import StableDiffusionProcessing, StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, StableDiffusionProcessingControl # pylint: disable=unused-import from modules.processing_info import create_infotext @@ -159,10 +159,16 @@ def process_images(p: StableDiffusionProcessing) -> Processed: sd_vae.reload_vae_weights() shared.prompt_styles.apply_styles_to_extra(p) + shared.prompt_styles.extract_comments(p) if not shared.opts.cuda_compile: sd_models.apply_token_merging(p.sd_model, p.get_token_merging_ratio()) sd_hijack_freeu.apply_freeu(p, shared.backend == shared.Backend.ORIGINAL) + if p.width is not None: + p.width = 8 * int(p.width / 8) + if p.height is not None: + p.height = 8 * int(p.height / 8) + script_callbacks.before_process_callback(p) if shared.cmd_opts.profile: @@ -246,10 +252,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if p.scripts is not None and isinstance(p.scripts, scripts.ScriptRunner): p.scripts.process(p) - if shared.backend == shared.Backend.DIFFUSERS: - from modules import ipadapter - ipadapter.apply(shared.sd_model, p) - def infotext(_inxex=0): # dummy function overriden if there are iterations return '' @@ -257,23 +259,29 @@ def infotext(_inxex=0): # dummy function overriden if there are iterations shared.state.job_count = p.n_iter with devices.inference_context(), ema_scope_context(): t0 = time.time() - with devices.autocast(): - p.init(p.all_prompts, p.all_seeds, p.all_subseeds) + if not hasattr(p, 'skip_init'): + with devices.autocast(): + p.init(p.all_prompts, p.all_seeds, p.all_subseeds) extra_network_data = None debug(f'Processing inner: args={vars(p)}') for n in 
range(p.n_iter): + debug(f'Processing inner: iteration={n+1}/{p.n_iter}') p.iteration = n if shared.state.skipped: - shared.log.debug(f'Process skipped: {n}/{p.n_iter}') + shared.log.debug(f'Process skipped: {n+1}/{p.n_iter}') shared.state.skipped = False continue if shared.state.interrupted: - shared.log.debug(f'Process interrupted: {n}/{p.n_iter}') + shared.log.debug(f'Process interrupted: {n+1}/{p.n_iter}') break - p.prompts = p.all_prompts[n * p.batch_size:(n + 1) * p.batch_size] - p.negative_prompts = p.all_negative_prompts[n * p.batch_size:(n + 1) * p.batch_size] - p.seeds = p.all_seeds[n * p.batch_size:(n + 1) * p.batch_size] - p.subseeds = p.all_subseeds[n * p.batch_size:(n + 1) * p.batch_size] + + if shared.backend == shared.Backend.DIFFUSERS: + from modules import ipadapter + ipadapter.apply(shared.sd_model, p) + p.prompts = p.all_prompts[n * p.batch_size:(n+1) * p.batch_size] + p.negative_prompts = p.all_negative_prompts[n * p.batch_size:(n+1) * p.batch_size] + p.seeds = p.all_seeds[n * p.batch_size:(n+1) * p.batch_size] + p.subseeds = p.all_subseeds[n * p.batch_size:(n+1) * p.batch_size] if p.scripts is not None and isinstance(p.scripts, scripts.ScriptRunner): p.scripts.before_process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds) if len(p.prompts) == 0: @@ -307,8 +315,8 @@ def infotext(_inxex=0): # dummy function overriden if there are iterations if p.scripts is not None and isinstance(p.scripts, scripts.ScriptRunner): p.scripts.postprocess_batch(p, x_samples_ddim, batch_number=n) if p.scripts is not None and isinstance(p.scripts, scripts.ScriptRunner): - p.prompts = p.all_prompts[n * p.batch_size:(n + 1) * p.batch_size] - p.negative_prompts = p.all_negative_prompts[n * p.batch_size:(n + 1) * p.batch_size] + p.prompts = p.all_prompts[n * p.batch_size:(n+1) * p.batch_size] + p.negative_prompts = p.all_negative_prompts[n * p.batch_size:(n+1) * p.batch_size] batch_params = scripts.PostprocessBatchListArgs(list(x_samples_ddim)) p.scripts.postprocess_batch_list(p, batch_params, batch_number=n) x_samples_ddim = batch_params.images @@ -316,10 +324,10 @@ def infotext(_inxex=0): # dummy function overriden if there are iterations def infotext(index): # pylint: disable=function-redefined # noqa: F811 return create_infotext(p, p.prompts, p.seeds, p.subseeds, index=index, all_negative_prompts=p.negative_prompts) - if hasattr(shared.sd_model, 'restore_pipeline') and shared.sd_model.restore_pipeline is not None: - shared.sd_model.restore_pipeline() - for i, x_sample in enumerate(x_samples_ddim): + if hasattr(p, 'recursion'): + continue + debug(f'Processing result: index={i+1}/{len(x_samples_ddim)} iteration={n+1}/{p.n_iter}') p.batch_index = i if type(x_sample) == Image.Image: image = x_sample @@ -329,13 +337,9 @@ def infotext(index): # pylint: disable=function-redefined # noqa: F811 image = Image.fromarray(x_sample) if p.restore_faces: if not p.do_not_save_samples and shared.opts.save_images_before_face_restoration: - orig = p.restore_faces - p.restore_faces = False - info = infotext(i) - p.restore_faces = orig - images.save_image(Image.fromarray(x_sample), path=p.outpath_samples, basename="", seed=p.seeds[i], prompt=p.prompts[i], extension=shared.opts.samples_format, info=info, p=p, suffix="-before-face-restore") + images.save_image(Image.fromarray(x_sample), path=p.outpath_samples, basename="", seed=p.seeds[i], prompt=p.prompts[i], extension=shared.opts.samples_format, info=infotext(i), p=p, suffix="-before-face-restore") p.ops.append('face') - x_sample = 
face_restoration.restore_faces(x_sample) + x_sample = face_restoration.restore_faces(x_sample, p) image = Image.fromarray(x_sample) if p.scripts is not None and isinstance(p.scripts, scripts.ScriptRunner): pp = scripts.PostprocessImageArgs(image) @@ -361,7 +365,11 @@ def infotext(index): # pylint: disable=function-redefined # noqa: F811 images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=text, p=p) # main save image if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([shared.opts.save_mask, shared.opts.save_mask_composite, shared.opts.return_mask, shared.opts.return_mask_composite]): image_mask = p.mask_for_overlay.convert('RGB') - image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(3, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA') + image1 = image.convert('RGBA').convert('RGBa') + image2 = Image.new('RGBa', image.size) + mask = images.resize_image(3, p.mask_for_overlay, image.width, image.height).convert('L') + image_mask_composite = Image.composite(image1, image2, mask).convert('RGBA') + image_mask_composite.save('/tmp/composite.png') if shared.opts.save_mask: images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], shared.opts.samples_format, info=text, p=p, suffix="-mask") if shared.opts.save_mask_composite: @@ -373,6 +381,9 @@ def infotext(index): # pylint: disable=function-redefined # noqa: F811 del x_samples_ddim devices.torch_gc() + if hasattr(shared.sd_model, 'restore_pipeline') and shared.sd_model.restore_pipeline is not None: + shared.sd_model.restore_pipeline() + t1 = time.time() shared.log.info(f'Processed: images={len(output_images)} time={t1 - t0:.2f} its={(p.steps * len(output_images)) / (t1 - t0):.2f} memory={memstats.memory_stats()}') diff --git a/modules/processing_class.py b/modules/processing_class.py index 6207317af..8440debf4 100644 --- a/modules/processing_class.py +++ b/modules/processing_class.py @@ -1,5 +1,6 @@ import os -import math +import sys +import inspect import hashlib from typing import Any, Dict, List from dataclasses import dataclass, field @@ -11,6 +12,9 @@ from modules.sd_hijack_hypertile import hypertile_set +debug = shared.log.trace if os.environ.get('SD_PROCESS_DEBUG', None) is not None else lambda *args, **kwargs: None + + @dataclass(repr=False) class StableDiffusionProcessing: """ @@ -173,7 +177,7 @@ def setup_scripts(self): def comment(self, text): self.comments[text] = 1 - def init(self, all_prompts, all_seeds, all_subseeds): + def init(self, all_prompts=None, all_seeds=None, all_subseeds=None): pass def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts): @@ -231,16 +235,24 @@ def __init__(self, enable_hr: bool = False, denoising_strength: float = 0.75, fi self.scripts = None self.script_args = [] - def init(self, all_prompts, all_seeds, all_subseeds): + def init(self, all_prompts=None, all_seeds=None, all_subseeds=None): if shared.backend == shared.Backend.DIFFUSERS: shared.sd_model = sd_models.set_diffuser_pipe(self.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE) self.width = self.width or 512 self.height = self.height or 512 - - def init_hr(self): + if all_prompts is not None: + self.all_prompts = all_prompts + if all_seeds is not None: + self.all_seeds = all_seeds + if all_subseeds is not None: + self.all_subseeds = all_subseeds + + def init_hr(self, scale = None, upscaler = None, force = False): 
# pylint: disable=unused-argument + scale = scale or self.hr_scale + upscaler = upscaler or self.hr_upscaler if self.hr_resize_x == 0 and self.hr_resize_y == 0: - self.hr_upscale_to_x = int(self.width * self.hr_scale) - self.hr_upscale_to_y = int(self.height * self.hr_scale) + self.hr_upscale_to_x = int(self.width * scale) + self.hr_upscale_to_y = int(self.height * scale) else: if self.hr_resize_y == 0: self.hr_upscale_to_x = self.hr_resize_x @@ -262,7 +274,7 @@ def init_hr(self): self.truncate_x = (self.hr_upscale_to_x - target_w) // 8 self.truncate_y = (self.hr_upscale_to_y - target_h) // 8 if shared.backend == shared.Backend.ORIGINAL: # diffusers are handled in processing_diffusers - if (self.hr_upscale_to_x == self.width and self.hr_upscale_to_y == self.height) or self.hr_upscaler is None or self.hr_upscaler == 'None': # special case: the user has chosen to do nothing + if (self.hr_upscale_to_x == self.width and self.hr_upscale_to_y == self.height) or upscaler is None or upscaler == 'None': # special case: the user has chosen to do nothing self.is_hr_pass = False return self.is_hr_pass = True @@ -283,6 +295,7 @@ def __init__(self, init_images: list = None, resize_mode: int = 0, resize_name: self.resize_mode: int = resize_mode self.resize_name: str = resize_name self.denoising_strength: float = denoising_strength + self.hr_denoising_strength: float = denoising_strength self.image_cfg_scale: float = image_cfg_scale self.init_latent = None self.image_mask = mask @@ -310,12 +323,19 @@ def __init__(self, init_images: list = None, resize_mode: int = 0, resize_name: self.scripts = None self.script_args = [] - def init(self, all_prompts, all_seeds, all_subseeds): - if shared.backend == shared.Backend.DIFFUSERS and self.image_mask is not None and not self.is_control: + def init(self, all_prompts=None, all_seeds=None, all_subseeds=None): + if shared.backend == shared.Backend.DIFFUSERS and getattr(self, 'image_mask', None) is not None: shared.sd_model = sd_models.set_diffuser_pipe(self.sd_model, sd_models.DiffusersTaskType.INPAINTING) - elif shared.backend == shared.Backend.DIFFUSERS and self.image_mask is None and not self.is_control: + elif shared.backend == shared.Backend.DIFFUSERS and getattr(self, 'init_images', None) is not None: shared.sd_model = sd_models.set_diffuser_pipe(self.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE) + if all_prompts is not None: + self.all_prompts = all_prompts + if all_seeds is not None: + self.all_seeds = all_seeds + if all_subseeds is not None: + self.all_subseeds = all_subseeds + if self.sampler_name == "PLMS": self.sampler_name = 'UniPC' if shared.backend == shared.Backend.ORIGINAL: @@ -385,9 +405,8 @@ def init(self, all_prompts, all_seeds, all_subseeds): image = images.flatten(img, shared.opts.img2img_background_color) if self.width is None or self.height is None: self.width, self.height = image.width, image.height - if crop_region is None and self.resize_mode != 4 and self.resize_mode > 0: - if image.width != self.width or image.height != self.height: - image = images.resize_image(self.resize_mode, image, self.width, self.height, self.resize_name) + if crop_region is None and self.resize_mode > 0: + image = images.resize_image(self.resize_mode, image, self.width, self.height, self.resize_name) self.width = image.width self.height = image.height if self.image_mask is not None and shared.opts.mask_apply_overlay: @@ -425,8 +444,6 @@ def init(self, all_prompts, all_seeds, all_subseeds): image = 2. * image - 1. 
image = image.to(device=shared.device, dtype=devices.dtype_vae) self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image)) - if self.resize_mode == 4: - self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // 8, self.width // 8), mode="bilinear") if self.image_mask is not None: init_mask = latent_mask latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2])) @@ -477,14 +494,39 @@ def __init__(self, **kwargs): def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts): # abstract pass - def init_hr(self): - if self.resize_name == 'None' or self.scale_by == 1.0: + def init_hr(self, scale = None, upscaler = None, force = False): + scale = scale or self.scale_by + upscaler = upscaler or self.resize_name + if upscaler == 'None' or scale == 1.0: return self.is_hr_pass = True - self.hr_force = True - self.hr_upscaler = self.resize_name - self.hr_upscale_to_x, self.hr_upscale_to_y = int(self.width * self.scale_by), int(self.height * self.scale_by) - self.hr_upscale_to_x, self.hr_upscale_to_y = 8 * math.ceil(self.hr_upscale_to_x / 8), 8 * math.ceil(self.hr_upscale_to_y / 8) + self.hr_force = force + self.hr_upscaler = upscaler + self.hr_upscale_to_x, self.hr_upscale_to_y = 8 * int(self.width * scale / 8), 8 * int(self.height * scale / 8) # hypertile_set(self, hr=True) shared.state.job_count = 2 * self.n_iter - shared.log.debug(f'Control hires: upscaler="{self.hr_upscaler}" upscale={self.scale_by} size={self.hr_upscale_to_x}x{self.hr_upscale_to_y}') + shared.log.debug(f'Control hires: upscaler="{self.hr_upscaler}" upscale={scale} size={self.hr_upscale_to_x}x{self.hr_upscale_to_y}') + + +def switch_class(p: StableDiffusionProcessing, new_class: type, dct: dict = None): + signature = inspect.signature(type(new_class).__init__, follow_wrapped=True) + possible = list(signature.parameters) + kwargs = {} + for k, v in p.__dict__.copy().items(): + if k in possible: + kwargs[k] = v + if dct is not None: + for k, v in dct.items(): + if k in possible: + kwargs[k] = v + debug(f"Switching class: {p.__class__.__name__} -> {new_class.__name__} fn={sys._getframe(1).f_code.co_name}") # pylint: disable=protected-access + p.__class__ = new_class + p.__init__(**kwargs) + for k, v in p.__dict__.items(): + if hasattr(p, k): + setattr(p, k, v) + if dct is not None: # post init set additional values + for k, v in dct.items(): + if hasattr(p, k): + setattr(p, k, v) + return p diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py index f4250582d..29da4a5ab 100644 --- a/modules/processing_diffusers.py +++ b/modules/processing_diffusers.py @@ -8,7 +8,7 @@ import torch import torchvision.transforms.functional as TF import diffusers -from modules import shared, devices, processing, sd_samplers, sd_models, images, errors, prompt_parser_diffusers, sd_hijack_hypertile, processing_correction, processing_vae, sd_models_compile +from modules import shared, devices, processing, sd_samplers, sd_models, images, errors, prompt_parser_diffusers, sd_hijack_hypertile, processing_correction, processing_vae, sd_models_compile, extra_networks from modules.processing_helpers import resize_init_images, resize_hires, fix_prompts, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps from modules.onnx_impl import preprocess_pipeline as preprocess_onnx_pipeline, check_parameters_changed as olive_check_parameters_changed @@ -73,6 +73,8 @@ def 
diffusers_callback(pipe, step: int, timestep: int, kwargs: dict): if shared.state.interrupted or shared.state.skipped: raise AssertionError('Interrupted...') time.sleep(0.1) + if hasattr(p, "extra_network_data"): + extra_networks.activate(p, p.extra_network_data, step=step) if latents is None: return kwargs elif shared.opts.nan_skip: @@ -87,7 +89,7 @@ def diffusers_callback(pipe, step: int, timestep: int, kwargs: dict): ip_adapter_scales[i] *= float(step <= pipe.num_timesteps * ip_adapter_ends[i]) debug(f"Callback: IP Adapter scales={ip_adapter_scales}") pipe.set_ip_adapter_scale(ip_adapter_scales) - if step != pipe.num_timesteps: + if step != getattr(pipe, 'num_timesteps', 0): kwargs = processing_correction.correction_callback(p, timestep, kwargs) if p.scheduled_prompt and 'prompt_embeds' in kwargs and 'negative_prompt_embeds' in kwargs: try: @@ -97,7 +99,7 @@ def diffusers_callback(pipe, step: int, timestep: int, kwargs: dict): kwargs["negative_prompt_embeds"] = p.negative_embeds[j][0:1].expand(kwargs["negative_prompt_embeds"].shape) except Exception as e: shared.log.debug(f"Callback: {e}") - if step == int(pipe.num_timesteps * p.cfg_end) and 'prompt_embeds' in kwargs and 'negative_prompt_embeds' in kwargs: + if step == int(getattr(pipe, 'num_timesteps', 100) * p.cfg_end) and 'prompt_embeds' in kwargs and 'negative_prompt_embeds' in kwargs: pipe._guidance_scale = 0.0 # pylint: disable=protected-access for key in {"prompt_embeds", "negative_prompt_embeds", "add_text_embeds", "add_time_ids"} & set(kwargs): kwargs[key] = kwargs[key].chunk(2)[-1] @@ -137,7 +139,7 @@ def task_specific_kwargs(model): width, height = resize_init_images(p) task_args = { 'image': p.init_images, - 'mask_image': p.image_mask, + 'mask_image': p.task_args.get('image_mask', None) or getattr(p, 'image_mask', None) or getattr(p, 'mask', None), 'strength': p.denoising_strength, 'height': height, 'width': width, @@ -185,7 +187,11 @@ def set_pipeline_args(model, prompts: list, negative_prompts: list, prompts_2: t generator = p.generator else: generator_device = devices.cpu if shared.opts.diffusers_generator_device == "CPU" else shared.device - generator = [torch.Generator(generator_device).manual_seed(s) for s in p.seeds] + try: + generator = [torch.Generator(generator_device).manual_seed(s) for s in p.seeds] + except Exception as e: + shared.log.error(f'Torch generator: seeds={p.seeds} device={generator_device} {e}') + generator = None prompts, negative_prompts, prompts_2, negative_prompts_2 = fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2) parser = 'Fixed attention' clip_skip = kwargs.pop("clip_skip", 1) @@ -243,13 +249,6 @@ def set_pipeline_args(model, prompts: list, negative_prompts: list, prompts_2: t args["prior_guidance_scale"] = p.cfg_scale if 'decoder_guidance_scale' in possible: args["decoder_guidance_scale"] = p.image_cfg_scale - # TODO Stable Cascade callbacks are currently broken in combined pipeline so preview will not get triggered - if 'prior_callback_on_step_end' in possible: - possible.remove('callback_on_step_end') - if 'callback_on_step_end' in possible: - possible.remove('callback_on_step_end') - if 'callback' in possible: - possible.remove('callback') # set callbacks if 'callback_steps' in possible: @@ -270,6 +269,7 @@ def set_pipeline_args(model, prompts: list, negative_prompts: list, prompts_2: t args[arg] = kwargs[arg] else: pass + task_kwargs = task_specific_kwargs(model) for arg in task_kwargs: # if arg in possible and arg not in args: # task specific args should not 
override args @@ -298,7 +298,7 @@ def set_pipeline_args(model, prompts: list, negative_prompts: list, prompts_2: t clean['generator'] = generator_device clean['parser'] = parser for k, v in clean.items(): - if isinstance(v, torch.Tensor): + if isinstance(v, torch.Tensor) or isinstance(v, np.ndarray) or (isinstance(v, list) and len(v) > 0 and (isinstance(v[0], torch.Tensor) or isinstance(v[0], np.ndarray))): clean[k] = v.shape shared.log.debug(f'Diffuser pipeline: {model.__class__.__name__} task={sd_models.get_diffusers_task(model)} set={clean}') if p.hdr_clamp or p.hdr_maximize or p.hdr_brightness != 0 or p.hdr_color != 0 or p.hdr_sharpen != 0: @@ -317,8 +317,6 @@ def set_pipeline_args(model, prompts: list, negative_prompts: list, prompts_2: t def update_sampler(sd_model, second_pass=False): sampler_selection = p.hr_sampler_name if second_pass else p.sampler_name - if sd_model.__class__.__name__ in ['AmusedPipeline']: - return # models with their own schedulers if hasattr(sd_model, 'scheduler') and sampler_selection != 'Default': sampler = sd_samplers.all_samplers_map.get(sampler_selection, None) if sampler is None: @@ -349,12 +347,17 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): if p.sag_scale > 0 and is_txt2img(): update_sampler(shared.sd_model) supported = ['DDIMScheduler', 'PNDMScheduler', 'DDPMScheduler', 'DEISMultistepScheduler', 'UniPCMultistepScheduler', 'DPMSolverMultistepScheduler', 'DPMSolverSinlgestepScheduler'] - if sd_model.scheduler.__class__.__name__ in supported: + if hasattr(sd_model, 'sfast'): + shared.log.warning(f'SAG incompatible compile mode: backend={shared.opts.cuda_compile_backend}') + elif sd_model.scheduler.__class__.__name__ in supported: sd_model = sd_models.switch_pipe(diffusers.StableDiffusionSAGPipeline, sd_model) p.extra_generation_params["SAG scale"] = p.sag_scale p.task_args['sag_scale'] = p.sag_scale else: shared.log.warning(f'SAG incompatible scheduler: current={sd_model.scheduler.__class__.__name__} supported={supported}') + if sd_models.get_diffusers_task(sd_model) == sd_models.DiffusersTaskType.INPAINTING and getattr(p, 'image_mask', None) is None and p.task_args.get('image_mask', None) is None and getattr(p, 'mask', None) is None: + shared.log.warning('Processing: mode=inpaint mask=None') + sd_model = sd_models.set_diffuser_pipe(sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE) if shared.opts.cuda_compile_backend == "olive-ai": sd_model = olive_check_parameters_changed(p, is_refiner_enabled()) if sd_model.__class__.__name__ == "OnnxRawPipeline": @@ -363,6 +366,11 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): orig_pipeline = sd_model # processed ONNX pipeline should not be replaced with original pipeline. 
return sd_model + # sanitize init_images + if hasattr(p, 'init_images') and getattr(p, 'init_images', None) is None: + del p.init_images + if hasattr(p, 'init_images') and not isinstance(getattr(p, 'init_images', []), list): + p.init_images = [p.init_images] if len(getattr(p, 'init_images', [])) > 0: while len(p.init_images) < len(p.prompts): p.init_images.append(p.init_images[-1]) @@ -410,9 +418,11 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): p.extra_generation_params['Pipeline'] = shared.sd_model.__class__.__name__ if shared.opts.scheduler_eta is not None and shared.opts.scheduler_eta > 0 and shared.opts.scheduler_eta < 1: p.extra_generation_params["Sampler Eta"] = shared.opts.scheduler_eta + output = None try: t0 = time.time() sd_models_compile.check_deepcache(enable=True) + sd_models.move_model(shared.sd_model, devices.device) output = shared.sd_model(**base_args) # pylint: disable=not-callable if isinstance(output, dict): output = SimpleNamespace(**output) @@ -441,7 +451,7 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): shared.log.error(f'Processing: args={base_args} {e}') errors.display(e, 'Processing') - if hasattr(shared.sd_model, 'embedding_db') and len(shared.sd_model.embedding_db.embeddings_used) > 0: + if hasattr(shared.sd_model, 'embedding_db') and len(shared.sd_model.embedding_db.embeddings_used) > 0: # register used embeddings p.extra_generation_params['Embeddings'] = ', '.join(shared.sd_model.embedding_db.embeddings_used) shared.state.nextjob() @@ -450,12 +460,15 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): return results # optional second pass - if p.enable_hr and len(getattr(p, 'init_images', [])) == 0: + if p.enable_hr: p.is_hr_pass = True - if p.is_hr_pass: - p.init_hr() + p.init_hr(p.hr_scale, p.hr_upscaler, force=p.hr_force) prev_job = shared.state.job + # hires runs on original pipeline + if hasattr(shared.sd_model, 'restore_pipeline') and shared.sd_model.restore_pipeline is not None: + shared.sd_model.restore_pipeline() + # upscale if hasattr(p, 'height') and hasattr(p, 'width') and p.hr_upscaler is not None and p.hr_upscaler != 'None': shared.log.info(f'Upscale: upscaler="{p.hr_upscaler}" resize={p.hr_resize_x}x{p.hr_resize_y} upscale={p.hr_upscale_to_x}x{p.hr_upscale_to_y}') @@ -467,7 +480,7 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): sd_hijack_hypertile.hypertile_set(p, hr=True) latent_upscale = shared.latent_upscale_modes.get(p.hr_upscaler, None) - if (latent_upscale is not None or p.hr_force) and p.denoising_strength > 0: + if (latent_upscale is not None or p.hr_force) and getattr(p, 'hr_denoising_strength', p.denoising_strength) > 0: p.ops.append('hires') sd_models_compile.openvino_recompile_model(p, hires=True, refiner=False) if shared.sd_model.__class__.__name__ == "OnnxRawPipeline": @@ -480,6 +493,13 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE) update_sampler(shared.sd_model, second_pass=True) shared.log.info(f'HiRes: class={shared.sd_model.__class__.__name__} sampler="{p.hr_sampler_name}"') + if p.is_control and hasattr(p, 'task_args') and p.task_args.get('image', None) is not None: + if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0: + output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, 
output_type='pil') # controlnet cannnot deal with latent input + p.task_args['image'] = output.images # replace so hires uses new output + sd_models.move_model(shared.sd_model, devices.device) + orig_denoise = p.denoising_strength + p.denoising_strength = getattr(p, 'hr_denoising_strength', p.denoising_strength) hires_args = set_pipeline_args( model=shared.sd_model, prompts=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts, @@ -507,7 +527,7 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): sd_models_compile.openvino_post_compile(op="base") except AssertionError as e: shared.log.info(e) - p.init_images = [] + p.denoising_strength = orig_denoise shared.state.job = prev_job shared.state.nextjob() p.is_hr_pass = False @@ -541,6 +561,8 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): image = processing_vae.vae_decode(latents=image, model=shared.sd_model, full_quality=p.full_quality, output_type='pil') p.extra_generation_params['Noise level'] = noise_level output_type = 'np' + if hasattr(p, 'task_args') and p.task_args.get('image', None) is not None and output is not None: # replace input with output so it can be used by hires/refine + p.task_args['image'] = image shared.log.info(f'Refiner: class={shared.sd_refiner.__class__.__name__}') refiner_args = set_pipeline_args( model=shared.sd_refiner, @@ -561,8 +583,8 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): ) shared.state.sampling_steps = refiner_args['num_inference_steps'] try: - if 'requires_aesthetics_score' in shared.sd_refiner.config: - shared.sd_refiner.register_to_config(requires_aesthetics_score=shared.opts.diffusers_aesthetics_score) + if 'requires_aesthetics_score' in shared.sd_refiner.config: # sdxl-model needs false and sdxl-refiner needs true + shared.sd_refiner.register_to_config(requires_aesthetics_score = getattr(shared.sd_refiner, 'tokenizer', None) is None) refiner_output = shared.sd_refiner(**refiner_args) # pylint: disable=not-callable if isinstance(refiner_output, dict): refiner_output = SimpleNamespace(**refiner_output) diff --git a/modules/processing_vae.py b/modules/processing_vae.py index 793f2aeb8..1120b4ba1 100644 --- a/modules/processing_vae.py +++ b/modules/processing_vae.py @@ -120,10 +120,10 @@ def vae_decode(latents, model, output_type='np', full_quality=True): if not hasattr(model, 'vae'): shared.log.error('VAE not found in model') return [] - if latents.shape[0] == 4 and latents.shape[1] != 4: # likely animatediff latent - latents = latents.permute(1, 0, 2, 3) if len(latents.shape) == 3: # lost a batch dim in hires latents = latents.unsqueeze(0) + if latents.shape[0] == 4 and latents.shape[1] != 4: # likely animatediff latent + latents = latents.permute(1, 0, 2, 3) if full_quality: decoded = full_vae_decode(latents=latents, model=shared.sd_model) else: diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 680ca05cd..3fd60d394 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -151,13 +151,11 @@ def prepare_embedding_providers(pipe, clip_skip): embedding_type = -(clip_skip + 1) else: embedding_type = clip_skip - if hasattr(pipe, "tokenizer") and hasattr(pipe, "text_encoder"): - provider = EmbeddingsProvider(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder, truncate=False, - returned_embeddings_type=embedding_type, device=device) + if getattr(pipe, "tokenizer", None) is not None and getattr(pipe, "text_encoder", None) is not None: + 
provider = EmbeddingsProvider(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder, truncate=False, returned_embeddings_type=embedding_type, device=device) embeddings_providers.append(provider) - if hasattr(pipe, "tokenizer_2") and hasattr(pipe, "text_encoder_2"): - provider = EmbeddingsProvider(tokenizer=pipe.tokenizer_2, text_encoder=pipe.text_encoder_2, truncate=False, - returned_embeddings_type=embedding_type, device=device) + if getattr(pipe, "tokenizer_2", None) is not None and getattr(pipe, "text_encoder_2", None) is not None: + provider = EmbeddingsProvider(tokenizer=pipe.tokenizer_2, text_encoder=pipe.text_encoder_2, truncate=False, returned_embeddings_type=embedding_type, device=device) embeddings_providers.append(provider) return embeddings_providers @@ -244,11 +242,12 @@ def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", c .argmax(dim=-1), ] else: - pooled_prompt_embeds = embedding_providers[-1].get_pooled_embeddings(texts=[prompt_2], device=device) if \ - prompt_embeds[-1].shape[-1] > 768 else None - negative_pooled_prompt_embeds = embedding_providers[-1].get_pooled_embeddings(texts=[neg_prompt_2], - device=device) if \ - negative_prompt_embeds[-1].shape[-1] > 768 else None + try: + pooled_prompt_embeds = embedding_providers[-1].get_pooled_embeddings(texts=[prompt_2], device=device) if prompt_embeds[-1].shape[-1] > 768 else None + negative_pooled_prompt_embeds = embedding_providers[-1].get_pooled_embeddings(texts=[neg_prompt_2], device=device) if negative_prompt_embeds[-1].shape[-1] > 768 else None + except Exception: + pooled_prompt_embeds = None + negative_pooled_prompt_embeds = None prompt_embeds = torch.cat(prompt_embeds, dim=-1) if len(prompt_embeds) > 1 else prompt_embeds[0] negative_prompt_embeds = torch.cat(negative_prompt_embeds, dim=-1) if len(negative_prompt_embeds) > 1 else \ diff --git a/modules/sd_models.py b/modules/sd_models.py index 0ca0774cd..9f578ee93 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -23,7 +23,6 @@ from modules import paths, shared, shared_items, shared_state, modelloader, devices, script_callbacks, sd_vae, errors, hashes, sd_models_config, sd_models_compile, sd_hijack_accelerate from modules.timer import Timer from modules.memstats import memory_stats -from modules.paths import models_path, script_path from modules.modeldata import model_data @@ -48,7 +47,7 @@ def __init__(self, filename): self.filename = filename self.type = '' relname = filename - app_path = os.path.abspath(script_path) + app_path = os.path.abspath(paths.script_path) def rel(fn, path): try: @@ -64,8 +63,8 @@ def rel(fn, path): relname = rel(filename, shared.opts.diffusers_dir) elif relname.startswith(model_path): relname = rel(filename, model_path) - elif relname.startswith(script_path): - relname = rel(filename, script_path) + elif relname.startswith(paths.script_path): + relname = rel(filename, paths.script_path) elif relname.startswith(app_path): relname = rel(filename, app_path) else: @@ -147,7 +146,7 @@ def list_models(): ext_filter = [".ckpt", ".safetensors"] model_list = list(modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"])) if shared.backend == shared.Backend.DIFFUSERS: - model_list += modelloader.load_diffusers_models(model_path=os.path.join(models_path, 'Diffusers'), command_path=shared.opts.diffusers_dir, clear=True) + model_list += modelloader.load_diffusers_models(clear=True) 
for filename in sorted(model_list, key=str.lower): checkpoint_info = CheckpointInfo(filename) if checkpoint_info.name is not None: @@ -676,14 +675,17 @@ def set_diffuser_options(sd_model, vae = None, op: str = 'model'): sd_model.vae.to(devices.dtype_vae) shared.log.debug(f'Setting {op} VAE: upcast={sd_model.vae.config.get("force_upcast", None)}') if hasattr(sd_model, "enable_model_cpu_offload"): - if (shared.cmd_opts.medvram and devices.backend != "directml") or shared.opts.diffusers_model_cpu_offload: + if shared.cmd_opts.medvram or shared.opts.diffusers_model_cpu_offload: shared.log.debug(f'Setting {op}: enable model CPU offload') if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner: shared.opts.diffusers_move_base = False shared.opts.diffusers_move_unet = False shared.opts.diffusers_move_refiner = False shared.log.warning(f'Disabling {op} "Move model to CPU" since "Model CPU offload" is enabled') - sd_model.enable_model_cpu_offload() + if not hasattr(sd_model, "_all_hooks") or len(sd_model._all_hooks) == 0: # pylint: disable=protected-access + sd_model.enable_model_cpu_offload() + else: + sd_model.maybe_free_model_hooks() sd_model.has_accelerate = True if hasattr(sd_model, "enable_sequential_cpu_offload"): if shared.cmd_opts.lowvram or shared.opts.diffusers_seq_cpu_offload: @@ -693,7 +695,7 @@ def set_diffuser_options(sd_model, vae = None, op: str = 'model'): shared.opts.diffusers_move_unet = False shared.opts.diffusers_move_refiner = False shared.log.warning(f'Disabling {op} "Move model to CPU" since "Sequential CPU offload" is enabled') - sd_model.enable_sequential_cpu_offload(device=devices.device) + sd_model.enable_sequential_cpu_offload() sd_model.has_accelerate = True if hasattr(sd_model, "enable_vae_slicing"): if shared.cmd_opts.lowvram or shared.opts.diffusers_vae_slicing: @@ -725,8 +727,11 @@ def set_diffuser_options(sd_model, vae = None, op: str = 'model'): set_diffusers_attention(sd_model, DynamicAttnProcessorSDP()) if shared.opts.diffusers_fuse_projections and hasattr(sd_model, 'fuse_qkv_projections'): - shared.log.debug(f'Setting {op}: enable fused projections') - sd_model.fuse_qkv_projections() + try: + sd_model.fuse_qkv_projections() + shared.log.debug(f'Setting {op}: enable fused projections') + except Exception as e: + shared.log.error(f'Error enabling fused projections: {e}') if shared.opts.diffusers_eval: if hasattr(sd_model, "unet") and hasattr(sd_model.unet, "requires_grad_"): sd_model.unet.requires_grad_(False) @@ -746,21 +751,35 @@ def set_diffuser_options(sd_model, vae = None, op: str = 'model'): def move_model(model, device=None, force=False): - if model is not None: - if getattr(model, 'vae', None) is not None and get_diffusers_task(model) != DiffusersTaskType.TEXT_2_IMAGE: - if device == devices.device: # force vae back to gpu if not in txt2img mode - model.vae.to(device) - if hasattr(model.vae, '_hf_hook'): - debug_move(f'Model move: to={device} class={model.vae.__class__} function={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access - model.vae._hf_hook.execution_device = device # pylint: disable=protected-access - if getattr(model, 'has_accelerate', False) and not force: - return - debug_move(f'Model move: to={device} class={model.__class__} function={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access - try: - model.to(device) - except Exception as e: - shared.log.error(f'Model move: {e}') - devices.torch_gc() + if model is None or device is None: + return + if 
getattr(model, 'vae', None) is not None and get_diffusers_task(model) != DiffusersTaskType.TEXT_2_IMAGE: + if device == devices.device: # force vae back to gpu if not in txt2img mode + model.vae.to(device) + if hasattr(model.vae, '_hf_hook'): + debug_move(f'Model move: to={device} class={model.vae.__class__} fn={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access + model.vae._hf_hook.execution_device = device # pylint: disable=protected-access + debug_move(f'Model move: device={device} class={model.__class__} accelerate={getattr(model, "has_accelerate", False)} fn={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access + if hasattr(model, "components"): # accelerate patch + for name, m in model.components.items(): + if not hasattr(m, "_hf_hook"): # not accelerate hook + break + if not isinstance(m, torch.nn.Module) or name in model._exclude_from_cpu_offload: # pylint: disable=protected-access + continue + for module in m.modules(): + if (hasattr(module, "_hf_hook") and hasattr(module._hf_hook, "execution_device") and module._hf_hook.execution_device is not None): # pylint: disable=protected-access + try: + module._hf_hook.execution_device = device # pylint: disable=protected-access + except Exception as e: + if os.environ.get('SD_MOVE_DEBUG', None): + shared.log.error(f'Model move execution device: device={device} {e}') + if getattr(model, 'has_accelerate', False) and not force: + return + try: + model.to(device) + except Exception as e: + shared.log.error(f'Model move: device={device} {e}') + devices.torch_gc() def get_load_config(model_file, model_type): @@ -868,7 +887,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No model_name = modelloader.find_diffuser(ckpt_basename) if model_name is not None: shared.log.info(f'Load model {op}: {model_name}') - model_file = modelloader.download_diffusers_model(hub_id=model_name) + model_file = modelloader.download_diffusers_model(hub_id=model_name, variant=diffusers_load_config.get('variant', None)) try: shared.log.debug(f'Model load {op} config: {diffusers_load_config}') sd_model = diffusers.DiffusionPipeline.from_pretrained(model_file, **diffusers_load_config) @@ -898,15 +917,21 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No if 'variant' not in diffusers_load_config and any('diffusion_pytorch_model.fp16' in f for f in files): # deal with diffusers lack of variant fallback when loading diffusers_load_config['variant'] = 'fp16' if model_type in ['Stable Cascade']: # forced pipeline - # TODO experimental stable cascade - try: - shared.log.debug(f'StableCascade experimental: args={diffusers_load_config} device={devices.device} dtype={devices.dtype}') + try: # this is horrible special-case handling for stable-cascade multi-stage pipeline with variants and non-standard revision diffusers_load_config.pop("vae", None) - diffusers_load_config.pop("variant", None) - decoder = diffusers.StableCascadeDecoderPipeline.from_pretrained("stabilityai/stable-cascade", cache_dir=shared.opts.diffusers_dir, revision="refs/pr/17", **diffusers_load_config) - shared.log.debug(f'StableCascade decoder: scale={decoder.latent_dim_scale}') - prior = diffusers.StableCascadePriorPipeline.from_pretrained("stabilityai/stable-cascade-prior", cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) - shared.log.debug(f'StableCascade prior: scale={prior.resolution_multiple}') + diffusers_load_config["variant"] = 'bf16' + if 'lite' in checkpoint_info.name: + decoder_unet = 
diffusers.models.StableCascadeUNet.from_pretrained("stabilityai/stable-cascade", subfolder="decoder_lite", cache_dir=shared.opts.diffusers_dir, revision="refs/pr/44", **diffusers_load_config) + decoder = diffusers.StableCascadeDecoderPipeline.from_pretrained("stabilityai/stable-cascade", cache_dir=shared.opts.diffusers_dir, revision="refs/pr/44", decoder=decoder_unet, **diffusers_load_config) + shared.log.debug(f'StableCascade lite decoder: scale={decoder.latent_dim_scale}') + prior_unet = diffusers.models.StableCascadeUNet.from_pretrained("stabilityai/stable-cascade-prior", subfolder="prior_lite", cache_dir=shared.opts.diffusers_dir, revision="refs/pr/2", **diffusers_load_config) + prior = diffusers.StableCascadePriorPipeline.from_pretrained("stabilityai/stable-cascade-prior", cache_dir=shared.opts.diffusers_dir, revision="refs/pr/2", prior=prior_unet, **diffusers_load_config) + shared.log.debug(f'StableCascade lite prior: scale={prior.resolution_multiple}') + else: + decoder = diffusers.StableCascadeDecoderPipeline.from_pretrained("stabilityai/stable-cascade", cache_dir=shared.opts.diffusers_dir, revision="refs/pr/44", **diffusers_load_config) + shared.log.debug(f'StableCascade decoder: scale={decoder.latent_dim_scale}') + prior = diffusers.StableCascadePriorPipeline.from_pretrained("stabilityai/stable-cascade-prior", cache_dir=shared.opts.diffusers_dir, revision="refs/pr/2", **diffusers_load_config) + shared.log.debug(f'StableCascade prior: scale={prior.resolution_multiple}') sd_model = diffusers.StableCascadeCombinedPipeline( tokenizer=decoder.tokenizer, text_encoder=decoder.text_encoder, @@ -917,8 +942,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No prior_text_encoder=prior.text_encoder, prior_tokenizer=prior.tokenizer, prior_scheduler=prior.scheduler, - prior_prior_feature_extractor=prior.feature_extractor, - prior_prior_image_encoder=prior.image_encoder) + prior_feature_extractor=prior.feature_extractor, + prior_image_encoder=prior.image_encoder) shared.log.debug(f'StableCascade combined: {sd_model.__class__.__name__}') except Exception as e: shared.log.error(f'Diffusers Failed loading {op}: {checkpoint_info.path} {e}') @@ -958,8 +983,17 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No if debug_load: shared.log.debug(f'Diffusers load args: {diffusers_load_config}') try: # 1 - autopipeline, best choice but not all pipelines are available - sd_model = diffusers.AutoPipelineForText2Image.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) - sd_model.model_type = sd_model.__class__.__name__ + try: + sd_model = diffusers.AutoPipelineForText2Image.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) + sd_model.model_type = sd_model.__class__.__name__ + except ValueError as e: + if 'no variant default' in str(e): + shared.log.warning(f'Load: variant={diffusers_load_config["variant"]} model={checkpoint_info.path} using default variant') + diffusers_load_config.pop('variant', None) + sd_model = diffusers.AutoPipelineForText2Image.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) + sd_model.model_type = sd_model.__class__.__name__ + else: + raise ValueError from e # reraise except Exception as e: err1 = e if debug_load: @@ -996,9 +1030,6 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No if model_type.startswith('Stable Diffusion'): if 
shared.opts.diffusers_force_zeros: diffusers_load_config['force_zeros_for_empty_prompt '] = shared.opts.diffusers_force_zeros - if shared.opts.diffusers_aesthetics_score: - diffusers_load_config['requires_aesthetics_score'] = shared.opts.diffusers_aesthetics_score - # diffusers_load_config['config_files'] = get_load_config(checkpoint_info.path.lower()) diffusers_load_config['original_config_file'] = get_load_config(checkpoint_info.path, model_type) if hasattr(pipeline, 'from_single_file'): diffusers_load_config['use_safetensors'] = True @@ -1039,34 +1070,6 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No elif "Kandinsky" in sd_model.__class__.__name__: sd_model.scheduler.name = 'DDIM' - set_diffuser_options(sd_model, vae, op) - - base_sent_to_cpu=False - if (shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none') or shared.opts.ipex_optimize or shared.opts.nncf_compress_weights: - if op == 'refiner' and not getattr(sd_model, 'has_accelerate', False): - gpu_vram = memory_stats().get('gpu', {}) - free_vram = gpu_vram.get('total', 0) - gpu_vram.get('used', 0) - refiner_enough_vram = free_vram >= 7 if "StableDiffusionXL" in sd_model.__class__.__name__ else 3 - if not shared.opts.diffusers_move_base and refiner_enough_vram: - move_model(sd_model, devices.device) - base_sent_to_cpu=False - else: - if not refiner_enough_vram and not (shared.opts.diffusers_move_base and shared.opts.diffusers_move_refiner): - shared.log.warning(f"Insufficient GPU memory, using system memory as fallback: free={free_vram} GB") - if not shared.opts.shared.opts.diffusers_seq_cpu_offload and not shared.opts.diffusers_model_cpu_offload: - shared.log.debug('Enabled moving base model to CPU') - shared.log.debug('Enabled moving refiner model to CPU') - shared.opts.diffusers_move_base=True - shared.opts.diffusers_move_refiner=True - shared.log.debug('Moving base model to CPU') - move_model(model_data.sd_model, devices.cpu) - devices.torch_gc(force=True) - move_model(sd_model, devices.device) - base_sent_to_cpu=True - else: - move_model(sd_model, devices.device) - sd_models_compile.compile_diffusers(sd_model) - if sd_model is None: shared.log.error('Diffuser model not loaded') return @@ -1080,14 +1083,24 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No shared.opts.data["sd_checkpoint_hash"] = checkpoint_info.sha256 if hasattr(sd_model, "set_progress_bar_config"): sd_model.set_progress_bar_config(bar_format='Progress {rate_fmt}{postfix} {bar} {percentage:3.0f}% {n_fmt}/{total_fmt} {elapsed} {remaining}', ncols=80, colour='#327fba') + + set_diffuser_options(sd_model, vae, op) + if op == 'refiner' and shared.opts.diffusers_move_refiner: shared.log.debug('Moving refiner model to CPU') move_model(sd_model, devices.cpu) else: move_model(sd_model, devices.device) - if op == 'refiner' and base_sent_to_cpu: - shared.log.debug('Moving base model back to GPU') - move_model(model_data.sd_model, devices.device) + + if shared.opts.ipex_optimize: + sd_model = sd_models_compile.ipex_optimize(sd_model) + + if shared.opts.nncf_compress_weights and not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): + sd_model = sd_models_compile.nncf_compress_weights(sd_model) + + if (shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none'): + sd_model = sd_models_compile.compile_diffusers(sd_model) + except Exception as e: shared.log.error("Failed to load diffusers model") errors.display(e, "loading Diffusers model") @@ 
-1118,7 +1131,9 @@ class DiffusersTaskType(Enum): def get_diffusers_task(pipe: diffusers.DiffusionPipeline) -> DiffusersTaskType: - if pipe.__class__.__name__ == "StableDiffusionXLInstructPix2PixPipeline": + if pipe.__class__.__name__ in ["StableVideoDiffusionPipeline", "LEditsPPPipelineStableDiffusion", "LEditsPPPipelineStableDiffusionXL"]: + return DiffusersTaskType.IMAGE_2_IMAGE + elif pipe.__class__.__name__ == "StableDiffusionXLInstructPix2PixPipeline": return DiffusersTaskType.INSTRUCT elif pipe.__class__ in diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING.values(): return DiffusersTaskType.IMAGE_2_IMAGE @@ -1210,6 +1225,8 @@ def switch_pipe(cls: diffusers.DiffusionPipeline, pipeline: diffusers.DiffusionP components_skipped.append(k) if new_pipe is not None: copy_diffuser_options(new_pipe, pipeline) + if hasattr(new_pipe, "watermark"): + new_pipe.watermark = NoWatermark() if switch_mode == 'auto': shared.log.debug(f'Pipeline switch: from={pipeline.__class__.__name__} to={new_pipe.__class__.__name__} components={components_used} skipped={components_skipped} missing={components_missing}') else: @@ -1226,6 +1243,9 @@ def switch_pipe(cls: diffusers.DiffusionPipeline, pipeline: diffusers.DiffusionP def set_diffuser_pipe(pipe, new_pipe_type): if get_diffusers_task(pipe) == new_pipe_type: return pipe + # skip specific pipelines + if pipe.__class__.__name__ == 'StableDiffusionReferencePipeline' or pipe.__class__.__name__ == 'StableDiffusionAdapterPipeline' or 'Onnx' in pipe.__class__.__name__: + return pipe sd_checkpoint_info = getattr(pipe, "sd_checkpoint_info", None) sd_model_checkpoint = getattr(pipe, "sd_model_checkpoint", None) @@ -1235,10 +1255,6 @@ def set_diffuser_pipe(pipe, new_pipe_type): image_encoder = getattr(pipe, "image_encoder", None) feature_extractor = getattr(pipe, "feature_extractor", None) - # skip specific pipelines - if pipe.__class__.__name__ == 'StableDiffusionReferencePipeline' or pipe.__class__.__name__ == 'StableDiffusionAdapterPipeline' or 'Onnx' in pipe.__class__.__name__: - return pipe - try: if new_pipe_type == DiffusersTaskType.TEXT_2_IMAGE: new_pipe = diffusers.AutoPipelineForText2Image.from_pipe(pipe) @@ -1250,8 +1266,8 @@ def set_diffuser_pipe(pipe, new_pipe_type): shared.log.warning(f'Pipeline class change failed: type={new_pipe_type} pipeline={pipe.__class__.__name__} {e}') return pipe - if pipe.__class__ == new_pipe.__class__: - return pipe + # if pipe.__class__ == new_pipe.__class__: + # return pipe new_pipe.sd_checkpoint_info = sd_checkpoint_info new_pipe.sd_model_checkpoint = sd_model_checkpoint new_pipe.sd_model_hash = sd_model_hash @@ -1262,7 +1278,9 @@ def set_diffuser_pipe(pipe, new_pipe_type): new_pipe.is_sdxl = getattr(pipe, 'is_sdxl', False) # a1111 compatibility item new_pipe.is_sd2 = getattr(pipe, 'is_sd2', False) new_pipe.is_sd1 = getattr(pipe, 'is_sd1', True) - shared.log.debug(f"Pipeline class change: original={pipe.__class__.__name__} target={new_pipe.__class__.__name__}") + if hasattr(new_pipe, "watermark"): + new_pipe.watermark = NoWatermark() + shared.log.debug(f"Pipeline class change: original={pipe.__class__.__name__} target={new_pipe.__class__.__name__} device={pipe.device} fn={sys._getframe().f_back.f_code.co_name}") # pylint: disable=protected-access pipe = new_pipe return pipe diff --git a/modules/sd_models_compile.py b/modules/sd_models_compile.py index 76bbdd970..87ff44978 100644 --- a/modules/sd_models_compile.py +++ b/modules/sd_models_compile.py @@ -1,3 +1,4 @@ +import copy import time import logging 
import torch @@ -34,7 +35,20 @@ def ipex_optimize_model(model): import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import model.eval() model.training = False - model = ipex.optimize(model, dtype=devices.dtype, inplace=True, weights_prepack=False) # pylint: disable=attribute-defined-outside-init + if model.device.type != "meta": + return_device = model.device + model = ipex.optimize(model.to(devices.device), + dtype=devices.dtype, + inplace=True, + weights_prepack=False + ).to(return_device) # pylint: disable=attribute-defined-outside-init + else: + model = ipex.optimize(model, + dtype=devices.dtype, + inplace=True, + weights_prepack=False + ) # pylint: disable=attribute-defined-outside-init + devices.torch_gc() return model if "Model" in shared.opts.ipex_optimize: @@ -78,7 +92,12 @@ def nncf_compress_weights(sd_model): def nncf_compress_model(model): return_device = model.device + model.eval() + if hasattr(model, "get_input_embeddings"): + backup_embeddings = copy.deepcopy(model.get_input_embeddings()) model = nncf.compress_weights(model.to(devices.device)).to(return_device) + if hasattr(model, "set_input_embeddings"): + model.set_input_embeddings(backup_embeddings) devices.torch_gc(force=True) return model @@ -164,11 +183,12 @@ def compile_stablefast(sd_model): try: t0 = time.time() sd_model = sf.compile(sd_model, config) + sd_model.sfast = True setup_logging() # compile messes with logging so reset is needed if shared.opts.cuda_compile_precompile: sd_model("dummy prompt") t1 = time.time() - shared.log.info(f"Model compile: task=Stable-fast config={config.__dict__} time={t1-t0:.2f}") + shared.log.info(f"Model compile: task='Stable-fast' config={config.__dict__} time={t1-t0:.2f}") except Exception as e: shared.log.info(f"Model compile: task=Stable-fast error: {e}") return sd_model @@ -182,7 +202,21 @@ def compile_torch(sd_model): shared.log.debug(f"Model compile available backends: {torch._dynamo.list_backends()}") # pylint: disable=protected-access def torch_compile_model(model): - return torch.compile(model, mode=shared.opts.cuda_compile_mode, backend=shared.opts.cuda_compile_backend, fullgraph=shared.opts.cuda_compile_fullgraph) + if model.device.type != "meta": + return_device = model.device + model = torch.compile(model.to(devices.device), + mode=shared.opts.cuda_compile_mode, + backend=shared.opts.cuda_compile_backend, + fullgraph=shared.opts.cuda_compile_fullgraph + ).to(return_device) + else: + model = torch.compile(model, + mode=shared.opts.cuda_compile_mode, + backend=shared.opts.cuda_compile_backend, + fullgraph=shared.opts.cuda_compile_fullgraph + ) + devices.torch_gc() + return model if shared.opts.cuda_compile_backend == "openvino_fx": sd_model = optimize_openvino(sd_model) @@ -261,16 +295,12 @@ def compile_deepcache(sd_model): deepcache_worker = DeepCacheSDHelper(pipe=sd_model) deepcache_worker.set_params(cache_interval=shared.opts.deep_cache_interval, cache_branch_id=0) t1 = time.time() - shared.log.info(f"Model compile: task=DeepCache config={deepcache_worker.params} time={t1-t0:.2f}") + shared.log.info(f"Model compile: task='DeepCache' config={deepcache_worker.params} time={t1-t0:.2f}") # config={'cache_interval': 3, 'cache_layer_id': 0, 'cache_block_id': 0, 'skip_mode': 'uniform'} time=0.00 return sd_model def compile_diffusers(sd_model): - if shared.opts.ipex_optimize: - sd_model = ipex_optimize(sd_model) - if shared.opts.nncf_compress_weights and not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): - 
sd_model = nncf_compress_weights(sd_model) if not shared.opts.cuda_compile: return sd_model if shared.opts.cuda_compile_backend == 'none': diff --git a/modules/shared.py b/modules/shared.py index 6a1e4039c..8572982cc 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -68,12 +68,13 @@ "outdir_txt2img_samples", "outdir_img2img_samples", "outdir_extras_samples", + "outdir_control_samples", "outdir_grids", "outdir_txt2img_grids", "outdir_save", "outdir_init_images" } -resize_modes = ["None", "Fixed", "Crop", "Fill", "Latent"] +resize_modes = ["None", "Fixed", "Crop", "Fill", "Outpaint"] compatibility_opts = ['clip_skip', 'uni_pc_lower_order_final', 'uni_pc_order'] console = Console(log_time=True, log_time_format='%H:%M:%S-%f') dir_timestamps = {} @@ -325,6 +326,12 @@ def temp_disable_extensions(): else: # cuda, rocm, ipex cross_attention_optimization_default ="Scaled-Dot-Product" +if devices.backend == "rocm": + sdp_options_default = ['Memory attention', 'Math attention'] +#elif devices.backend == "zluda": +# sdp_options_default = ['Math attention'] +else: + sdp_options_default = ['Flash attention', 'Memory attention', 'Math attention'] options_templates.update(options_section(('sd', "Execution & Models"), { "sd_backend": OptionInfo(default_backend, "Execution backend", gr.Radio, {"choices": ["original", "diffusers"] }), @@ -340,7 +347,7 @@ def temp_disable_extensions(): "comma_padding_backtrack": OptionInfo(20, "Prompt padding", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1, "visible": backend == Backend.ORIGINAL }), "sd_checkpoint_cache": OptionInfo(0, "Cached models", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": backend == Backend.ORIGINAL }), "sd_vae_checkpoint_cache": OptionInfo(0, "Cached VAEs", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": False}), - "sd_disable_ckpt": OptionInfo(False, "Disallow models in ckpt format"), + "sd_disable_ckpt": OptionInfo(False, "Disallow models in ckpt format", gr.Checkbox, {"visible": False}), })) options_templates.update(options_section(('cuda', "Compute Settings"), { @@ -358,7 +365,7 @@ def temp_disable_extensions(): "cross_attention_sep": OptionInfo("
<h2>Attention</h2>
", "", gr.HTML), "cross_attention_optimization": OptionInfo(cross_attention_optimization_default, "Attention optimization method", gr.Radio, lambda: {"choices": shared_items.list_crossattention(diffusers=backend == Backend.DIFFUSERS) }), - "sdp_options": OptionInfo(['Flash attention', 'Memory attention', 'Math attention'], "SDP options", gr.CheckboxGroup, {"choices": ['Flash attention', 'Memory attention', 'Math attention'] }), + "sdp_options": OptionInfo(sdp_options_default, "SDP options", gr.CheckboxGroup, {"choices": ['Flash attention', 'Memory attention', 'Math attention'] }), "xformers_options": OptionInfo(['Flash attention'], "xFormers options", gr.CheckboxGroup, {"choices": ['Flash attention'] }), "dynamic_attention_slice_rate": OptionInfo(4, "Dynamic Attention slicing rate in GB", gr.Slider, {"minimum": 0.1, "maximum": 16, "step": 0.1, "visible": backend == Backend.DIFFUSERS}), "sub_quad_sep": OptionInfo("
<h2>Sub-quadratic options</h2>
", "", gr.HTML, {"visible": backend == Backend.ORIGINAL}), @@ -387,7 +394,7 @@ def temp_disable_extensions(): "nncf_compress_weights": OptionInfo([], "Compress Model weights with NNCF", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": backend == Backend.DIFFUSERS}), "ipex_sep": OptionInfo("
<h2>IPEX</h2>
", "", gr.HTML, {"visible": devices.backend == "ipex"}), - "ipex_optimize": OptionInfo(["Model", "VAE", "Text Encoder", "Upscaler"] if devices.backend == "ipex" else [], "IPEX Optimize for Intel GPUs", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"], "visible": devices.backend == "ipex"}), + "ipex_optimize": OptionInfo([], "IPEX Optimize for Intel GPUs", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"], "visible": devices.backend == "ipex"}), "openvino_sep": OptionInfo("
<h2>OpenVINO</h2>
", "", gr.HTML, {"visible": cmd_opts.use_openvino}), "openvino_devices": OptionInfo([], "OpenVINO devices to use", gr.CheckboxGroup, {"choices": get_openvino_device_list() if cmd_opts.use_openvino else [], "visible": cmd_opts.use_openvino}), @@ -457,7 +464,6 @@ def temp_disable_extensions(): "diffusers_to_gpu": OptionInfo(False, "Load model directly to GPU"), "disable_accelerate": OptionInfo(False, "Disable accelerate"), "diffusers_force_zeros": OptionInfo(False, "Force zeros for prompts when empty", gr.Checkbox, {"visible": False}), - "diffusers_aesthetics_score": OptionInfo(False, "Require aesthetics score"), "diffusers_pooled": OptionInfo("default", "Diffusers SDXL pooled embeds", gr.Radio, {"choices": ['default', 'weighted']}), "huggingface_token": OptionInfo('', 'HuggingFace token'), @@ -509,18 +515,17 @@ def temp_disable_extensions(): "save_selected_only": OptionInfo(True, "Save only saves selected image"), "include_mask": OptionInfo(False, "Include mask in outputs"), "samples_save_zip": OptionInfo(True, "Create ZIP archive"), + "image_background": OptionInfo("#000000", "Resize background color", gr.ColorPicker, {}), "image_sep_metadata": OptionInfo("
<h2>Metadata/Logging</h2>
", "", gr.HTML), "image_metadata": OptionInfo(True, "Include metadata"), "save_txt": OptionInfo(False, "Create info file per image"), "save_log_fn": OptionInfo("", "Update JSON log file per image", component_args=hide_dirs), - "image_watermark_enabled": OptionInfo(False, "Include watermark"), - "image_watermark": OptionInfo('', "Watermark string"), "image_sep_grid": OptionInfo("
<h2>Grid Options</h2>
", "", gr.HTML), "grid_save": OptionInfo(True, "Save all generated image grids"), "grid_format": OptionInfo('jpg', 'File format', gr.Dropdown, {"choices": ["jpg", "png", "webp", "tiff", "jp2"]}), "n_rows": OptionInfo(-1, "Row count", gr.Slider, {"minimum": -1, "maximum": 16, "step": 1}), - "grid_background": OptionInfo("#000000", "Background color", gr.ColorPicker, {}), + "grid_background": OptionInfo("#000000", "Grid background color", gr.ColorPicker, {}), "font": OptionInfo("", "Font file"), "font_color": OptionInfo("#FFFFFF", "Font color", gr.ColorPicker, {}), @@ -532,6 +537,12 @@ def temp_disable_extensions(): "save_images_before_color_correction": OptionInfo(False, "Save image before color correction"), "save_mask": OptionInfo(False, "Save inpainting mask"), "save_mask_composite": OptionInfo(False, "Save inpainting masked composite"), + + "image_sep_watermark": OptionInfo("
<h2>Watermarking</h2>
", "", gr.HTML), + "image_watermark_enabled": OptionInfo(False, "Include invisible watermark"), + "image_watermark": OptionInfo('', "Invisible watermark string"), + "image_watermark_position": OptionInfo('none', 'Image watermark position', gr.Dropdown, {"choices": ["none", "top/left", "top/right", "bottom/left", "bottom/right", "center", "random"]}), + "image_watermark_image": OptionInfo('', "Image watermark file"), })) options_templates.update(options_section(('saving-paths', "Image Naming & Paths"), { @@ -570,6 +581,7 @@ def temp_disable_extensions(): "theme_style": OptionInfo("Auto", "Theme mode", gr.Radio, {"choices": ["Auto", "Dark", "Light"]}), "font_size": OptionInfo(14, "Font size", gr.Slider, {"minimum": 8, "maximum": 32, "step": 1, "visible": True}), "tooltips": OptionInfo("UI Tooltips", "UI tooltips", gr.Radio, {"choices": ["None", "Browser default", "UI tooltips"], "visible": False}), + "aspect_ratios": OptionInfo("1:1, 4:3, 16:9, 16:10, 21:9, 3:4, 9:16, 10:16, 9:21", "Allowed aspect ratios"), "compact_view": OptionInfo(False, "Compact view"), "return_grid": OptionInfo(True, "Show grid in results"), "return_mask": OptionInfo(False, "Inpainting include greyscale mask in results"), @@ -657,9 +669,10 @@ def temp_disable_extensions(): "CLIP_stop_at_last_layers": OptionInfo(1, "Clip skip", gr.Slider, {"minimum": 1, "maximum": 8, "step": 1, "visible": False}), "postprocessing_sep_face_restoration": OptionInfo("
<h2>Face Restoration</h2>
", "", gr.HTML), - "face_restoration_model": OptionInfo("CodeFormer", "Face restoration model", gr.Radio, lambda: {"choices": [x.name() for x in face_restorers]}), + "face_restoration_model": OptionInfo("Face HiRes", "Face restoration model", gr.Radio, lambda: {"choices": [x.name() for x in face_restorers]}), + "facehires_strength": OptionInfo(0.0, "Face HiRes strength", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), "code_former_weight": OptionInfo(0.2, "CodeFormer weight parameter", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), - "face_restoration_unload": OptionInfo(False, "Move face restoration model to CPU when complete"), + "face_restoration_unload": OptionInfo(False, "Move model to CPU when complete"), "postprocessing_sep_upscalers": OptionInfo("
<h2>Upscaling</h2>
", "", gr.HTML), "upscaler_unload": OptionInfo(False, "Unload upscaler after processing"), @@ -669,7 +682,7 @@ def temp_disable_extensions(): })) options_templates.update(options_section(('control', "Control Options"), { - "control_max_units": OptionInfo(3, "Maximum number of units", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}), + "control_max_units": OptionInfo(4, "Maximum number of units", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}), "control_move_processor": OptionInfo(False, "Processor move to CPU after use"), "control_unload_processor": OptionInfo(False, "Processor unload after use"), })) @@ -715,6 +728,7 @@ def temp_disable_extensions(): "extra_networks_card_square": OptionInfo(True, "UI disable variable aspect ratio"), "extra_networks_card_fit": OptionInfo("cover", "UI image contain method", gr.Radio, {"choices": ["contain", "cover", "fill"], "visible": False}), "extra_networks_sep2": OptionInfo("
<h2>Extra networks general</h2>
", "", gr.HTML), + "extra_network_reference": OptionInfo(False, "Use reference values when available", gr.Checkbox), "extra_network_skip_indexing": OptionInfo(False, "Build info on first access", gr.Checkbox), "extra_networks_default_multiplier": OptionInfo(1.0, "Default multiplier for extra networks", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), "diffusers_convert_embed": OptionInfo(False, "Auto-convert SD 1.5 embeddings to SDXL ", gr.Checkbox, {"visible": backend==Backend.DIFFUSERS}), @@ -920,6 +934,7 @@ def cast_value(self, key, value): log.info(f'Device: {print_dict(devices.get_gpu_info())}') prompt_styles = modules.styles.StyleDatabase(opts) +reference_models = readfile(os.path.join('html', 'reference.json')) cmd_opts.disable_extension_access = (cmd_opts.share or cmd_opts.listen or (cmd_opts.server_name or False)) and not cmd_opts.insecure devices.device, devices.device_interrogate, devices.device_gfpgan, devices.device_esrgan, devices.device_codeformer = (devices.cpu if any(y in cmd_opts.use_cpu for y in [x, 'all']) else devices.get_optimal_device() for x in ['sd', 'interrogate', 'gfpgan', 'esrgan', 'codeformer']) devices.onnx = [opts.onnx_execution_provider] diff --git a/modules/styles.py b/modules/styles.py index 59047b565..44e3bb8e9 100644 --- a/modules/styles.py +++ b/modules/styles.py @@ -5,21 +5,24 @@ import csv import json import time -from installer import log -from modules import files_cache +import random +from modules import files_cache, shared + class Style(): - def __init__(self, name: str, desc: str = "", prompt: str = "", negative_prompt: str = "", extra: str = "", filename: str = "", preview: str = "", mtime: float = 0): + def __init__(self, name: str, desc: str = "", prompt: str = "", negative_prompt: str = "", extra: str = "", wildcards: str = "", filename: str = "", preview: str = "", mtime: float = 0): self.name = name self.description = desc self.prompt = prompt self.negative_prompt = negative_prompt self.extra = extra + self.wildcards = wildcards self.filename = filename self.preview = preview self.mtime = mtime + def merge_prompts(style_prompt: str, prompt: str) -> str: if "{prompt}" in style_prompt: res = style_prompt.replace("{prompt}", prompt) @@ -40,6 +43,31 @@ def apply_styles_to_prompt(prompt, styles): return prompt +def apply_wildcards_to_prompt(prompt, all_wildcards): + replaced = {} + for style_wildcards in all_wildcards: + wildcards = [x.strip() for x in style_wildcards.split(";") if len(x.strip()) > 0] + for wildcard in wildcards: + what, words = wildcard.split("=", 1) + words = [x.strip() for x in words.split(",") if len(x.strip()) > 0] + word = random.choice(words) + prompt = prompt.replace(what, word) + replaced[what] = word + if replaced: + shared.log.debug(f'Applying style wildcards: {replaced}') + return prompt + + +def get_reference_style(): + name = shared.sd_model.sd_checkpoint_info.name + name = name.replace('\\', '/').replace('Diffusers/', '') + for k, v in shared.reference_models.items(): + model_file = os.path.splitext(v.get('path', '').split('@')[0])[0].replace('huggingface/', '') + if k == name or model_file == name: + return v.get('extras', None) + return None + + def apply_styles_to_extra(p, style: Style): if style is None: return @@ -47,10 +75,13 @@ def apply_styles_to_extra(p, style: Style): 'sampler': 'sampler_name', } from modules.generation_parameters_copypaste import parse_generation_parameters - extra = parse_generation_parameters(style.extra) + reference_style = get_reference_style() + extra = 
parse_generation_parameters(reference_style) if shared.opts.extra_network_reference else {} + extra.update(parse_generation_parameters(style.extra)) extra.pop('Prompt', None) extra.pop('Negative prompt', None) fields = [] + skipped = [] for k, v in extra.items(): k = k.lower() k = k.replace(' ', '_') @@ -62,7 +93,9 @@ def apply_styles_to_extra(p, style: Style): v = type(orig)(v) setattr(p, k, v) fields.append(f'{k}={v}') - log.debug(f'Applying style: name="{style.name}" extra={fields}') + else: + skipped.append(f'{k}={v}') + shared.log.debug(f'Applying style: name="{style.name}" extra={fields} skipped={skipped} reference={True if reference_style else False}') class StyleDatabase: @@ -81,10 +114,10 @@ def __init__(self, opts): try: os.makedirs(opts.styles_dir, exist_ok=True) self.save_styles(opts.styles_dir, verbose=True) - log.debug(f'Migrated styles: file={legacy_file} folder={opts.styles_dir}') + shared.log.debug(f'Migrated styles: file={legacy_file} folder={opts.styles_dir}') self.reload() except Exception as e: - log.error(f'styles failed to migrate: file={legacy_file} error={e}') + shared.log.error(f'styles failed to migrate: file={legacy_file} error={e}') if not os.path.isdir(opts.styles_dir): opts.styles_dir = os.path.join(paths.models_path, "styles") self.path = opts.styles_dir @@ -115,13 +148,14 @@ def load_style(self, fn, prefix=None): prompt=style.get("prompt", ""), negative_prompt=style.get("negative", ""), extra=style.get("extra", ""), + wildcards=style.get("wildcards", ""), preview=style.get("preview", None), filename=fn, mtime=os.path.getmtime(fn), ) self.styles[style["name"]] = new_style except Exception as e: - log.error(f'Failed to load style: file={fn} error={e}') + shared.log.error(f'Failed to load style: file={fn} error={e}') return new_style @@ -149,7 +183,7 @@ def list_folder(folder): list_folder(self.path) t1 = time.time() - log.debug(f'Load styles: folder="{self.path}" items={len(self.styles.keys())} time={t1-t0:.2f}') + shared.log.debug(f'Load styles: folder="{self.path}" items={len(self.styles.keys())} time={t1-t0:.2f}') def find_style(self, name): found = [style for style in self.styles.values() if style.name == name] @@ -157,36 +191,49 @@ def find_style(self, name): def get_style_prompts(self, styles): if styles is None or not isinstance(styles, list): - log.error(f'Invalid styles: {styles}') + shared.log.error(f'Invalid styles: {styles}') return [] return [self.find_style(x).prompt for x in styles] def get_negative_style_prompts(self, styles): if styles is None or not isinstance(styles, list): - log.error(f'Invalid styles: {styles}') + shared.log.error(f'Invalid styles: {styles}') return [] return [self.find_style(x).negative_prompt for x in styles] def apply_styles_to_prompt(self, prompt, styles): if styles is None or not isinstance(styles, list): - log.error(f'Invalid styles: {styles}') + shared.log.error(f'Invalid styles: {styles}') return prompt - return apply_styles_to_prompt(prompt, [self.find_style(x).prompt for x in styles]) + prompt = apply_styles_to_prompt(prompt, [self.find_style(x).prompt for x in styles]) + prompt = apply_wildcards_to_prompt(prompt, [self.find_style(x).wildcards for x in styles]) + return prompt def apply_negative_styles_to_prompt(self, prompt, styles): if styles is None or not isinstance(styles, list): - log.error(f'Invalid styles: {styles}') + shared.log.error(f'Invalid styles: {styles}') return prompt - return apply_styles_to_prompt(prompt, [self.find_style(x).negative_prompt for x in styles]) + prompt = 
apply_styles_to_prompt(prompt, [self.find_style(x).negative_prompt for x in styles]) + prompt = apply_wildcards_to_prompt(prompt, [self.find_style(x).wildcards for x in styles]) + return prompt def apply_styles_to_extra(self, p): if p.styles is None or not isinstance(p.styles, list): - log.error(f'Invalid styles: {p.styles}') + shared.log.error(f'Invalid styles: {p.styles}') return for style in p.styles: s = self.find_style(style) apply_styles_to_extra(p, s) + def extract_comments(self, p): + if not isinstance(p.prompt, str): + return + match = re.search(r'/\*.*?\*/', p.prompt, flags=re.DOTALL) + if match: + comment = match.group() + p.prompt = p.prompt.replace(comment, '') + p.extra_generation_params['Comment'] = comment.replace('/*', '').replace('*/', '') + def save_styles(self, path, verbose=False): for name in list(self.styles): style = { @@ -203,12 +250,12 @@ def save_styles(self, path, verbose=False): with open(fn, 'w', encoding='utf-8') as f: json.dump(style, f, indent=2) if verbose: - log.debug(f'Saved style: name={name} file={fn}') + shared.log.debug(f'Saved style: name={name} file={fn}') except Exception as e: - log.error(f'Failed to save style: name={name} file={path} error={e}') + shared.log.error(f'Failed to save style: name={name} file={path} error={e}') count = len(list(self.styles)) if count > 0: - log.debug(f'Saved styles: folder="{path}" items={count}') + shared.log.debug(f'Saved styles: folder="{path}" items={count}') def load_csv(self, legacy_file): if not os.path.isfile(legacy_file): @@ -221,9 +268,9 @@ def load_csv(self, legacy_file): name = row["name"] prompt = row["prompt"] if "prompt" in row else row["text"] negative = row.get("negative_prompt", "") if "negative_prompt" in row else row.get("negative", "") - self.styles[name] = Style(name, desc=name, prompt=prompt, negative_prompt=negative, extra="") - log.debug(f'Migrated style: {self.styles[name].__dict__}') + self.styles[name] = Style(name, desc=name, prompt=prompt, negative_prompt=negative) + shared.log.debug(f'Migrated style: {self.styles[name].__dict__}') num += 1 except Exception: - log.error(f'Styles error: file="{legacy_file}" row={row}') - log.info(f'Load legacy styles: file="{legacy_file}" loaded={num} created={len(list(self.styles))}') + shared.log.error(f'Styles error: file="{legacy_file}" row={row}') + shared.log.info(f'Load legacy styles: file="{legacy_file}" loaded={num} created={len(list(self.styles))}') diff --git a/modules/theme.py b/modules/theme.py index eb594834e..04fd3dda4 100644 --- a/modules/theme.py +++ b/modules/theme.py @@ -43,11 +43,12 @@ def list_themes(): if 'sdnext-ui-ux' in extensions: ext = next((e for e in modules.extensions.extensions if e.name == 'sdnext-ui-ux'), None) folder = os.path.join(ext.path, 'themes') - engines.append('modern/default') if os.path.exists(folder): for f in os.listdir(folder): if f.endswith('.css'): engines.append(f'modern/{os.path.splitext(f)[0]}') + if len(engines) == 0: + engines.append('modern/sdxl_alpha') if 'sd-webui-lobe-theme' in extensions: modules.shared.log.info('Theme: installed="lobe"') engines.append('lobe') @@ -68,6 +69,8 @@ def reload_gradio_theme(theme_name=None): theme_name = theme_name or modules.shared.cmd_opts.theme or modules.shared.opts.gradio_theme if theme_name == 'default': theme_name = 'black-teal' + if theme_name == 'modern': + theme_name = 'modern/sdxl_alpha' modules.shared.opts.data['gradio_theme'] = theme_name default_font_params = { 'font':['Helvetica', 'ui-sans-serif', 'system-ui', 'sans-serif'], diff --git a/modules/ui.py 
b/modules/ui.py index b059bbd36..e0f21dcd4 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -3,8 +3,8 @@ import gradio as gr import gradio.routes import gradio.utils -from modules.call_queue import wrap_gradio_call -from modules import timer, gr_hijack, shared, theme, sd_models, script_callbacks, modelloader, ui_common, ui_loadsave, ui_symbols, ui_javascript, generation_parameters_copypaste, call_queue +from modules.call_queue import wrap_gradio_call, wrap_gradio_gpu_call # pylint: disable=unused-import +from modules import timer, gr_hijack, shared, theme, sd_models, script_callbacks, modelloader, ui_common, ui_loadsave, ui_symbols, ui_javascript, ui_sections, generation_parameters_copypaste, call_queue from modules.paths import script_path, data_path # pylint: disable=unused-import from modules.dml import directml_override_opts from modules.onnx_impl import install_olive @@ -32,6 +32,7 @@ apply_style_symbol = ui_symbols.apply save_style_symbol = ui_symbols.save gr_hijack.init() +create_sampler_and_steps_selection = ui_sections.create_sampler_and_steps_selection # compatibility item if not cmd_opts.share and not cmd_opts.listen: @@ -40,6 +41,10 @@ gradio.utils.get_local_ip_address = lambda: '127.0.0.1' +def create_override_settings_dropdown(a, _b): + return ui_common.create_override_inputs(a) # compatibility item + + def gr_show(visible=True): return {"visible": visible, "__type__": "update"} @@ -149,12 +154,6 @@ def create_ui(startup_timer = None): ui_models.create_ui() timer.startup.record("ui-models") - with gr.Blocks(analytics_enabled=False) as interrogate_interface: - from modules import ui_interrogate - ui_interrogate.create_ui() - timer.startup.record("ui-interrogate") - - def create_setting_component(key, is_quicksettings=False): def fun(): return opts.data[key] if key in opts.data else opts.data_labels[key].default @@ -371,7 +370,6 @@ def reload_sd_weights(): interfaces += [(img2img_interface, "Image", "img2img")] interfaces += [(control_interface, "Control", "control")] if control_interface is not None else [] interfaces += [(extras_interface, "Process", "process")] - interfaces += [(interrogate_interface, "Interrogate", "interrogate")] interfaces += [(models_interface, "Models", "models")] interfaces += script_callbacks.ui_tabs_callback() interfaces += [(settings_interface, "System", "system")] diff --git a/modules/ui_control.py b/modules/ui_control.py index 8f07b9241..c2d6f0d20 100644 --- a/modules/ui_control.py +++ b/modules/ui_control.py @@ -23,7 +23,9 @@ def return_controls(res): # return preview, image, video, gallery, text debug(f'Control received: type={type(res)} {res}') - if isinstance(res, str): # error response + if res is None: # no response + return [None, None, None, None, ''] + elif isinstance(res, str): # error response return [None, None, None, None, res] elif isinstance(res, tuple): # standard response received as tuple via control_run->yield(output_images, process_image, result_txt) preview_image = res[1] # may be None @@ -93,11 +95,11 @@ def create_ui(_blocks: gr.Blocks=None): with gr.Accordion(open=False, label="Size", elem_id="control_size", elem_classes=["small-accordion"]): with gr.Tabs(): with gr.Tab('Before'): - resize_mode_before, resize_name_before, width_before, height_before, scale_by_before, selected_scale_tab_before = ui_sections.create_resize_inputs('control', [], scale_visible=False, mode='Fixed', accordion=False, latent=True) + resize_mode_before, resize_name_before, width_before, height_before, scale_by_before, selected_scale_tab_before = 
ui_sections.create_resize_inputs('control', [], accordion=False, latent=True) with gr.Tab('After'): - resize_mode_after, resize_name_after, width_after, height_after, scale_by_after, selected_scale_tab_after = ui_sections.create_resize_inputs('control', [], scale_visible=False, mode='Fixed', accordion=False, latent=False) + resize_mode_after, resize_name_after, width_after, height_after, scale_by_after, selected_scale_tab_after = ui_sections.create_resize_inputs('control', [], accordion=False, latent=False) with gr.Tab('Mask'): - resize_mode_mask, resize_name_mask, width_mask, height_mask, scale_by_mask, selected_scale_tab_mask = ui_sections.create_resize_inputs('control', [], scale_visible=False, mode='Fixed', accordion=False, latent=False) + resize_mode_mask, resize_name_mask, width_mask, height_mask, scale_by_mask, selected_scale_tab_mask = ui_sections.create_resize_inputs('control', [], accordion=False, latent=False) with gr.Accordion(open=False, label="Sampler", elem_id="control_sampler", elem_classes=["small-accordion"]): sd_samplers.set_samplers() @@ -124,6 +126,8 @@ def create_ui(_blocks: gr.Blocks=None): video_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) video_type.change(fn=helpers.video_type_change, inputs=[video_type], outputs=[video_duration, video_loop, video_pad, video_interpolate]) + enable_hr, hr_sampler_index, hr_denoising_strength, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img') + with gr.Row(): override_settings = ui_common.create_override_inputs('control') @@ -403,54 +407,54 @@ def create_ui(_blocks: gr.Blocks=None): if i == 0: units[-1].enabled = True # enable first unit in group - with gr.Tab('Processor settings') as _tab_settings: - with gr.Group(elem_classes=['processor-group']): - settings = [] - with gr.Accordion('HED', open=True, elem_classes=['processor-settings']): - settings.append(gr.Checkbox(label="Scribble", value=False)) - with gr.Accordion('Midas depth', open=True, elem_classes=['processor-settings']): - settings.append(gr.Slider(label="Background threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.1)) - settings.append(gr.Checkbox(label="Depth and normal", value=False)) - with gr.Accordion('MLSD', open=True, elem_classes=['processor-settings']): - settings.append(gr.Slider(label="Score threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.1)) - settings.append(gr.Slider(label="Distance threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.1)) - with gr.Accordion('OpenBody', open=True, elem_classes=['processor-settings']): - settings.append(gr.Checkbox(label="Body", value=True)) - settings.append(gr.Checkbox(label="Hands", value=False)) - settings.append(gr.Checkbox(label="Face", value=False)) - with gr.Accordion('PidiNet', open=True, elem_classes=['processor-settings']): - settings.append(gr.Checkbox(label="Scribble", value=False)) - settings.append(gr.Checkbox(label="Apply filter", value=False)) - with gr.Accordion('LineArt', open=True, elem_classes=['processor-settings']): - settings.append(gr.Checkbox(label="Coarse", value=False)) - with gr.Accordion('Leres Depth', open=True, elem_classes=['processor-settings']): - settings.append(gr.Checkbox(label="Boost", value=False)) - settings.append(gr.Slider(label="Near threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.0)) - settings.append(gr.Slider(label="Background threshold", minimum=0.0, 
maximum=1.0, step=0.01, value=0.0)) - with gr.Accordion('MediaPipe Face', open=True, elem_classes=['processor-settings']): - settings.append(gr.Slider(label="Max faces", minimum=1, maximum=10, step=1, value=1)) - settings.append(gr.Slider(label="Min confidence", minimum=0.0, maximum=1.0, step=0.01, value=0.5)) - with gr.Accordion('Canny', open=True, elem_classes=['processor-settings']): - settings.append(gr.Slider(label="Low threshold", minimum=0, maximum=1000, step=1, value=100)) - settings.append(gr.Slider(label="High threshold", minimum=0, maximum=1000, step=1, value=200)) - with gr.Accordion('DWPose', open=True, elem_classes=['processor-settings']): - settings.append(gr.Radio(label="Model", choices=['Tiny', 'Medium', 'Large'], value='Tiny')) - settings.append(gr.Slider(label="Min confidence", minimum=0.0, maximum=1.0, step=0.01, value=0.3)) - with gr.Accordion('SegmentAnything', open=True, elem_classes=['processor-settings']): - settings.append(gr.Radio(label="Model", choices=['Base', 'Large'], value='Base')) - with gr.Accordion('Edge', open=True, elem_classes=['processor-settings']): - settings.append(gr.Checkbox(label="Parameter free", value=True)) - settings.append(gr.Radio(label="Mode", choices=['edge', 'gradient'], value='edge')) - with gr.Accordion('Zoe Depth', open=True, elem_classes=['processor-settings']): - settings.append(gr.Checkbox(label="Gamma corrected", value=False)) - with gr.Accordion('Marigold Depth', open=True, elem_classes=['processor-settings']): - settings.append(gr.Dropdown(label="Color map", choices=['None'] + plt.colormaps(), value='None')) - settings.append(gr.Slider(label="Denoising steps", minimum=1, maximum=99, step=1, value=10)) - settings.append(gr.Slider(label="Ensemble size", minimum=1, maximum=99, step=1, value=10)) - with gr.Accordion('Depth Anything', open=True, elem_classes=['processor-settings']): - settings.append(gr.Dropdown(label="Color map", choices=['none'] + masking.COLORMAP, value='inferno')) - for setting in settings: - setting.change(fn=processors.update_settings, inputs=settings, outputs=[]) + with gr.Accordion('Processor settings', open=False, elem_classes=['control-settings']) as _tab_settings: + with gr.Group(elem_classes=['processor-group']): + settings = [] + with gr.Accordion('HED', open=True, elem_classes=['processor-settings']): + settings.append(gr.Checkbox(label="Scribble", value=False)) + with gr.Accordion('Midas depth', open=True, elem_classes=['processor-settings']): + settings.append(gr.Slider(label="Background threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.1)) + settings.append(gr.Checkbox(label="Depth and normal", value=False)) + with gr.Accordion('MLSD', open=True, elem_classes=['processor-settings']): + settings.append(gr.Slider(label="Score threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.1)) + settings.append(gr.Slider(label="Distance threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.1)) + with gr.Accordion('OpenBody', open=True, elem_classes=['processor-settings']): + settings.append(gr.Checkbox(label="Body", value=True)) + settings.append(gr.Checkbox(label="Hands", value=False)) + settings.append(gr.Checkbox(label="Face", value=False)) + with gr.Accordion('PidiNet', open=True, elem_classes=['processor-settings']): + settings.append(gr.Checkbox(label="Scribble", value=False)) + settings.append(gr.Checkbox(label="Apply filter", value=False)) + with gr.Accordion('LineArt', open=True, elem_classes=['processor-settings']): + settings.append(gr.Checkbox(label="Coarse", value=False)) + with 
gr.Accordion('Leres Depth', open=True, elem_classes=['processor-settings']): + settings.append(gr.Checkbox(label="Boost", value=False)) + settings.append(gr.Slider(label="Near threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.0)) + settings.append(gr.Slider(label="Background threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.0)) + with gr.Accordion('MediaPipe Face', open=True, elem_classes=['processor-settings']): + settings.append(gr.Slider(label="Max faces", minimum=1, maximum=10, step=1, value=1)) + settings.append(gr.Slider(label="Min confidence", minimum=0.0, maximum=1.0, step=0.01, value=0.5)) + with gr.Accordion('Canny', open=True, elem_classes=['processor-settings']): + settings.append(gr.Slider(label="Low threshold", minimum=0, maximum=1000, step=1, value=100)) + settings.append(gr.Slider(label="High threshold", minimum=0, maximum=1000, step=1, value=200)) + with gr.Accordion('DWPose', open=True, elem_classes=['processor-settings']): + settings.append(gr.Radio(label="Model", choices=['Tiny', 'Medium', 'Large'], value='Tiny')) + settings.append(gr.Slider(label="Min confidence", minimum=0.0, maximum=1.0, step=0.01, value=0.3)) + with gr.Accordion('SegmentAnything', open=True, elem_classes=['processor-settings']): + settings.append(gr.Radio(label="Model", choices=['Base', 'Large'], value='Base')) + with gr.Accordion('Edge', open=True, elem_classes=['processor-settings']): + settings.append(gr.Checkbox(label="Parameter free", value=True)) + settings.append(gr.Radio(label="Mode", choices=['edge', 'gradient'], value='edge')) + with gr.Accordion('Zoe Depth', open=True, elem_classes=['processor-settings']): + settings.append(gr.Checkbox(label="Gamma corrected", value=False)) + with gr.Accordion('Marigold Depth', open=True, elem_classes=['processor-settings']): + settings.append(gr.Dropdown(label="Color map", choices=['None'] + plt.colormaps(), value='None')) + settings.append(gr.Slider(label="Denoising steps", minimum=1, maximum=99, step=1, value=10)) + settings.append(gr.Slider(label="Ensemble size", minimum=1, maximum=99, step=1, value=10)) + with gr.Accordion('Depth Anything', open=True, elem_classes=['processor-settings']): + settings.append(gr.Dropdown(label="Color map", choices=['none'] + masking.COLORMAP, value='inferno')) + for setting in settings: + setting.change(fn=processors.update_settings, inputs=settings, outputs=[]) with gr.Row(elem_id="control_script_container"): input_script_args = scripts.scripts_current.setup_ui(parent='control', accordion=True) @@ -501,6 +505,8 @@ def create_ui(_blocks: gr.Blocks=None): resize_mode_after, resize_name_after, width_after, height_after, scale_by_after, selected_scale_tab_after, resize_mode_mask, resize_name_mask, width_mask, height_mask, scale_by_mask, selected_scale_tab_mask, denoising_strength, batch_count, batch_size, + enable_hr, hr_sampler_index, hr_denoising_strength, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, + refiner_start, refiner_prompt, refiner_negative, video_skip_frames, video_type, video_duration, video_loop, video_pad, video_interpolate, ] output_fields = [ diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py index 25f75c9f9..985465ee8 100644 --- a/modules/ui_extra_networks.py +++ b/modules/ui_extra_networks.py @@ -297,14 +297,14 @@ def find_preview_file(self, path): if os.path.join('models', 'Reference') in path: return path exts = ["jpg", "jpeg", "png", "webp", "tiff", "jp2"] + reference_path = os.path.abspath(os.path.join('models', 
'Reference')) + files = list(files_cache.list_files(reference_path, ext_filter=exts, recursive=False)) if shared.opts.diffusers_dir in path: path = os.path.relpath(path, shared.opts.diffusers_dir) - reference_path = os.path.abspath(os.path.join('models', 'Reference')) fn = os.path.join(reference_path, path.replace('models--', '').replace('\\', '/').split('/')[0]) - files = list(files_cache.list_files(reference_path, ext_filter=exts, recursive=False)) else: fn = os.path.splitext(path)[0] - files = list(files_cache.list_files(os.path.dirname(path), ext_filter=exts, recursive=False)) + files += list(files_cache.list_files(os.path.dirname(path), ext_filter=exts, recursive=False)) for file in [f'{fn}{mid}{ext}' for ext in exts for mid in ['.thumb.', '.', '.preview.']]: if file in files: if '.thumb.' not in file: @@ -324,6 +324,7 @@ def update_all_previews(self, items): possible_paths = list(set([os.path.dirname(item['filename']) for item in items] + [reference_path])) exts = ["jpg", "jpeg", "png", "webp", "tiff", "jp2"] all_previews = list(files_cache.list_files(*possible_paths, ext_filter=exts, recursive=False)) + all_previews_fn = [os.path.basename(x) for x in all_previews] for item in items: if item.get('preview', None) is not None: continue @@ -336,11 +337,13 @@ def update_all_previews(self, items): model_path = os.path.join(shared.opts.diffusers_dir, match[0]) item['local_preview'] = f'{os.path.join(model_path, match[1])}.{shared.opts.samples_format}' all_previews += list(files_cache.list_files(model_path, ext_filter=exts, recursive=False)) + base = os.path.basename(base) for file in [f'{base}{mid}{ext}' for ext in exts for mid in ['.thumb.', '.', '.preview.']]: - if file in all_previews: + if file in all_previews_fn: + file_idx = all_previews_fn.index(os.path.basename(file)) if '.thumb.' 
not in file: - self.missing_thumbs.append(file) - item['preview'] = self.link_preview(file) + self.missing_thumbs.append(all_previews[file_idx]) + item['preview'] = self.link_preview(all_previews[file_idx]) break if item.get('preview', None) is None: item['preview'] = self.link_preview('html/card-no-preview.png') @@ -447,10 +450,12 @@ def get_pages(title=None): class ExtraNetworksUi: def __init__(self): self.tabname: str = None - self.pages: list(str) = None + self.pages: list[str] = None self.visible: gr.State = None self.state: gr.Textbox = None self.details: gr.Group = None + self.details_tabs: gr.Group = None + self.details_text: gr.Group = None self.tabs: gr.Tabs = None self.gallery: gr.Gallery = None self.description: gr.Textbox = None @@ -478,7 +483,7 @@ def create_ui(container, button_parent, tabname, skip_indexing = False): ui.pages = [] ui.state = gr.Textbox('{}', elem_id=f"{tabname}_extra_state", visible=False) ui.visible = gr.State(value=False) # pylint: disable=abstract-class-instantiated - ui.details = gr.Group(elem_id=f"{tabname}_extra_details", visible=False) + ui.details = gr.Group(elem_id=f"{tabname}_extra_details", elem_classes=["extra-details"], visible=False) ui.tabs = gr.Tabs(elem_id=f"{tabname}_extra_tabs") ui.button_details = gr.Button('Details', elem_id=f"{tabname}_extra_details_btn", visible=False) state = {} @@ -533,26 +538,39 @@ def toggle_visibility(is_visible): with gr.Row(): btn_save_img = gr.Button('Replace', elem_classes=['small-button']) btn_delete_img = gr.Button('Delete', elem_classes=['small-button']) - with gr.Tabs(): - with gr.Tab('Description'): - desc = gr.Textbox('', show_label=False, lines=8, placeholder="Extra network description...") - ui.details_components.append(desc) - with gr.Row(): - btn_save_desc = gr.Button('Save', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_save_desc') - btn_delete_desc = gr.Button('Delete', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_delete_desc') - btn_close_desc = gr.Button('Close', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_close_desc') - btn_close_desc.click(fn=lambda: gr.update(visible=False), _js='refeshDetailsEN', inputs=[], outputs=[ui.details]) - with gr.Tab('Model metadata'): - info = gr.JSON({}, show_label=False) - ui.details_components.append(info) - with gr.Row(): - btn_save_info = gr.Button('Save', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_save_info') - btn_delete_info = gr.Button('Delete', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_delete_info') - btn_close_info = gr.Button('Close', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_close_info') - btn_close_info.click(fn=lambda: gr.update(visible=False), _js='refeshDetailsEN', inputs=[], outputs=[ui.details]) - with gr.Tab('Embedded metadata'): - meta = gr.JSON({}, show_label=False) - ui.details_components.append(meta) + with gr.Group(elem_id=f"{tabname}_extra_details_tabs", visible=False) as ui.details_tabs: + with gr.Tabs(): + with gr.Tab('Description', elem_classes=['extra-details-tabs']): + desc = gr.Textbox('', show_label=False, lines=8, placeholder="Extra network description...") + ui.details_components.append(desc) + with gr.Row(): + btn_save_desc = gr.Button('Save', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_save_desc') + btn_delete_desc = gr.Button('Delete', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_delete_desc') + btn_close_desc = gr.Button('Close', elem_classes=['small-button'], 
elem_id=f'{tabname}_extra_details_close_desc') + btn_close_desc.click(fn=lambda: gr.update(visible=False), _js='refeshDetailsEN', inputs=[], outputs=[ui.details]) + with gr.Tab('Model metadata', elem_classes=['extra-details-tabs']): + info = gr.JSON({}, show_label=False) + ui.details_components.append(info) + with gr.Row(): + btn_save_info = gr.Button('Save', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_save_info') + btn_delete_info = gr.Button('Delete', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_delete_info') + btn_close_info = gr.Button('Close', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_close_info') + btn_close_info.click(fn=lambda: gr.update(visible=False), _js='refeshDetailsEN', inputs=[], outputs=[ui.details]) + with gr.Tab('Embedded metadata', elem_classes=['extra-details-tabs']): + meta = gr.JSON({}, show_label=False) + ui.details_components.append(meta) + with gr.Group(elem_id=f"{tabname}_extra_details_text", elem_classes=["extra-details-text"], visible=False) as ui.details_text: + description = gr.Textbox(label='Description', lines=1, placeholder="Style description...") + prompt = gr.Textbox(label='Prompt', lines=2, placeholder="Prompt...") + negative = gr.Textbox(label='Negative prompt', lines=2, placeholder="Negative prompt...") + extra = gr.Textbox(label='Parameters', lines=2, placeholder="Generation parameters overrides...") + wildcards = gr.Textbox(label='Wildcards', lines=2, placeholder="Wildcard prompt replacements...") + ui.details_components += [description, prompt, negative, extra, wildcards] + with gr.Row(): + btn_save_style = gr.Button('Save', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_save_style') + btn_delete_style = gr.Button('Delete', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_delete_style') + btn_close_style = gr.Button('Close', elem_classes=['small-button'], elem_id=f'{tabname}_extra_details_close_style') + btn_close_style.click(fn=lambda: gr.update(visible=False), _js='refeshDetailsEN', inputs=[], outputs=[ui.details]) with ui.tabs: def ui_tab_change(page): @@ -570,7 +588,7 @@ def ui_tab_change(page): ui.button_close = ToolButton(symbols.close, elem_id=f"{tabname}_extra_close", visible=True) ui.button_model = ToolButton(symbols.refine, elem_id=f"{tabname}_extra_model", visible=True) ui.search = gr.Textbox('', show_label=False, elem_id=f"{tabname}_extra_search", placeholder="Search...", elem_classes="textbox", lines=2, container=False) - ui.description = gr.Textbox('', show_label=False, elem_id=f"{tabname}_description", elem_classes="textbox", lines=2, interactive=False, container=False) + ui.description = gr.Textbox('', show_label=False, elem_id=f"{tabname}_description", elem_classes=["textbox", "extra-description"], lines=2, interactive=False, container=False) if ui.tabname == 'txt2img': # refresh only once global refresh_time # pylint: disable=global-statement @@ -639,10 +657,7 @@ def fn_save_desc(desc): def fn_delete_desc(desc): if ui.last_item is None: return desc - if hasattr(ui.last_item, 'type') and ui.last_item.type == 'Style': - fn = os.path.splitext(ui.last_item.filename)[0] + '.json' - else: - fn = os.path.splitext(ui.last_item.filename)[0] + '.txt' + fn = os.path.splitext(ui.last_item.filename)[0] + '.txt' if os.path.exists(fn): shared.log.debug(f'Extra network delete desc: item={ui.last_item.name} filename="{fn}"') os.remove(fn) @@ -665,14 +680,39 @@ def fn_delete_info(info): return '' return info + def fn_save_style(info, description, 
prompt, negative, extra, wildcards): + if not isinstance(info, dict) or isinstance(info, list): + shared.log.warning(f'Extra network save style skip: item={ui.last_item.name} not a dict: {type(info)}') + return info + if ui.last_item is None: + return info + fn = os.path.splitext(ui.last_item.filename)[0] + '.json' + if hasattr(ui.last_item, 'type') and ui.last_item.type == 'Style': + info.update(**{ 'description': description, 'prompt': prompt, 'negative': negative, 'extra': extra, 'wildcards': wildcards }) + shared.writefile(info, fn, silent=True) + shared.log.debug(f'Extra network save style: item={ui.last_item.name} filename="{fn}"') + return info + + def fn_delete_style(info): + if ui.last_item is None: + return info + fn = os.path.splitext(ui.last_item.filename)[0] + '.json' + if os.path.exists(fn): + shared.log.debug(f'Extra network delete style: item={ui.last_item.name} filename="{fn}"') + os.remove(fn) + return {} + return info + btn_save_img.click(fn=fn_save_img, _js='closeDetailsEN', inputs=[img], outputs=[img]) btn_delete_img.click(fn=fn_delete_img, _js='closeDetailsEN', inputs=[img], outputs=[img]) btn_save_desc.click(fn=fn_save_desc, _js='closeDetailsEN', inputs=[desc], outputs=[desc]) btn_delete_desc.click(fn=fn_delete_desc, _js='closeDetailsEN', inputs=[desc], outputs=[desc]) btn_save_info.click(fn=fn_save_info, _js='closeDetailsEN', inputs=[info], outputs=[info]) btn_delete_info.click(fn=fn_delete_info, _js='closeDetailsEN', inputs=[info], outputs=[info]) + btn_save_style.click(fn=fn_save_style, _js='closeDetailsEN', inputs=[info, description, prompt, negative, extra, wildcards], outputs=[info]) + btn_delete_style.click(fn=fn_delete_style, _js='closeDetailsEN', inputs=[info], outputs=[info]) - def show_details(text, img, desc, info, meta, params): + def show_details(text, img, desc, info, meta, description, prompt, negative, parameters, wildcards, params, _dummy1=None, _dummy2=None): page, item = get_item(state, params) if item is not None and hasattr(item, 'name'): stat = os.stat(item.filename) if os.path.exists(item.filename) else None @@ -730,12 +770,17 @@ def show_details(text, img, desc, info, meta, params): Comment{meta.get('ss_training_comment', 'N/A')} ''' if page.title == 'Style': + description = item.description + prompt = item.prompt + negative = item.negative + parameters = item.extra + wildcards = item.wildcards style = f''' Name{item.name} Description{item.description} Preview Embedded{item.preview.startswith('data:')} ''' - desc = f'Name: {os.path.basename(item.name)}\nDescription: {item.description}\nPrompt: {item.prompt}\nNegative: {item.negative}\nExtra: {item.extra}\n' + # desc = f'Name: {os.path.basename(item.name)}\nDescription: {item.description}\nPrompt: {item.prompt}\nNegative: {item.negative}\nExtra: {item.extra}\n' text = f'''

{item.name}

@@ -752,7 +797,21 @@ def show_details(text, img, desc, info, meta, params):
{note} ''' - return [text, img, desc, info, meta, gr.update(visible=item is not None)] + return [ + text, # gr.html + img, # gr.image + desc, # gr.textbox + info, # gr.json + meta, # gr.json + description, # gr.textbox + prompt, # gr.textbox + negative, # gr.textbox + parameters, # gr.textbox + wildcards, # gr.textbox + gr.update(visible=item is not None), # details ui visible + gr.update(visible=page is not None and page.title != 'Style'), # details ui tabs visible + gr.update(visible=page is not None and page.title == 'Style'), # details ui text visible + ] def ui_refresh_click(title): pages = [] @@ -796,7 +855,7 @@ def ui_save_click(): else: prompt = '' params = generation_parameters_copypaste.parse_generation_parameters(prompt) - res = show_details(text=None, img=None, desc=None, info=None, meta=None, params=params) + res = show_details(text=None, img=None, desc=None, info=None, meta=None, parameters=None, description=None, prompt=None, negative=None, wildcards=None, params=params) return res def ui_quicksave_click(name): @@ -836,7 +895,7 @@ def ui_sort_cards(msg): ui.button_scan.click(fn=ui_scan_click, _js='getENActivePage', inputs=[ui.search], outputs=ui.pages) ui.button_save.click(fn=ui_save_click, inputs=[], outputs=ui.details_components + [ui.details]) ui.button_quicksave.click(fn=ui_quicksave_click, _js="() => prompt('Prompt name', '')", inputs=[ui.search], outputs=[]) - ui.button_details.click(show_details, _js="getCardDetails", inputs=ui.details_components + [dummy], outputs=ui.details_components + [ui.details]) + ui.button_details.click(show_details, _js="getCardDetails", inputs=ui.details_components + [dummy, dummy, dummy], outputs=ui.details_components + [ui.details, ui.details_tabs, ui.details_text]) ui.state.change(state_change, inputs=[ui.state], outputs=[]) return ui diff --git a/modules/ui_extra_networks_checkpoints.py b/modules/ui_extra_networks_checkpoints.py index ab11f0f1d..75ad968ed 100644 --- a/modules/ui_extra_networks_checkpoints.py +++ b/modules/ui_extra_networks_checkpoints.py @@ -15,8 +15,7 @@ def refresh(self): shared.refresh_checkpoints() def list_reference(self): # pylint: disable=inconsistent-return-statements - reference_models = shared.readfile(os.path.join('html', 'reference.json')) - for k, v in reference_models.items(): + for k, v in shared.reference_models.items(): if shared.backend != shared.Backend.DIFFUSERS: if not v.get('original', False): continue diff --git a/modules/extra_networks_hypernet.py b/modules/ui_extra_networks_hypernet.py similarity index 100% rename from modules/extra_networks_hypernet.py rename to modules/ui_extra_networks_hypernet.py diff --git a/modules/ui_extra_networks_styles.py b/modules/ui_extra_networks_styles.py index 859fba2cd..d8bb13cfa 100644 --- a/modules/ui_extra_networks_styles.py +++ b/modules/ui_extra_networks_styles.py @@ -13,7 +13,7 @@ def refresh(self): def parse_desc(self, desc): lines = desc.strip().split("\n") - params = { 'name': '', 'description': '', 'prompt': '', 'negative': '', 'extra': ''} + params = { 'name': '', 'description': '', 'prompt': '', 'negative': '', 'extra': '', 'wildcards': ''} found = '' for line in lines: line = line.strip() @@ -32,6 +32,9 @@ def parse_desc(self, desc): elif line.lower().startswith('extra:'): found = 'extra' params['extra'] = line[6:].strip() + elif line.lower().startswith('wildcards:'): + found = 'wildcards' + params['wildcards'] = line[10:].strip() elif found != '': params[found] += '\n' + line if params['name'] == '': @@ -53,10 +56,11 @@ def create_style(self, 
params): "title": name, "filename": fn, "preview": self.find_preview(name), - "description": '', + "description": params.get('Description', ''), "prompt": params.get('Prompt', ''), "negative": params.get('Negative prompt', ''), - "extra": '', + "extra": params.get('Extra', ''), + "wildcards": params.get('Wildcards', ''), "local_preview": f"{name}.{shared.opts.samples_format}", } return item @@ -82,6 +86,7 @@ def create_item(self, k): "prompt": getattr(style, 'prompt', ''), "negative": getattr(style, 'negative_prompt', ''), "extra": getattr(style, 'extra', ''), + "wildcards": getattr(style, 'wildcards', ''), "local_preview": f"{fn}.{shared.opts.samples_format}", "onclick": '"' + html.escape(f"""return selectStyle({json.dumps(name)})""") + '"', "mtime": getattr(style, 'mtime', 0), diff --git a/modules/ui_img2img.py b/modules/ui_img2img.py index 8defb34eb..1c6f75953 100644 --- a/modules/ui_img2img.py +++ b/modules/ui_img2img.py @@ -58,10 +58,10 @@ def add_copy_image_controls(tab_name, elem): with gr.Row(variant="compact", elem_id=f"img2img_copy_to_{tab_name}"): for title, name in zip(['➠ Image', '➠ Sketch', '➠ Inpaint', '➠ Composite'], ['img2img', 'sketch', 'inpaint', 'inpaint_sketch']): if name == tab_name: - gr.Button(title, interactive=False) + gr.Button(title, elem_id=f'copy_to_{name}', interactive=False) copy_image_destinations[name] = elem continue - button = gr.Button(title) + button = gr.Button(title, elem_id=f'copy_to_{name}') copy_image_buttons.append((button, name, elem)) with gr.Tabs(elem_id="mode_img2img"): @@ -121,7 +121,7 @@ def update_orig(image, state): with gr.Group(elem_classes="settings-accordion"): steps, sampler_index = ui_sections.create_sampler_inputs('img2img') - resize_mode, resize_name, width, height, scale_by, selected_scale_tab = ui_sections.create_resize_inputs('img2img', [init_img, sketch]) + resize_mode, resize_name, width, height, scale_by, selected_scale_tab = ui_sections.create_resize_inputs('img2img', [init_img, sketch], latent=True) batch_count, batch_size = ui_sections.create_batch_inputs('img2img') seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w = ui_sections.create_seed_inputs('img2img') @@ -134,7 +134,7 @@ def update_orig(image, state): hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio, = ui_sections.create_correction_inputs('img2img') # with gr.Group(elem_id="inpaint_controls", visible=False) as inpaint_controls: - with gr.Accordion(open=True, label="Mask", elem_classes=["small-accordion"], elem_id="img2img_mask_group") as inpaint_controls: + with gr.Accordion(open=False, label="Mask", elem_classes=["small-accordion"], elem_id="img2img_mask_group") as inpaint_controls: with gr.Row(): mask_blur = gr.Slider(label='Blur', minimum=0, maximum=64, step=1, value=4, elem_id="img2img_mask_blur") inpaint_full_res_padding = gr.Slider(label='Padding', minimum=0, maximum=256, step=4, value=32, elem_id="img2img_inpaint_full_res_padding") diff --git a/modules/ui_interrogate.py b/modules/ui_interrogate.py deleted file mode 100644 index 930fe8bb9..000000000 --- a/modules/ui_interrogate.py +++ /dev/null @@ -1,231 +0,0 @@ -import os -import gradio as gr -import torch -from PIL import Image -import modules.generation_parameters_copypaste as parameters_copypaste -from modules import devices, lowvram, shared, paths, ui_common - - -ci = None -low_vram = False - - -class BatchWriter: - def __init__(self, 
folder): - self.folder = folder - self.csv, self.file = None, None - - def add(self, file, prompt): - txt_file = os.path.splitext(file)[0] + ".txt" - with open(os.path.join(self.folder, txt_file), 'w', encoding='utf-8') as f: - f.write(prompt) - - def close(self): - if self.file is not None: - self.file.close() - - -def get_models(): - import open_clip - return ['/'.join(x) for x in open_clip.list_pretrained()] - - -def load_interrogator(clip_model_name): - from clip_interrogator import Config, Interrogator - global ci # pylint: disable=global-statement - if ci is None: - config = Config(device=devices.get_optimal_device(), cache_path=os.path.join(paths.models_path, 'Interrogator'), clip_model_name=clip_model_name, quiet=True) - if low_vram: - config.apply_low_vram_defaults() - shared.log.info(f'Interrogate load: config={config}') - ci = Interrogator(config) - elif clip_model_name != ci.config.clip_model_name: - ci.config.clip_model_name = clip_model_name - shared.log.info(f'Interrogate load: config={ci.config}') - ci.load_clip_model() - - -def unload(): - if ci is not None: - shared.log.debug('Interrogate offload') - ci.caption_model = ci.caption_model.to(devices.cpu) - ci.clip_model = ci.clip_model.to(devices.cpu) - ci.caption_offloaded = True - ci.clip_offloaded = True - devices.torch_gc() - - -def interrogate(image, mode, caption=None): - shared.log.info(f'Interrogate: image={image} mode={mode} config={ci.config}') - if mode == 'best': - prompt = ci.interrogate(image, caption=caption) - elif mode == 'caption': - prompt = ci.generate_caption(image) if caption is None else caption - elif mode == 'classic': - prompt = ci.interrogate_classic(image, caption=caption) - elif mode == 'fast': - prompt = ci.interrogate_fast(image, caption=caption) - elif mode == 'negative': - prompt = ci.interrogate_negative(image) - else: - raise RuntimeError(f"Unknown mode {mode}") - return prompt - - -def interrogate_image(image, model, mode): - shared.state.begin() - shared.state.job = 'interrogate' - try: - if shared.backend == shared.Backend.ORIGINAL and (shared.cmd_opts.lowvram or shared.cmd_opts.medvram): - lowvram.send_everything_to_cpu() - devices.torch_gc() - load_interrogator(model) - image = image.convert('RGB') - shared.log.info(f'Interrogate: image={image} mode={mode} config={ci.config}') - prompt = interrogate(image, mode) - except Exception as e: - prompt = f"Exception {type(e)}" - shared.log.error(f'Interrogate: {e}') - shared.state.end() - return prompt - - -def interrogate_batch(batch_files, batch_folder, batch_str, model, mode, write): - files = [] - if batch_files is not None: - files += [f.name for f in batch_files] - if batch_folder is not None: - files += [f.name for f in batch_folder] - if batch_str is not None and len(batch_str) > 0 and os.path.exists(batch_str) and os.path.isdir(batch_str): - files += [os.path.join(batch_str, f) for f in os.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))] - if len(files) == 0: - shared.log.error('Interrogate batch no images') - return '' - shared.state.begin() - shared.state.job = 'batch interrogate' - prompts = [] - try: - if shared.backend == shared.Backend.ORIGINAL and (shared.cmd_opts.lowvram or shared.cmd_opts.medvram): - lowvram.send_everything_to_cpu() - devices.torch_gc() - load_interrogator(model) - shared.log.info(f'Interrogate batch: images={len(files)} mode={mode} config={ci.config}') - captions = [] - # first pass: generate captions - for file in files: - caption = "" - try: - if shared.state.interrupted: - 
break - image = Image.open(file).convert('RGB') - caption = ci.generate_caption(image) - except Exception as e: - shared.log.error(f'Interrogate caption: {e}') - finally: - captions.append(caption) - # second pass: interrogate - if write: - writer = BatchWriter(os.path.dirname(files[0])) - for idx, file in enumerate(files): - try: - if shared.state.interrupted: - break - image = Image.open(file).convert('RGB') - prompt = interrogate(image, mode, caption=captions[idx]) - prompts.append(prompt) - if write: - writer.add(file, prompt) - except OSError as e: - shared.log.error(f'Interrogate batch: {e}') - if write: - writer.close() - ci.config.quiet = False - unload() - except Exception as e: - shared.log.error(f'Interrogate batch: {e}') - shared.state.end() - return '\n\n'.join(prompts) - - -def analyze_image(image, model): - load_interrogator(model) - image = image.convert('RGB') - image_features = ci.image_to_features(image) - top_mediums = ci.mediums.rank(image_features, 5) - top_artists = ci.artists.rank(image_features, 5) - top_movements = ci.movements.rank(image_features, 5) - top_trendings = ci.trendings.rank(image_features, 5) - top_flavors = ci.flavors.rank(image_features, 5) - medium_ranks = dict(zip(top_mediums, ci.similarities(image_features, top_mediums))) - artist_ranks = dict(zip(top_artists, ci.similarities(image_features, top_artists))) - movement_ranks = dict(zip(top_movements, ci.similarities(image_features, top_movements))) - trending_ranks = dict(zip(top_trendings, ci.similarities(image_features, top_trendings))) - flavor_ranks = dict(zip(top_flavors, ci.similarities(image_features, top_flavors))) - return medium_ranks, artist_ranks, movement_ranks, trending_ranks, flavor_ranks - - -def create_ui(): - global low_vram # pylint: disable=global-statement - low_vram = shared.cmd_opts.lowvram or shared.cmd_opts.medvram - if not low_vram and torch.cuda.is_available(): - device = devices.get_optimal_device() - vram_total = torch.cuda.get_device_properties(device).total_memory - if vram_total <= 12*1024*1024*1024: - low_vram = True - with gr.Row(elem_id="interrogate_tab"): - with gr.Column(): - with gr.Tab("Image"): - with gr.Row(): - image = gr.Image(type='pil', label="Image") - with gr.Row(): - prompt = gr.Textbox(label="Prompt", lines=3) - with gr.Row(): - medium = gr.Label(label="Medium", num_top_classes=5) - artist = gr.Label(label="Artist", num_top_classes=5) - movement = gr.Label(label="Movement", num_top_classes=5) - trending = gr.Label(label="Trending", num_top_classes=5) - flavor = gr.Label(label="Flavor", num_top_classes=5) - with gr.Row(): - clip_model = gr.Dropdown([], value='ViT-L-14/openai', label='CLIP Model') - ui_common.create_refresh_button(clip_model, get_models, lambda: {"choices": get_models()}, 'refresh_interrogate_models') - mode = gr.Radio(['best', 'fast', 'classic', 'caption', 'negative'], label='Mode', value='best') - with gr.Row(): - btn_interrogate_img = gr.Button("Interrogate", variant='primary') - btn_analyze_img = gr.Button("Analyze", variant='primary') - btn_unload = gr.Button("Unload") - with gr.Row(): - buttons = parameters_copypaste.create_buttons(["txt2img", "img2img", "extras", "control"]) - for tabname, button in buttons.items(): - parameters_copypaste.register_paste_params_button(parameters_copypaste.ParamBinding(paste_button=button, tabname=tabname, source_text_component=prompt, source_image_component=image,)) - btn_interrogate_img.click(interrogate_image, inputs=[image, clip_model, mode], outputs=prompt) - 
btn_analyze_img.click(analyze_image, inputs=[image, clip_model], outputs=[medium, artist, movement, trending, flavor]) - btn_unload.click(unload) - with gr.Tab("Batch"): - with gr.Row(): - batch_files = gr.File(label="Files", show_label=True, file_count='multiple', file_types=['image'], type='file', interactive=True, height=100) - with gr.Row(): - batch_folder = gr.File(label="Folder", show_label=True, file_count='directory', file_types=['image'], type='file', interactive=True, height=100) - with gr.Row(): - batch_str = gr.Text(label="Folder", value="", interactive=True) - with gr.Row(): - batch = gr.Text(label="Prompts", lines=10) - with gr.Row(): - write = gr.Checkbox(label='Write prompts to files', value=False) - with gr.Row(): - clip_model = gr.Dropdown([], value='ViT-L-14/openai', label='CLIP Model') - ui_common.create_refresh_button(clip_model, get_models, lambda: {"choices": get_models()}, 'refresh_interrogate_models') - with gr.Row(): - btn_interrogate_batch = gr.Button("Interrogate", variant='primary') - btn_interrogate_batch.click(interrogate_batch, inputs=[batch_files, batch_folder, batch_str, clip_model, mode, write], outputs=[batch]) - with gr.Tab("VQA"): - from modules import vqa - with gr.Row(): - vqa_image = gr.Image(type='pil', label="Image") - with gr.Row(): - vqa_question = gr.Textbox(label="Question") - with gr.Row(): - vqa_answer = gr.Textbox(label="Answer", lines=3) - with gr.Row(): - vqa_model = gr.Dropdown(list(vqa.MODELS), value='None', label='VQA Model') - vqa_submit = gr.Button("Interrogate", variant='primary') - vqa_submit.click(vqa.interrogate, inputs=[vqa_question, vqa_image, vqa_model], outputs=[vqa_answer]) diff --git a/modules/ui_javascript.py b/modules/ui_javascript.py index b6201c3cb..c8edb20c7 100644 --- a/modules/ui_javascript.py +++ b/modules/ui_javascript.py @@ -60,6 +60,8 @@ def stylesheet(fn): theme_name = modules.shared.cmd_opts.theme or modules.shared.opts.gradio_theme or '' if theme_name == 'default': theme_name = 'black-teal' + if theme_name == 'modern' or theme_name == 'modern/default': + theme_name = 'modern/sdxl_alpha' if theme_name.startswith('modern/'): theme_name = theme_name[7:] theme_folder = next((e.path for e in modules.extensions.extensions if e.name == 'sdnext-ui-ux'), None) diff --git a/modules/ui_models.py b/modules/ui_models.py index e0b6a0160..a434834d5 100644 --- a/modules/ui_models.py +++ b/modules/ui_models.py @@ -474,9 +474,13 @@ def civit_select1(evt: gr.SelectData, in_data): for model in data: if model['id'] == model_id: for d in model['modelVersions']: - if d.get('images') is not None and len(d['images']) > 0 and len(d['images'][0]['url']) > 0: - preview_img = d['images'][0]['url'] - data2.append([d['id'], d['modelId'], d['name'], d['baseModel'], d['createdAt']]) + try: + if d.get('images') is not None and len(d['images']) > 0 and len(d['images'][0]['url']) > 0: + preview_img = d['images'][0]['url'] + data2.append([d.get('id', None), d.get('modelId', None) or model_id, d.get('name', None), d.get('baseModel', None), d.get('createdAt', None) or d.get('publishedAt', None)]) + except Exception as e: + log.error(f'CivitAI select: model="{in_data[evt.index[0]]}" {e}') + log.error(f'CivitAI version data={type(d)}: {d}') log.debug(f'CivitAI select: model="{in_data[evt.index[0]]}" versions={len(data2)}') return data2, None, preview_img @@ -501,12 +505,12 @@ def civit_select3(evt: gr.SelectData, in_data): log.debug(f'CivitAI select: variant={in_data[evt.index[0]]}') return in_data[evt.index[0]][3], in_data[evt.index[0]][0], 
gr.update(interactive=True) - def civit_download_model(model_url: str, model_name: str, model_path: str, model_type: str, image_url: str, token: str = None): + def civit_download_model(model_url: str, model_name: str, model_path: str, model_type: str, token: str = None): if model_url is None or len(model_url) == 0: return 'No model selected' try: from modules.modelloader import download_civit_model - res = download_civit_model(model_url, model_name, model_path, model_type, image_url, token) + res = download_civit_model(model_url, model_name, model_path, model_type, token=token) except Exception as e: res = f"CivitAI model downloaded error: model={model_url} {e}" log.error(res) @@ -649,7 +653,7 @@ def is_visible(component): civit_results1.change(fn=is_visible, inputs=[civit_results1], outputs=[civit_results1]) civit_results2.change(fn=is_visible, inputs=[civit_results2], outputs=[civit_results2]) civit_results3.change(fn=is_visible, inputs=[civit_results3], outputs=[civit_results3]) - civit_download_model_btn.click(fn=civit_download_model, inputs=[civit_selected, civit_name, civit_path, civit_model_type, models_image, civit_token], outputs=[models_outcome]) + civit_download_model_btn.click(fn=civit_download_model, inputs=[civit_selected, civit_name, civit_path, civit_model_type, civit_token], outputs=[models_outcome]) civit_previews_btn.click(fn=civit_search_metadata, inputs=[civit_previews_rehash, civit_previews_rehash], outputs=[models_outcome]) with gr.Tab(label="Update"): @@ -760,7 +764,7 @@ def civit_update_download(): model_name = f'{selected_model.name} {selected_model.latest}.safetensors' else: model_name = selected_model.latest_name - return civit_download_model(selected_model.url, model_name, model_path='', model_type='Model', image_url=None) + return civit_download_model(selected_model.url, model_name, model_path='', model_type='Model') civit_update_btn.click(fn=civit_update_metadata, inputs=[], outputs=[civit_results4, models_outcome]) civit_results4.select(fn=civit_update_select, inputs=[civit_results4], outputs=[models_outcome, civit_update_download_btn]) diff --git a/modules/ui_postprocessing.py b/modules/ui_postprocessing.py index 5effd83d3..5322840a8 100644 --- a/modules/ui_postprocessing.py +++ b/modules/ui_postprocessing.py @@ -1,18 +1,18 @@ import json import gradio as gr -from modules import scripts, shared, ui_common, postprocessing, call_queue +from modules import scripts, shared, ui_common, postprocessing, call_queue, interrogate import modules.generation_parameters_copypaste as parameters_copypaste from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call, wrap_gradio_call # pylint: disable=unused-import from modules.extras import run_pnginfo from modules.ui_common import infotext_to_html -def wrap_pnginfo(image): +def submit_info(image): _, geninfo, info = run_pnginfo(image) return infotext_to_html(geninfo), info, geninfo -def submit_click(tab_index, extras_image, image_batch, extras_batch_input_dir, extras_batch_output_dir, show_extras_results, save_output, *script_inputs): +def submit_process(tab_index, extras_image, image_batch, extras_batch_input_dir, extras_batch_output_dir, show_extras_results, save_output, *script_inputs): result_images, geninfo, js_info = postprocessing.run_postprocessing(tab_index, extras_image, image_batch, extras_batch_input_dir, extras_batch_output_dir, show_extras_results, *script_inputs, save_output=save_output) return result_images, geninfo, json.dumps(js_info), '' @@ -22,18 +22,72 @@ def create_ui(): with 
gr.Row(equal_height=False, variant='compact', elem_classes="extras"): with gr.Column(variant='compact'): with gr.Tabs(elem_id="mode_extras"): - with gr.TabItem('Single Image', id="single_image", elem_id="extras_single_tab") as tab_single: - extras_image = gr.Image(label="Source", source="upload", interactive=True, type="pil", elem_id="extras_image") - with gr.TabItem('Process Batch', id="batch_process", elem_id="extras_batch_process_tab") as tab_batch: + with gr.Tab('Process Image', id="single_image", elem_id="extras_single_tab") as tab_single: + with gr.Row(): + extras_image = gr.Image(label="Source", source="upload", interactive=True, type="pil", elem_id="extras_image") + with gr.Row(elem_id='copy_buttons_process'): + copy_process_buttons = parameters_copypaste.create_buttons(["txt2img", "img2img", "inpaint", "control"]) + with gr.Tab('Process Batch', id="batch_process", elem_id="extras_batch_process_tab") as tab_batch: image_batch = gr.Files(label="Batch process", interactive=True, elem_id="extras_image_batch") - with gr.TabItem('Process Folder', id="batch_from_directory", elem_id="extras_batch_directory_tab") as tab_batch_dir: + with gr.Tab('Process Folder', id="batch_from_directory", elem_id="extras_batch_directory_tab") as tab_batch_dir: extras_batch_input_dir = gr.Textbox(label="Input directory", **shared.hide_dirs, placeholder="A directory on the same machine where the server is running.", elem_id="extras_batch_input_dir") extras_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs, placeholder="Leave blank to save images to the default path.", elem_id="extras_batch_output_dir") show_extras_results = gr.Checkbox(label='Show result images', value=True, elem_id="extras_show_extras_results") - with gr.Row(): - buttons = parameters_copypaste.create_buttons(["txt2img", "img2img", "inpaint", "control"]) - with gr.Row(): - save_output = gr.Checkbox(label='Save output', value=True, elem_id="extras_save_output") + + with gr.Tab("Interrogate Image"): + with gr.Row(): + image = gr.Image(type='pil', label="Image") + with gr.Row(): + prompt = gr.Textbox(label="Prompt", lines=3) + with gr.Row(elem_id="interrogate_labels"): + medium = gr.Label(elem_id="interrogate_label_medium", label="Medium", num_top_classes=5) + artist = gr.Label(elem_id="interrogate_label_artist", label="Artist", num_top_classes=5) + movement = gr.Label(elem_id="interrogate_label_movement", label="Movement", num_top_classes=5) + trending = gr.Label(elem_id="interrogate_label_trending", label="Trending", num_top_classes=5) + flavor = gr.Label(elem_id="interrogate_label_flavor", label="Flavor", num_top_classes=5) + with gr.Row(): + clip_model = gr.Dropdown([], value='ViT-L-14/openai', label='CLIP Model') + ui_common.create_refresh_button(clip_model, interrogate.get_clip_models, lambda: {"choices": interrogate.get_clip_models()}, 'refresh_interrogate_models') + mode = gr.Radio(['best', 'fast', 'classic', 'caption', 'negative'], label='Mode', value='best') + with gr.Row(elem_id='interrogate_buttons_image'): + btn_interrogate_img = gr.Button("Interrogate", elem_id="interrogate_btn_interrogate", variant='primary') + btn_analyze_img = gr.Button("Analyze", elem_id="interrogate_btn_analyze", variant='primary') + btn_unload = gr.Button("Unload", elem_id="interrogate_btn_unload") + with gr.Row(elem_id='copy_buttons_interrogate'): + copy_interrogate_buttons = parameters_copypaste.create_buttons(["txt2img", "img2img", "extras", "control"]) + btn_interrogate_img.click(interrogate.interrogate_image, inputs=[image, 
clip_model, mode], outputs=prompt) + btn_analyze_img.click(interrogate.analyze_image, inputs=[image, clip_model], outputs=[medium, artist, movement, trending, flavor]) + btn_unload.click(interrogate.unload_clip_model) + with gr.Tab("Interrogate Batch"): + with gr.Row(): + batch_files = gr.File(label="Files", show_label=True, file_count='multiple', file_types=['image'], type='file', interactive=True, height=100) + with gr.Row(): + batch_folder = gr.File(label="Folder", show_label=True, file_count='directory', file_types=['image'], type='file', interactive=True, height=100) + with gr.Row(): + batch_str = gr.Text(label="Folder", value="", interactive=True) + with gr.Row(): + batch = gr.Text(label="Prompts", lines=10) + with gr.Row(): + clip_model = gr.Dropdown([], value='ViT-L-14/openai', label='CLIP Model') + ui_common.create_refresh_button(clip_model, interrogate.get_clip_models, lambda: {"choices": interrogate.get_clip_models()}, 'refresh_interrogate_models') + with gr.Row(elem_id='interrogate_buttons_batch'): + btn_interrogate_batch = gr.Button("Interrogate", elem_id="interrogate_btn_interrogate", variant='primary') + with gr.Tab("Visual Query"): + from modules import vqa + with gr.Row(): + vqa_image = gr.Image(type='pil', label="Image") + with gr.Row(): + vqa_question = gr.Textbox(label="Question", placeholder="Describe the image") + with gr.Row(): + vqa_answer = gr.Textbox(label="Answer", lines=3) + with gr.Row(elem_id='interrogate_buttons_query'): + vqa_model = gr.Dropdown(list(vqa.MODELS), value='Moondream 2', label='VQA Model') + vqa_submit = gr.Button("Interrogate", elem_id="interrogate_btn_interrogate", variant='primary') + vqa_submit.click(vqa.interrogate, inputs=[vqa_question, vqa_image, vqa_model], outputs=[vqa_answer]) + + with gr.Row(): + save_output = gr.Checkbox(label='Save output', value=True, elem_id="extras_save_output") + script_inputs = scripts.scripts_postproc.setup_ui() with gr.Column(): id_part = 'extras' @@ -47,20 +101,23 @@ def create_ui(): gr.HTML('File metadata') exif_info = gr.HTML(elem_id="pnginfo_html_info") gen_info = gr.Text(elem_id="pnginfo_gen_info", visible=False) - for tabname, button in buttons.items(): + for tabname, button in copy_process_buttons.items(): parameters_copypaste.register_paste_params_button(parameters_copypaste.ParamBinding(paste_button=button, tabname=tabname, source_text_component=gen_info, source_image_component=extras_image)) + for tabname, button in copy_interrogate_buttons.items(): + parameters_copypaste.register_paste_params_button(parameters_copypaste.ParamBinding(paste_button=button, tabname=tabname, source_text_component=prompt, source_image_component=image,)) + tab_single.select(fn=lambda: 0, inputs=[], outputs=[tab_index]) tab_batch.select(fn=lambda: 1, inputs=[], outputs=[tab_index]) tab_batch_dir.select(fn=lambda: 2, inputs=[], outputs=[tab_index]) extras_image.change( - fn=wrap_gradio_call(wrap_pnginfo), + fn=wrap_gradio_call(submit_info), inputs=[extras_image], outputs=[html_info_formatted, exif_info, gen_info], ) submit.click( _js="submit_postprocessing", - fn=call_queue.wrap_gradio_gpu_call(submit_click, extra_outputs=[None, '']), + fn=call_queue.wrap_gradio_gpu_call(submit_process, extra_outputs=[None, '']), inputs=[ tab_index, extras_image, @@ -78,6 +135,11 @@ def create_ui(): html_log, ] ) + btn_interrogate_batch.click( + fn=interrogate.interrogate_batch, + inputs=[batch_files, batch_folder, batch_str, clip_model, mode, save_output], + outputs=[batch], + ) parameters_copypaste.add_paste_fields("extras", extras_image, 
None) diff --git a/modules/ui_sections.py b/modules/ui_sections.py index 3fa9d9f4c..5b4538b98 100644 --- a/modules/ui_sections.py +++ b/modules/ui_sections.py @@ -55,6 +55,34 @@ def parse_style(styles): return prompt, styles, negative_prompt, submit, button_paste, button_extra, token_counter, token_button, negative_token_counter, negative_token_button +def ar_change(ar, width, height): + if ar == 'AR': + return gr.update(interactive=True), gr.update(interactive=True) + try: + (w, h) = [float(x) for x in ar.split(':')] + except Exception as e: + shared.log.warning(f"Invalid aspect ratio: {ar} {e}") + return gr.update(interactive=True), gr.update(interactive=True) + if w > h: + return gr.update(interactive=True, value=width), gr.update(interactive=False, value=int(width * h / w)) + elif w < h: + return gr.update(interactive=False, value=int(height * w / h)), gr.update(interactive=True, value=height) + else: + return gr.update(interactive=True, value=width), gr.update(interactive=False, value=width) + + +def create_resolution_inputs(tab): + width = gr.Slider(minimum=64, maximum=4096, step=8, label="Width", value=512, elem_id=f"{tab}_width") + height = gr.Slider(minimum=64, maximum=4096, step=8, label="Height", value=512, elem_id=f"{tab}_height") + ar_list = ['AR'] + [x.strip() for x in shared.opts.aspect_ratios.split(',') if x.strip() != ''] + ar_dropdown = gr.Dropdown(show_label=False, interactive=True, choices=ar_list, value=ar_list[0], elem_id=f"{tab}_ar", elem_classes=["ar-dropdown"]) + for c in [ar_dropdown, width, height]: + c.change(fn=ar_change, inputs=[ar_dropdown, width, height], outputs=[width, height], show_progress=False) + res_switch_btn = ToolButton(value=ui_symbols.switch, elem_id=f"{tab}_res_switch_btn", label="Switch dims") + res_switch_btn.click(lambda w, h: (h, w), inputs=[width, height], outputs=[width, height], show_progress=False) + return width, height + + def create_interrogate_buttons(tab): button_interrogate = gr.Button(ui_symbols.int_clip, elem_id=f"{tab}_interrogate", elem_classes=['interrogate-clip']) button_deepbooru = gr.Button(ui_symbols.int_blip, elem_id=f"{tab}_deepbooru", elem_classes=['interrogate-blip']) @@ -74,8 +102,6 @@ def create_batch_inputs(tab): with gr.Row(elem_id=f"{tab}_row_batch"): batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id=f"{tab}_batch_count") batch_size = gr.Slider(minimum=1, maximum=32, step=1, label='Batch size', value=1, elem_id=f"{tab}_batch_size") - batch_switch_btn = ToolButton(value=ui_symbols.switch, elem_id=f"{tab}_batch_switch_btn", label="Switch dims") - batch_switch_btn.click(lambda w, h: (h, w), inputs=[batch_count, batch_size], outputs=[batch_count, batch_size], show_progress=False) return batch_count, batch_size @@ -114,7 +140,7 @@ def create_advanced_inputs(tab): gr.HTML('
') with gr.Row(elem_id=f"{tab}_advanced_options"): full_quality = gr.Checkbox(label='Full quality', value=True, elem_id=f"{tab}_full_quality") - restore_faces = gr.Checkbox(label='Face restore', value=False, visible=len(shared.face_restorers) > 1, elem_id=f"{tab}_restore_faces") + restore_faces = gr.Checkbox(label='Face restore', value=False, elem_id=f"{tab}_restore_faces") tiling = gr.Checkbox(label='Tiling', value=False, elem_id=f"{tab}_tiling", visible=True) return cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, diffusers_sag_scale, cfg_end, full_quality, restore_faces, tiling @@ -200,10 +226,10 @@ def create_hires_inputs(tab): hr_sampler_index = gr.Dropdown(label='Secondary sampler', elem_id=f"{tab}_sampling_alt", choices=[x.name for x in sd_samplers.samplers], value='Default', type="index") with gr.Row(elem_id=f"{tab}_hires_row2"): hr_second_pass_steps = gr.Slider(minimum=0, maximum=99, step=1, label='HiRes steps', elem_id=f"{tab}_steps_alt", value=20) - denoising_strength = gr.Slider(minimum=0.0, maximum=0.99, step=0.01, label='Strength', value=0.5, elem_id=f"{tab}_denoising_strength") + denoising_strength = gr.Slider(minimum=0.0, maximum=0.99, step=0.01, label='Strength', value=0.3, elem_id=f"{tab}_denoising_strength") with gr.Group(visible=shared.backend == shared.Backend.DIFFUSERS): with gr.Row(elem_id=f"{tab}_refiner_row1", variant="compact"): - refiner_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Refiner start', value=0.8, elem_id=f"{tab}_refiner_start") + refiner_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Refiner start', value=0.0, elem_id=f"{tab}_refiner_start") refiner_steps = gr.Slider(minimum=0, maximum=99, step=1, label="Refiner steps", elem_id=f"{tab}_refiner_steps", value=10) with gr.Row(elem_id=f"{tab}_refiner_row3", variant="compact"): refiner_prompt = gr.Textbox(value='', label='Secondary prompt', elem_id=f"{tab}_refiner_prompt") @@ -212,23 +238,14 @@ def create_hires_inputs(tab): return enable_hr, hr_sampler_index, denoising_strength, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative -def create_resize_inputs(tab, images, scale_visible=True, mode=None, accordion=True, latent=False): - def resize_from_to_html(width, height, scale_by): - target_width = int(width * scale_by) - target_height = int(height * scale_by) - if not target_width or not target_height: - return "Hires resize: no image selected" - return f"Hires resize: from {width}x{height} to {target_width}x{target_height}" - +def create_resize_inputs(tab, images, accordion=True, latent=False): dummy_component = gr.Number(visible=False, value=0) with gr.Accordion(open=False, label="Resize", elem_classes=["small-accordion"], elem_id=f"{tab}_resize_group") if accordion else gr.Group(): + # with gr.Row(): + # resize_mode = gr.Radio(label="Mode", elem_id=f"{tab}_resize_mode", choices=shared.resize_modes, type="index", value='Fixed') with gr.Row(): - if mode is not None: - resize_mode = gr.Radio(label="Resize mode", elem_id=f"{tab}_resize_mode", choices=shared.resize_modes, type="index", value=mode, visible=False) - else: - resize_mode = gr.Radio(label="Resize mode", elem_id=f"{tab}_resize_mode", choices=shared.resize_modes, type="index", value='None') - with gr.Row(): - resize_name = gr.Dropdown(label="Resize method", elem_id=f"{tab}_resize_name", choices=([] if not latent else list(shared.latent_upscale_modes)) + [x.name for x in shared.sd_upscalers], 
value=shared.latent_upscale_default_mode) + resize_mode = gr.Dropdown(label="Mode", elem_id=f"{tab}_resize_mode", choices=shared.resize_modes, type="index", value='Fixed') + resize_name = gr.Dropdown(label="Method", elem_id=f"{tab}_resize_name", choices=([] if not latent else list(shared.latent_upscale_modes)) + [x.name for x in shared.sd_upscalers], value=shared.latent_upscale_default_mode) ui_common.create_refresh_button(resize_name, modelloader.load_upscalers, lambda: {"choices": modelloader.load_upscalers()}, 'refresh_upscalers') with gr.Row(visible=True) as _resize_group: @@ -241,25 +258,18 @@ def resize_from_to_html(width, height, scale_by): with gr.Row(): width = gr.Slider(minimum=64, maximum=8192, step=8, label="Width", value=512, elem_id=f"{tab}_width") height = gr.Slider(minimum=64, maximum=8192, step=8, label="Height", value=512, elem_id=f"{tab}_height") + ar_list = ['AR'] + [x.strip() for x in shared.opts.aspect_ratios.split(',') if x.strip() != ''] + ar_dropdown = gr.Dropdown(show_label=False, interactive=True, choices=ar_list, value=ar_list[0], elem_id=f"{tab}_ar", elem_classes=["ar-dropdown"]) + for c in [ar_dropdown, width, height]: + c.change(fn=ar_change, inputs=[ar_dropdown, width, height], outputs=[width, height], show_progress=False) res_switch_btn = ToolButton(value=ui_symbols.switch, elem_id=f"{tab}_res_switch_btn") res_switch_btn.click(lambda w, h: (h, w), inputs=[width, height], outputs=[width, height], show_progress=False) detect_image_size_btn = ToolButton(value=ui_symbols.detect, elem_id=f"{tab}_detect_image_size_btn") detect_image_size_btn.click(fn=lambda w, h, _: (w or gr.update(), h or gr.update()), _js=f'currentImageResolution{tab}', inputs=[dummy_component, dummy_component, dummy_component], outputs=[width, height], show_progress=False) - with gr.Tab(label="Scale") as tab_scale_by: scale_by = gr.Slider(minimum=0.05, maximum=8.0, step=0.05, label="Scale", value=1.0, elem_id=f"{tab}_scale") - if scale_visible: - with gr.Row(): - scale_by_html = gr.HTML(resize_from_to_html(0, 0, 0.0), elem_id=f"{tab}_scale_resolution_preview") - gr.Slider(label="Unused", elem_id=f"{tab}_unused_scale_by_slider") - button_update_resize_to = gr.Button(visible=False, elem_id=f"{tab}_update_resize_to") - on_change_args = dict(fn=resize_from_to_html, _js=f'currentImageResolution{tab}', inputs=[dummy_component, dummy_component, scale_by], outputs=scale_by_html, show_progress=False) - scale_by.release(**on_change_args) - button_update_resize_to.click(**on_change_args) - for component in images: component.change(fn=lambda: None, _js="updateImg2imgResizeToTextAfterChangingImage", inputs=[], outputs=[], show_progress=False) - tab_scale_to.select(fn=lambda: 0, inputs=[], outputs=[selected_scale_tab]) tab_scale_by.select(fn=lambda: 1, inputs=[], outputs=[selected_scale_tab]) # resize_mode.change(fn=lambda x: gr.update(visible=x != 0), inputs=[resize_mode], outputs=[_resize_group]) diff --git a/modules/ui_txt2img.py b/modules/ui_txt2img.py index 7a58acbb8..18b4db8ae 100644 --- a/modules/ui_txt2img.py +++ b/modules/ui_txt2img.py @@ -1,7 +1,7 @@ import gradio as gr from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call -from modules import timer, shared, ui_common, ui_symbols, ui_sections, generation_parameters_copypaste -from modules.ui_components import ToolButton +from modules import timer, shared, ui_common, ui_sections, generation_parameters_copypaste +from modules.ui_components import ToolButton # pylint: disable=unused-import def calc_resolution_hires(width, height, 
hr_scale, hr_resize_x, hr_resize_y, hr_upscaler): @@ -35,10 +35,7 @@ def create_ui(): with gr.Column(variant='compact', elem_id="txt2img_settings"): with gr.Row(): - width = gr.Slider(minimum=64, maximum=4096, step=8, label="Width", value=512, elem_id="txt2img_width") - height = gr.Slider(minimum=64, maximum=4096, step=8, label="Height", value=512, elem_id="txt2img_height") - res_switch_btn = ToolButton(value=ui_symbols.switch, elem_id="txt2img_res_switch_btn", label="Switch dims") - res_switch_btn.click(lambda w, h: (h, w), inputs=[width, height], outputs=[width, height], show_progress=False) + width, height = ui_sections.create_resolution_inputs('txt2img') with gr.Group(elem_classes="settings-accordion"): diff --git a/modules/upscaler.py b/modules/upscaler.py index 70e61384e..2f66c5d34 100644 --- a/modules/upscaler.py +++ b/modules/upscaler.py @@ -218,7 +218,11 @@ def compile_upscaler(model): if "Upscaler" in shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none': import torch._dynamo # pylint: disable=unused-import,redefined-outer-name torch._dynamo.reset() # pylint: disable=protected-access - shared.log.debug(f"Upscaler compile available backends: {torch._dynamo.list_backends()}") # pylint: disable=protected-access + if shared.opts.cuda_compile_backend not in torch._dynamo.list_backends(): # pylint: disable=protected-access + shared.log.warning(f"Upscaler compile not available: backend={shared.opts.cuda_compile_backend} available={torch._dynamo.list_backends()}") # pylint: disable=protected-access + return model + else: + shared.log.info(f"Upscaler compile: backend={shared.opts.cuda_compile_backend} available={torch._dynamo.list_backends()}") # pylint: disable=protected-access if shared.opts.cuda_compile_backend == "openvino_fx": from modules.intel.openvino import openvino_fx # pylint: disable=unused-import @@ -241,11 +245,8 @@ def compile_upscaler(model): shared.log.error(f"Torch inductor config error: {e}") t0 = time.time() - model = torch.compile(model, mode=shared.opts.cuda_compile_mode, backend=shared.opts.cuda_compile_backend, fullgraph=shared.opts.cuda_compile_fullgraph) # pylint: disable=attribute-defined-outside-init - setup_logging() # compile messes with logging so reset is needed - t1 = time.time() shared.log.info(f"Upscaler compile: time={t1-t0:.2f}") except Exception as e: diff --git a/modules/vqa.py b/modules/vqa.py index 8a9aa3415..3de7bef91 100644 --- a/modules/vqa.py +++ b/modules/vqa.py @@ -8,7 +8,7 @@ model = None loaded: str = None MODELS = { - "None": None, + "Moondream 2": "vikhyatk/moondream2", # 3.7GB "GIT TextCaps Base": "microsoft/git-base-textcaps", # 0.7GB "GIT VQA Base": "microsoft/git-base-vqav2", # 0.7GB "GIT VQA Large": "microsoft/git-large-vqav2", # 1.6GB @@ -40,7 +40,6 @@ def git(question: str, image: Image.Image, repo: str = None): generated_ids = model.generate(**git_dict) response = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] - model.to(devices.cpu) shared.log.debug(f'VQA: response={response}') return response @@ -80,7 +79,6 @@ def vilt(question: str, image: Image.Image, repo: str = None): idx = logits.argmax(-1).item() response = model.config.id2label[idx] - model.to(devices.cpu) shared.log.debug(f'VQA: response={response}') return response @@ -102,25 +100,50 @@ def pix(question: str, image: Image.Image, repo: str = None): outputs = model.generate(**inputs) response = processor.decode(outputs[0], skip_special_tokens=True) - model.to(devices.cpu) shared.log.debug(f'VQA: response={response}') return response 
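For orientation, a minimal sketch of how the reworked `interrogate()` entry point in this module (see the hunk that follows) is meant to be driven from the Visual Query tab; this is hypothetical standalone usage, assuming the `MODELS` mapping shown in this diff, a placeholder image path, and that `shared`/`devices` have already been initialized by the app:

```python
# Illustrative sketch only; not part of the diff.
# 'example.png' is a placeholder path; 'Moondream 2' resolves to vikhyatk/moondream2 via vqa.MODELS.
from PIL import Image
from modules import vqa

image = Image.open('example.png').convert('RGB')
# argument order matches the diff: (question, image, model label)
answer = vqa.interrogate('Describe the image', image, 'Moondream 2')
print(answer)
```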
+def moondream(question: str, image: Image.Image, repo: str = None): + global processor, model, loaded # pylint: disable=global-statement + if model is None or loaded != repo: + model = transformers.AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True) # revision = "2024-03-05" + processor = transformers.AutoTokenizer.from_pretrained(repo) # revision = "2024-03-05" + loaded = repo + model.eval() + model.to(devices.device, devices.dtype) + shared.log.debug(f'VQA: class={model.__class__.__name__} processor={processor.__class__} model={repo}') + + if len(question) < 2: + question = "Describe the image." + encoded = model.encode_image(image) + with devices.inference_context(): + response = model.answer_question(encoded, question, processor) + + shared.log.debug(f'VQA: response="{response}"') + return response + + def interrogate(vqa_question, vqa_image, vqa_model): vqa_model = MODELS.get(vqa_model, None) - shared.log.debug(f'VQA: model="{vqa_model}" question={vqa_question} image={vqa_image}') + shared.log.debug(f'VQA: model="{vqa_model}" question="{vqa_question}" image={vqa_image}') if vqa_image is None: - return 'no image provided' + answer = 'no image provided' if vqa_model is None: - return 'no model selected' + answer = 'no model selected' if 'git' in vqa_model.lower(): - return git(vqa_question, vqa_image, vqa_model) + answer = git(vqa_question, vqa_image, vqa_model) if 'vilt' in vqa_model.lower(): - return vilt(vqa_question, vqa_image, vqa_model) + answer = vilt(vqa_question, vqa_image, vqa_model) if 'blip' in vqa_model.lower(): - return blip(vqa_question, vqa_image, vqa_model) + answer = blip(vqa_question, vqa_image, vqa_model) if 'pix' in vqa_model.lower(): - return pix(vqa_question, vqa_image, vqa_model) + answer = pix(vqa_question, vqa_image, vqa_model) + if 'moondream2' in vqa_model.lower(): + answer = moondream(vqa_question, vqa_image, vqa_model) else: - return 'unknown model' + answer = 'unknown model' + if model is not None: + model.to(devices.cpu) + devices.torch_gc() + return answer diff --git a/modules/zluda.py b/modules/zluda.py index 6eb6f06bc..6d2395043 100644 --- a/modules/zluda.py +++ b/modules/zluda.py @@ -1,9 +1,16 @@ import platform import torch +from torch._prims_common import DeviceLikeType from modules import shared, devices -def test(device: torch.device): +def is_zluda(device: DeviceLikeType): + device = torch.device(device) + return torch.cuda.get_device_name(device).endswith("[ZLUDA]") + + +def test(device: DeviceLikeType): + device = torch.device(device) try: ten1 = torch.randn((2, 4,), device=device) ten2 = torch.randn((4, 8,), device=device) @@ -15,7 +22,7 @@ def test(device: torch.device): def initialize_zluda(): device = devices.get_optimal_device() - if platform.system() == "Windows" and devices.cuda_ok and torch.cuda.get_device_name(device).endswith("[ZLUDA]"): + if platform.system() == "Windows" and devices.cuda_ok and is_zluda(device): torch.backends.cudnn.enabled = False torch.backends.cuda.enable_flash_sdp(False) torch.backends.cuda.enable_math_sdp(True) diff --git a/pyproject.toml b/pyproject.toml index 780dc95be..6bbb79fc5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,6 @@ exclude = [ "modules/control/proc/leres/", "modules/control/units/*_model.py", "modules/control/units/*_pipe.py", - "modules/pipelines/*.py", "modules/xadapter/*.py", "modules/tcd/*.py", ] diff --git a/requirements.txt b/requirements.txt index 996f09d40..f15b40793 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,12 +42,12 @@ 
clip-interrogator==0.6.0 antlr4-python3-runtime==4.9.3 requests==2.31.0 tqdm==4.66.1 -accelerate==0.27.2 +accelerate==0.28.0 opencv-contrib-python-headless==4.9.0.80 -diffusers==0.26.3 +diffusers==0.27.0 einops==0.4.1 gradio==3.43.2 -huggingface_hub==0.20.3 +huggingface_hub==0.21.4 numexpr==2.8.8 numpy==1.26.4 numba==0.59.0 @@ -55,7 +55,7 @@ pandas protobuf==3.20.3 pytorch_lightning==1.9.4 tokenizers==0.15.2 -transformers==4.38.1 +transformers==4.38.2 tomesd==0.1.3 urllib3==1.26.18 Pillow==10.2.0 diff --git a/scripts/differential_diffusion.py b/scripts/differential_diffusion.py index f0cfbf2eb..2d49f3ddb 100644 --- a/scripts/differential_diffusion.py +++ b/scripts/differential_diffusion.py @@ -1897,10 +1897,8 @@ def depthmap(self, image_init: Image.Image, image_map: Image.Image, model: str, from modules.control.proc.dpt import DPTDetector if image_init is None: return None, None, None - image_map = None if image_map is not None: image_map = image_map.resize(image_init.size, Image.Resampling.LANCZOS) - image_init = image_map if model != 'None': if detector is None: detector = DPTDetector() diff --git a/scripts/face-details.py b/scripts/face-details.py new file mode 100644 index 000000000..f10dbb72c --- /dev/null +++ b/scripts/face-details.py @@ -0,0 +1,168 @@ +import os +import numpy as np +from PIL import Image, ImageDraw +from modules import shared, processing +from modules.face_restoration import FaceRestoration + + +class YoLoResult: + def __init__(self, score: float, box: list[int], mask: Image.Image = None, size: float = 0): + self.score = score + self.box = box + self.mask = mask + self.size = size + + +class FaceRestorerYolo(FaceRestoration): + def name(self): + return "Face HiRes" + + def __init__(self): + from modules import paths + self.model = None + self.model_dir = os.path.join(paths.models_path, 'yolo') + self.model_name = 'yolov8n-face.pt' + self.model_url = 'https://github.com/akanametov/yolov8-face/releases/download/v0.0.0/yolov8n-face.pt' + # self.model_name = 'yolov9-c-face.pt' + # self.model_url = 'https://github.com/akanametov/yolov9-face/releases/download/1.0/yolov9-c-face.pt' + + def dependencies(self): + import installer + installer.install('ultralytics', ignore=False) + + def predict( + self, + image: Image.Image, + offload: bool = False, + conf: float = 0.5, + iou: float = 0.5, + imgsz: int = 640, + half: bool = True, + device = 'cuda', + n: int = 5, + augment: bool = True, + agnostic: bool = False, + retina: bool = False, + mask: bool = True, + ) -> list[YoLoResult]: + + self.model.to(device) + predictions = self.model.predict( + source=[image], + stream=False, + verbose=False, + conf=conf, + iou=iou, + imgsz=imgsz, + half=half, + device=device, + max_det=n, + augment=augment, + agnostic_nms=agnostic, + retina_masks=retina, + ) + if offload: + self.model.to('cpu') + result = [] + for prediction in predictions: + boxes = prediction.boxes.xyxy.detach().int().cpu().numpy() if prediction.boxes is not None else [] + scores = prediction.boxes.conf.detach().float().cpu().numpy() if prediction.boxes is not None else [] + for score, box in zip(scores, boxes): + box = box.tolist() + mask_image = None + size = (box[2] - box[0]) * (box[3] - box[1]) / (image.width * image.height) + if mask: + mask_image = image.copy() + mask_image = Image.new('L', image.size, 0) + draw = ImageDraw.Draw(mask_image) + draw.rectangle(box, fill="white", outline=None, width=0) + result.append(YoLoResult(score=score, box=box, mask=mask_image, size=size)) + return result + + def load(self): + from 
modules import modelloader + self.dependencies() + if self.model is None: + model_file = modelloader.load_file_from_url(url=self.model_url, model_dir=self.model_dir, file_name=self.model_name) + if model_file is not None: + shared.log.info(f'Loading: type=FaceHires model={model_file}') + from ultralytics import YOLO # pylint: disable=import-outside-toplevel + self.model = YOLO(model_file) + + def restore(self, np_image, p: processing.StableDiffusionProcessing = None): + from modules import devices, processing_class + if not hasattr(p, 'facehires'): + p.facehires = 0 + if np_image is None or p.facehires >= p.batch_size * p.n_iter: + return np_image + self.load() + if self.model is None: + shared.log.error(f"Model load: type=FaceHires model='{self.model_name}' dir={self.model_dir} url={self.model_url}") + return np_image + image = Image.fromarray(np_image) + faces = self.predict(image, mask=True, device=devices.device, offload=shared.opts.face_restoration_unload) + if len(faces) == 0: + return np_image + + # create backups + orig_apply_overlay = shared.opts.mask_apply_overlay + orig_p = p.__dict__.copy() + orig_cls = p.__class__ + + pp = None + shared.opts.data['mask_apply_overlay'] = True + args = { + 'batch_size': 1, + 'n_iter': 1, + 'inpaint_full_res': True, + 'inpainting_mask_invert': 0, + 'inpainting_fill': 1, # no fill + 'sampler_name': orig_p.get('hr_sampler_name', 'default'), + 'steps': orig_p.get('hr_second_pass_steps', 0), + 'negative_prompt': orig_p.get('refiner_negative', ''), + 'denoising_strength': shared.opts.facehires_strength if shared.opts.facehires_strength > 0 else orig_p.get('denoising_strength', 0.3), + 'styles': [], + 'prompt': orig_p.get('refiner_prompt', ''), + # TODO facehires expose as tunable + 'mask_blur': 10, + 'inpaint_full_res_padding': 15, + 'restore_faces': True, + } + p = processing_class.switch_class(p, processing.StableDiffusionProcessingImg2Img, args) + p.facehires += 1 # set flag to avoid recursion + + if p.steps < 1: + p.steps = orig_p.get('steps', 0) + if len(p.prompt) == 0: + p.prompt = orig_p.get('all_prompts', [''])[0] + if len(p.negative_prompt) == 0: + p.negative_prompt = orig_p.get('all_negative_prompts', [''])[0] + + shared.log.debug(f'Face HiRes: faces={[f.__dict__ for f in faces]} strength={p.denoising_strength} blur={p.mask_blur} padding={p.inpaint_full_res_padding} steps={p.steps}') + for face in faces: + if face.mask is None: + continue + if face.size < 0.0002 or face.size > 0.8: + shared.log.debug(f'Face HiRes skip: {face.__dict__}') + continue + p.init_images = [image] + p.image_mask = [face.mask] + p.recursion = True + pp = processing.process_images_inner(p) + del p.recursion + p.overlay_images = None # skip applying overlay twice + if pp is not None and pp.images is not None and len(pp.images) > 0: + image = pp.images[0] # update image to be reused for next face + + # restore pipeline + p = processing_class.switch_class(p, orig_cls, orig_p) + p.init_images = getattr(orig_p, 'init_images', None) + p.image_mask = getattr(orig_p, 'image_mask', None) + shared.opts.data['mask_apply_overlay'] = orig_apply_overlay + np_image = np.array(image) + # shared.log.debug(f'Face HiRes complete: faces={len(faces)} time={t1-t0:.3f}') + return np_image + + +yolo = FaceRestorerYolo() +shared.face_restorers.append(yolo) diff --git a/scripts/ipadapter.py b/scripts/ipadapter.py index 5447eefd0..3d37e2ead 100644 --- a/scripts/ipadapter.py +++ b/scripts/ipadapter.py @@ -72,14 +72,14 @@ def process(self, p: processing.StableDiffusionProcessing, *args): # 
pylint: dis return args = list(args) units = args.pop(0) - if p.ip_adapter_names == []: + if getattr(p, 'ip_adapter_names', []) == []: p.ip_adapter_names = args[:MAX_ADAPTERS][:units] - if p.ip_adapter_scales == [0.0]: + if getattr(p, 'ip_adapter_scales', [0.0]) == [0.0]: p.ip_adapter_scales = args[MAX_ADAPTERS:MAX_ADAPTERS*2][:units] - if p.ip_adapter_images == []: + if getattr(p, 'ip_adapter_images', []) == []: p.ip_adapter_images = args[MAX_ADAPTERS*2:MAX_ADAPTERS*3][:units] - if p.ip_adapter_starts == [0.0]: + if getattr(p, 'ip_adapter_starts', [0.0]) == [0.0]: p.ip_adapter_starts = args[MAX_ADAPTERS*3:MAX_ADAPTERS*4][:units] - if p.ip_adapter_ends == [1.0]: + if getattr(p, 'ip_adapter_ends', [1.0]) == [1.0]: p.ip_adapter_ends = args[MAX_ADAPTERS*4:MAX_ADAPTERS*5][:units] - # ipadapter.apply(shared.sd_model, p, adapter_name, scale, image) # called directly from processing.process_images_inner + # ipadapter.apply(shared.sd_model, p, p.ip_adapter_names, p.ip_adapter_scales, p.ip_adapter_starts, p.ip_adapter_ends, p.ip_adapter_images) # called directly from processing.process_images_inner diff --git a/scripts/ledits.py b/scripts/ledits.py new file mode 100644 index 000000000..71ed2c300 --- /dev/null +++ b/scripts/ledits.py @@ -0,0 +1,101 @@ +import diffusers +import gradio as gr +from modules import scripts, processing, shared, devices, sd_models + + +class Script(scripts.Script): + def title(self): + return 'LEdits++' + + def show(self, is_img2img): + return is_img2img if shared.backend == shared.Backend.DIFFUSERS else False + + # return signature is array of gradio components + def ui(self, _is_img2img): + with gr.Row(): + gr.HTML('  LEdits++
diff --git a/scripts/ledits.py b/scripts/ledits.py
new file mode 100644
index 000000000..71ed2c300
--- /dev/null
+++ b/scripts/ledits.py
@@ -0,0 +1,101 @@
+import diffusers
+import gradio as gr
+from modules import scripts, processing, shared, devices, sd_models
+
+
+class Script(scripts.Script):
+    def title(self):
+        return 'LEdits++'
+
+    def show(self, is_img2img):
+        return is_img2img if shared.backend == shared.Backend.DIFFUSERS else False
+
+    # return signature is array of gradio components
+    def ui(self, _is_img2img):
+        with gr.Row():
+            gr.HTML('  LEdits++')
+        with gr.Row():
+            edit_start = gr.Slider(label='Edit start', minimum=0.0, maximum=1.0, step=0.01, value=0.1)
+            edit_stop = gr.Slider(label='Edit stop', minimum=0.0, maximum=1.0, step=0.01, value=1.0)
+            intersect_mask = gr.Checkbox(label='Smooth mask', value=True)
+        with gr.Row():
+            prompt1 = gr.Textbox(show_label=False, placeholder='Positive prompt')
+            scale1 = gr.Slider(label='Scale', minimum=0.0, maximum=1.0, step=0.01, value=0.5)
+            threshold1 = gr.Slider(label='Threshold', minimum=0.0, maximum=1.0, step=0.01, value=0.9)
+        with gr.Row():
+            prompt2 = gr.Textbox(show_label=False, placeholder='Negative prompt')
+            scale2 = gr.Slider(label='Scale', minimum=0.0, maximum=1.0, step=0.01, value=0.5)
+            threshold2 = gr.Slider(label='Threshold', minimum=0.0, maximum=1.0, step=0.01, value=0.9)
+        return [edit_start, edit_stop, intersect_mask, prompt1, scale1, threshold1, prompt2, scale2, threshold2]
+
+    def run(self, p: processing.StableDiffusionProcessing, edit_start, edit_stop, intersect_mask, prompt1, scale1, threshold1, prompt2, scale2, threshold2): # pylint: disable=arguments-differ, unused-argument
+        image = getattr(p, 'init_images', None)
+        if len(prompt1) == 0 and len(prompt2) == 0:
+            shared.log.error('LEdits: no prompts')
+            return None
+        if image is None or len(image) == 0:
+            shared.log.error('LEdits: no init_images')
+            return None
+        if shared.sd_model_type != 'sd' and shared.sd_model_type != 'sdxl':
+            shared.log.error(f'LEdits: invalid model type: {shared.sd_model_type}')
+            return None
+
+        orig_pipeline = shared.sd_model
+        orig_offload = shared.opts.diffusers_model_cpu_offload
+        orig_prompt_attention = shared.opts.prompt_attention
+        shared.opts.data['diffusers_model_cpu_offload'] = False
+        shared.opts.data['prompt_attention'] = 'Fixed attention'
+        # shared.sd_model.maybe_free_model_hooks() # ledits is not compatible with offloading
+        # shared.sd_model.has_accelerate = False
+        sd_models.move_model(shared.sd_model, devices.device, force=True)
+        if shared.sd_model_type == 'sd':
+            shared.sd_model = sd_models.switch_pipe(diffusers.LEditsPPPipelineStableDiffusion, shared.sd_model)
+        elif shared.sd_model_type == 'sdxl':
+            shared.sd_model = sd_models.switch_pipe(diffusers.LEditsPPPipelineStableDiffusionXL, shared.sd_model)
+        if str(devices.dtype) == 'torch.float16':
+            shared.sd_model.vae.config.force_upcast = False # not compatible
+
+        shared.sd_model.scheduler = diffusers.DPMSolverMultistepScheduler.from_config(shared.sd_model.scheduler.config, algorithm_type="sde-dpmsolver++", solver_order=2) # ledits is very picky
+        p.sampler_name = 'Default'
+        p.init() # run init early to take care of resizing
+
+        invert_args = {
+            'image': p.init_images[0],
+            'source_prompt': p.prompt,
+            'source_guidance_scale': p.cfg_scale,
+            'num_inversion_steps': p.steps,
+            'skip': 1.0 - p.denoising_strength, # invert start
+            'generator': None, # not supported
+        }
+        shared.log.info(f'LEdits invert: {invert_args}')
+        _output = shared.sd_model.invert(**invert_args)
+        p.task_args = {
+            'editing_prompt': [],
+            'reverse_editing_direction': [],
+            'edit_guidance_scale': [],
+            'edit_threshold': [],
+            'edit_warmup_steps': int(edit_start * p.steps),
+            'edit_cooldown_steps': int((1.0 - edit_stop) * p.steps) if edit_stop < 1.0 else None,
+            'use_intersect_mask': intersect_mask, # smoothing?
+            'generator': None,
+            'guidance_rescale': 0.0, # bug in pipeline if guidance rescale is enabled
+        }
+        if len(prompt1) > 0:
+            p.task_args['editing_prompt'].append(prompt1)
+            p.task_args['reverse_editing_direction'].append(False)
+            p.task_args['edit_guidance_scale'].append(10.0 * scale1)
+            p.task_args['edit_threshold'].append(threshold1)
+        if len(prompt2) > 0:
+            p.task_args['editing_prompt'].append(prompt2)
+            p.task_args['reverse_editing_direction'].append(True)
+            p.task_args['edit_guidance_scale'].append(10.0 * scale2)
+            p.task_args['edit_threshold'].append(threshold2)
+
+        shared.log.info(f'LEdits: {p.task_args}')
+        processed = processing.process_images(p)
+
+        # restore pipeline
+        shared.sd_model = orig_pipeline
+        shared.opts.data['prompt_attention'] = orig_prompt_attention
+        shared.opts.data['diffusers_model_cpu_offload'] = orig_offload
+        return processed
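For context, the same two-phase LEdits++ flow (invert the source image, then run the edit) can be reproduced with plain diffusers. A rough sketch only, assuming a diffusers version that ships the LEditsPP pipelines; 'runwayml/stable-diffusion-v1-5' and the file names are example placeholders, and values mirror the script's defaults rather than any required settings:

# illustrative sketch only, not the repo's code path
import torch
import diffusers
from diffusers.utils import load_image

pipe = diffusers.LEditsPPPipelineStableDiffusion.from_pretrained(
    'runwayml/stable-diffusion-v1-5', torch_dtype=torch.float16).to('cuda')
pipe.scheduler = diffusers.DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, algorithm_type='sde-dpmsolver++', solver_order=2)

image = load_image('input.png').convert('RGB').resize((512, 512))
_ = pipe.invert(image=image, num_inversion_steps=50, skip=0.3)  # skip = 1 - denoise strength
result = pipe(
    editing_prompt=['sunglasses'],       # positive edit
    reverse_editing_direction=[False],   # False = add the concept, True = remove it
    edit_guidance_scale=[5.0],           # the script passes 10.0 * ui scale
    edit_threshold=[0.9],
)
result.images[0].save('edited.png')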
diff --git a/scripts/stablevideodiffusion.py b/scripts/stablevideodiffusion.py
index 80a20cf45..56a76189d 100644
--- a/scripts/stablevideodiffusion.py
+++ b/scripts/stablevideodiffusion.py
@@ -2,11 +2,18 @@
 Additional params for StableVideoDiffusion
 """
+import os
 import torch
 import gradio as gr
-from modules import scripts, processing, shared, sd_models, images
+from modules import scripts, processing, shared, sd_models, images, modelloader
+
+models = {
+    "SVD 1.0": "stabilityai/stable-video-diffusion-img2vid",
+    "SVD XT 1.0": "stabilityai/stable-video-diffusion-img2vid-xt",
+    "SVD XT 1.1": "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
+}
+
 class Script(scripts.Script):
     def title(self):
         return 'Stable Video Diffusion'
@@ -26,6 +33,8 @@ def video_type_change(video_type):
         with gr.Row():
             gr.HTML('  Stable Video Diffusion')
+        with gr.Row():
+            model = gr.Dropdown(label='Model', choices=list(models), value=list(models)[0])
         with gr.Row():
             num_frames = gr.Slider(label='Frames', minimum=1, maximum=50, step=1, value=14)
             min_guidance_scale = gr.Slider(label='Min guidance', minimum=0.0, maximum=10.0, step=0.1, value=1.0)
@@ -44,43 +53,54 @@ def video_type_change(video_type):
             mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False)
             mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False)
         video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate])
-        return [num_frames, override_resolution, min_guidance_scale, max_guidance_scale, decode_chunk_size, motion_bucket_id, noise_aug_strength, video_type, duration, gif_loop, mp4_pad, mp4_interpolate]
+        return [model, num_frames, override_resolution, min_guidance_scale, max_guidance_scale, decode_chunk_size, motion_bucket_id, noise_aug_strength, video_type, duration, gif_loop, mp4_pad, mp4_interpolate]
 
-    def run(self, p: processing.StableDiffusionProcessing, num_frames, override_resolution, min_guidance_scale, max_guidance_scale, decode_chunk_size, motion_bucket_id, noise_aug_strength, video_type, duration, gif_loop, mp4_pad, mp4_interpolate): # pylint: disable=arguments-differ, unused-argument
-        c = shared.sd_model.__class__.__name__ if shared.sd_model is not None else ''
-        if c != 'StableVideoDiffusionPipeline' and c != 'TextToVideoSDPipeline':
-            shared.log.error(f'StableVideo: model selected={c} required=StableVideoDiffusion')
+    def run(self, p: processing.StableDiffusionProcessing, model, num_frames, override_resolution, min_guidance_scale, max_guidance_scale, decode_chunk_size, motion_bucket_id, noise_aug_strength, video_type, duration, gif_loop, mp4_pad, mp4_interpolate): # pylint: disable=arguments-differ, unused-argument
+        image = getattr(p, 'init_images', None)
+        if image is None or len(image) == 0:
+            shared.log.error('SVD: no init_images')
             return None
-        if hasattr(p, 'init_images') and len(p.init_images) > 0:
-            if override_resolution:
-                p.width = 1024
-                p.height = 576
-                p.task_args['image'] = images.resize_image(resize_mode=2, im=p.init_images[0], width=p.width, height=p.height, upscaler_name=None, output_type='pil')
-            else:
-                p.task_args['image'] = p.init_images[0]
-            p.ops.append('stablevideo')
-            p.do_not_save_grid = True
-            if c == 'StableVideoDiffusionPipeline':
-                p.sampler_name = 'Default' # svd does not support non-default sampler
-                p.task_args['output_type'] = 'np'
-            else:
-                p.task_args['output_type'] = 'pil'
-            p.task_args['generator'] = torch.manual_seed(p.seed) # svd does not support gpu based generator
-            p.task_args['width'] = p.width
-            p.task_args['height'] = p.height
-            p.task_args['num_frames'] = num_frames
-            p.task_args['decode_chunk_size'] = decode_chunk_size
-            p.task_args['motion_bucket_id'] = round(255 * motion_bucket_id)
-            p.task_args['noise_aug_strength'] = noise_aug_strength
-            p.task_args['num_inference_steps'] = p.steps
-            p.task_args['min_guidance_scale'] = min_guidance_scale
-            p.task_args['max_guidance_scale'] = max_guidance_scale
-            shared.log.debug(f'StableVideo: args={p.task_args}')
-            shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
-            processed = processing.process_images(p)
-            if video_type != 'None':
-                images.save_video(p, filename=None, images=processed.images, video_type=video_type, duration=duration, loop=gif_loop, pad=mp4_pad, interpolate=mp4_interpolate)
-            return processed
         else:
-            shared.log.error('StableVideo: no init_images')
-            return None
+            image = image[0]
+
+        # load/download model on-demand
+        model_path = models[model]
+        model_name = os.path.basename(model_path)
+        has_checkpoint = sd_models.get_closet_checkpoint_match(model_path)
+        if has_checkpoint is None:
+            shared.log.error(f'SVD: no checkpoint for {model_name}')
+            modelloader.load_reference(model_path, variant='fp16')
+        c = shared.sd_model.__class__.__name__
+        model_loaded = shared.sd_model.sd_checkpoint_info.model_name if shared.sd_model is not None else None
+        if model_name != model_loaded or c != 'StableVideoDiffusionPipeline':
+            shared.opts.sd_model_checkpoint = model_path
+            sd_models.reload_model_weights()
+
+        # set params
+        if override_resolution:
+            p.width = 1024
+            p.height = 576
+            image = images.resize_image(resize_mode=2, im=image, width=p.width, height=p.height, upscaler_name=None, output_type='pil')
+        p.ops.append('svd')
+        p.do_not_save_grid = True
+        p.init_images = [image]
+        p.sampler_name = 'Default' # svd does not support non-default sampler
+        p.task_args['output_type'] = 'pil'
+        p.task_args['generator'] = torch.manual_seed(p.seed) # svd does not support gpu based generator
+        p.task_args['image'] = image
+        p.task_args['width'] = p.width
+        p.task_args['height'] = p.height
+        p.task_args['num_frames'] = num_frames
+        p.task_args['decode_chunk_size'] = decode_chunk_size
+        p.task_args['motion_bucket_id'] = round(255 * motion_bucket_id)
+        p.task_args['noise_aug_strength'] = noise_aug_strength
+        p.task_args['num_inference_steps'] = p.steps
+        p.task_args['min_guidance_scale'] = min_guidance_scale
+        p.task_args['max_guidance_scale'] = max_guidance_scale
+        shared.log.debug(f'SVD: args={p.task_args}')
+
+        # run processing
+        processed = processing.process_images(p)
+        if video_type != 'None':
+            images.save_video(p, filename=None, images=processed.images, video_type=video_type, duration=duration, loop=gif_loop, pad=mp4_pad, interpolate=mp4_interpolate)
+        return processed
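For reference, an equivalent standalone SVD run with plain diffusers looks roughly like the sketch below; it is not part of the patch, assumes the stabilityai/stable-video-diffusion-img2vid-xt checkpoint from the models map above, and uses example values for parameters the script exposes in its UI:

# illustrative sketch only; kwargs mirror the task_args the script builds
import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video

pipe = StableVideoDiffusionPipeline.from_pretrained(
    'stabilityai/stable-video-diffusion-img2vid-xt', torch_dtype=torch.float16, variant='fp16').to('cuda')

image = load_image('input.png').resize((1024, 576))  # the script's override_resolution default
frames = pipe(
    image,
    num_frames=14,                        # UI default
    decode_chunk_size=6,
    motion_bucket_id=127,                 # the script passes round(255 * ui_value)
    noise_aug_strength=0.02,
    min_guidance_scale=1.0,
    max_guidance_scale=3.0,
    generator=torch.manual_seed(42),      # cpu-based generator, as in the script
).frames[0]
export_to_video(frames, 'output.mp4', fps=7)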
diff --git a/wiki b/wiki
index 5c52cbb73..d855aabbc 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 5c52cbb7301c3e008a9dfd76702f9321ae7b3a34
+Subproject commit d855aabbc4d628a417905f5ae0030c34528809b4