vladmandic · vladmandic · Jun 23, 2024 · Jun 13, 2024 · Jun 14, 2024 · Jun 14, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,96 @@
 # Change Log for SD.Next
 
+## Update for 2024-06-23
+
+### Highlights for 2024-06-23
+
+Following the zero-day **SD3** release, 10 days later here's a refresh with 10+ improvements  
+including full prompt attention, support for compressed weights, additional text-encoder quantization modes.  
+
+But there's more than SD3:  
+- support for quantized **T5** text encoder *FP16/FP8/FP4/INT8* in all models that use T5: SD3, PixArt-Σ, etc.  
+- support for **PixArt-Sigma** in small/medium/large variants  
+- support for **HunyuanDiT 1.1**  
+- additional **NNCF weights compression** support: SD3, PixArt, ControlNet, Lora  
+- integration of **MS Florence** VLM/VQA *Base* and *Large* models  
+- (finally) new release of **Torch-DirectML**  
+- additional efficiencies for users with low VRAM GPUs  
+- over 20 overall fixes  
+
+### Model Improvements
+
+- **SD3**: enable tiny-VAE (TAESD) preview and non-full quality mode  
+- SD3: enable base LoRA support  
+- SD3: add support for FP4 quantized T5 text encoder  
+  simply select in *settings -> model -> text encoder*  
+  *note* for SD3 with T5, set SD.Next to use FP16 precision, not BF16 precision  
+- SD3: add support for INT8 quantized T5 text encoder, thanks @Disty0!  
+- SD3: enable cpu-offloading for T5 text encoder, thanks @Disty0!  
+- SD3: simplified loading of model in single-file safetensors format  
+  model load can now be performed fully offline  
+- SD3: full support for prompt parsing and attention, thanks @AI-Casanova!
+- SD3: ability to target different prompts to each of text-encoders, thanks @AI-Casanova!  
+  example: `dog TE2: cat TE3: bird`
+- SD3: add support for sampler shift for Euler FlowMatch  
+  see *settings -> samplers*, also available as param in xyz grid  
+  higher shift means model will spend more time on structure and less on details  
+- SD3: add support for selecting T5 text encoder variant in XYZ grid
+- **Pixart-Σ**: Add *small* (512px) and *large* (2k) variations, in addition to existing *medium* (1k)  
+- Pixart-Σ: Add support for 4/8bit quantized t5 text encoder  
+  *note* by default pixart-Σ uses full fp16 t5 encoder with large memory footprint  
+  simply select in *settings -> model -> text encoder* before or after model load  
+- **HunyuanDiT**: support for model version 1.1  
+- **MS Florence**: integration of Microsoft Florence VLM/VQA Base and Large models  
+  simply select in *process -> visual query*!
+
+### General Improvements
+
+- support FP4 quantized T5 text encoder, in addition to existing FP8 and FP16
+- support for T5 text-encoder loader in **all** models that use T5  
+  *example*: load FP4 or FP8 quantized T5 text-encoder into PixArt Sigma!
+- support for `torch-directml` **0.2.2**, thanks @lshqqytiger!  
+  *note*: new directml is finally based on modern `torch` 2.3.1!  
+- xyz grid: add support for LoRA selector
+- vae load: store original vae so it can be restored when set to none
+- extra networks: info display now contains link to source url of the model if it's known  
+  works for civitai and huggingface models  
+- force gc for lowvram users and improve gc logging
+- improved google.colab support
+- css tweaks for standardui
+- css tweaks for modernui
+- additional torch gc checks, thanks @Disty0!
+
+**Improvements: NNCF**, thanks @Disty0!  
+- SD3 and PixArt support  
+- moved the first compression step to CPU  
+- sequential cpu offload (lowvram) support  
+- Lora support without reloading the model  
+- ControlNet compression support  
+
+### Fixes
+
+- fix unsaturated outputs, force apply vae config on model load  
+- fix hidiffusion handling of non-square aspect ratios, thanks @ShenZhang-Shin!
+- fix control second pass resize  
+- fix hunyuandit set attention processor
+- fix civitai download without name
+- fix compatibility with latest adetailer
+- fix invalid sampler warning
+- fix starting from non git repo
+- fix control api negative prompt handling
+- fix saving style without name provided
+- fix t2i-color adapter
+- fix sdxl "has been incorrectly initialized"
+- fix api face-hires
+- fix api ip-adapter
+- fix memory exceptions with ROCm, thanks @Disty0!
+- fix face-hires with lowvram, thanks @Disty0!
+- fix pag incorrectly resetting pipeline
+- cleanup image metadata
+- restructure api examples: `cli/api-*`
+- handle theme fallback when invalid theme is specified
+- remove obsolete training code leftovers
+
 ## Update for 2024-06-13
 
 ### Highlights for 2024-06-13

diff --git a/TODO.md b/TODO.md
@@ -11,6 +11,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
 - diffusers public callbacks  
 - include reference styles
 - lora: sc lora, dora, etc
+- sd3 controlnet: <https://github.com/huggingface/diffusers/pull/8566>
 
 ## Experimental
 

diff --git a/cli/simple-control.py → cli/api-control.py b/cli/simple-control.py → cli/api-control.py
@@ -132,7 +132,7 @@ def get_image(encoded, output):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description = 'simple-img2img')
+    parser = argparse.ArgumentParser(description = 'api-img2img')
     parser.add_argument('--init', required=False, default=None, help='init image')
     parser.add_argument('--input', required=False, default=None, help='input image')
     parser.add_argument('--mask', required=False, help='mask image')

diff --git a/cli/api-faceid.py b/cli/api-faceid.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+import os
+import io
+import time
+import base64
+import logging
+import argparse
+import requests
+import urllib3
+from PIL import Image
+
+# server address and optional basic-auth credentials are taken from the environment
+sd_url = os.environ.get('SDAPI_URL', "http://127.0.0.1:7860")
+sd_username = os.environ.get('SDAPI_USR', None)
+sd_password = os.environ.get('SDAPI_PWD', None)
+
+logging.basicConfig(level = logging.INFO, format = '%(asctime)s %(levelname)s: %(message)s')
+log = logging.getLogger(__name__)
+# requests in this script are sent with verify=False, so silence the urllib3 warning that would cause
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+# base request payload: generate() adds the remaining fields before posting it to /sdapi/v1/txt2img
+options = {
+    "save_images": False,
+    "send_images": True,
+}
+
+
+def auth():
+    if sd_username is not None and sd_password is not None:
+        return requests.auth.HTTPBasicAuth(sd_username, sd_password)
+    return None
+
+
+def post(endpoint: str, dct: dict = None):
+    req = requests.post(f'{sd_url}{endpoint}', json = dct, timeout=300, verify=False, auth=auth())
+    if req.status_code != 200:
+        return { 'error': req.status_code, 'reason': req.reason, 'url': req.url }
+    else:
+        return req.json()
+
+
+def encode(f):
+    image = Image.open(f)
+    if image.mode == 'RGBA':
+        image = image.convert('RGB')
+    with io.BytesIO() as stream:
+        image.save(stream, 'JPEG')
+        image.close()
+        values = stream.getvalue()
+        encoded = base64.b64encode(values).decode()
+        return encoded
+
+
+def generate(args): # pylint: disable=redefined-outer-name
+    t0 = time.time()
+    if args.model is not None:
+        post('/sdapi/v1/options', { 'sd_model_checkpoint': args.model })
+        post('/sdapi/v1/reload-checkpoint') # needed if running in api-only to trigger new model load
+    options['prompt'] = args.prompt
+    options['negative_prompt'] = args.negative
+    options['steps'] = int(args.steps)
+    options['seed'] = int(args.seed)
+    options['sampler_name'] = args.sampler
+    options['width'] = args.width
+    options['height'] = args.height
+    options['face'] = {
+        'mode': 'FaceID',
+        'ip_model': 'FaceID Base',
+        'source_images': [encode(args.face)],
+    }
+    data = post('/sdapi/v1/txt2img', options)
+    t1 = time.time()
+    if 'images' in data:
+        for i in range(len(data['images'])):
+            b64 = data['images'][i].split(',',1)[0]
+            info = data['info']
+            image = Image.open(io.BytesIO(base64.b64decode(b64)))
+            log.info(f'received image: size={image.size} time={t1-t0:.2f} info="{info}"')
+            if args.output:
+                image.save(args.output)
+                log.info(f'image saved: size={image.size} filename={args.output}')
+
+    else:
+        log.warning(f'no images received: {data}')
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description = 'api-faceid')
+    parser.add_argument('--width', required=False, default=512, help='image width')
+    parser.add_argument('--height', required=False, default=512, help='image height')
+    parser.add_argument('--face', required=False, help='face image')
+    parser.add_argument('--prompt', required=False, default='', help='prompt text')
+    parser.add_argument('--negative', required=False, default='', help='negative prompt text')
+    parser.add_argument('--steps', required=False, default=20, help='number of steps')
+    parser.add_argument('--seed', required=False, default=-1, help='initial seed')
+    parser.add_argument('--sampler', required=False, default='Euler a', help='sampler name')
+    parser.add_argument('--output', required=False, default=None, help='output image file')
+    parser.add_argument('--model', required=False, help='model name')
+    args = parser.parse_args()
+    log.info(f'img2img: {args}')
+    generate(args)
+
+"""
+request.face.mode,
+request.face.source_images,
+request.face.ip_model,
+request.face.ip_override_sampler,
+request.face.ip_cache_model,
+request.face.ip_strength,
+request.face.ip_structure,
+request.face.id_strength,
+request.face.id_conditioning,
+request.face.id_cache,
+request.face.pm_trigger,
+request.face.pm_strength,
+request.face.pm_start,
+request.face.fs_cache
+"""
diff --git a/cli/simple-img2img.py → cli/api-img2img.py b/cli/simple-img2img.py → cli/api-img2img.py
@@ -83,7 +83,7 @@ def generate(args): # pylint: disable=redefined-outer-name
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description = 'simple-img2img')
+    parser = argparse.ArgumentParser(description = 'api-img2img')
     parser.add_argument('--init', required=True, help='init image')
     parser.add_argument('--mask', required=False, help='mask image')
     parser.add_argument('--prompt', required=False, default='', help='prompt text')

diff --git a/cli/simple-info.py → cli/api-info.py b/cli/simple-info.py → cli/api-info.py
@@ -50,7 +50,7 @@ def info(args): # pylint: disable=redefined-outer-name
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description = 'simple-info')
+    parser = argparse.ArgumentParser(description = 'api-info')
     parser.add_argument('--input', required=True, help='input image')
     args = parser.parse_args()
     log.info(f'info: {args}')

diff --git a/cli/api-json.py b/cli/api-json.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+# curl -vX POST http://localhost:7860/sdapi/v1/txt2img --header "Content-Type: application/json" -d @3261.json
+import os
+import json
+import logging
+import argparse
+import requests
+import urllib3
+
+
+# server address and optional basic-auth credentials are taken from the environment
+sd_url = os.environ.get('SDAPI_URL', "http://127.0.0.1:7860")
+sd_username = os.environ.get('SDAPI_USR', None)
+sd_password = os.environ.get('SDAPI_PWD', None)
+# NOTE(review): not referenced by the code visible in this script, which posts the user-supplied json as-is
+options = {
+    "save_images": True,
+    "send_images": True,
+}
+
+logging.basicConfig(level = logging.INFO, format = '%(asctime)s %(levelname)s: %(message)s')
+log = logging.getLogger(__name__)
+# requests in this script are sent with verify=False, so silence the urllib3 warning that would cause
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+
+def auth():
+    if sd_username is not None and sd_password is not None:
+        return requests.auth.HTTPBasicAuth(sd_username, sd_password)
+    return None
+
+
+def post(endpoint: str, payload: dict = None):
+    if 'sdapi' not in endpoint:
+        endpoint = f'sdapi/v1/{endpoint}'
+    if 'http' not in endpoint:
+        endpoint = f'{sd_url}/{endpoint}'
+    req = requests.post(endpoint, json = payload, timeout=300, verify=False, auth=auth())
+    return { 'error': req.status_code, 'reason': req.reason, 'url': req.url } if req.status_code != 200 else req.json()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description = 'api-txt2img')
+    parser.add_argument('endpoint', nargs=1, help='endpoint')
+    parser.add_argument('json', nargs=1, help='json data or file')
+    args = parser.parse_args()
+    log.info(f'api-json: {args}')
+    if os.path.isfile(args.json[0]):
+        with open(args.json[0], 'r', encoding='ascii') as f:
+            dct = json.load(f) # TODO fails with b64 encoded images inside json due to string encoding
+    else:
+        dct = json.loads(args.json[0])
+    res = post(endpoint=args.endpoint[0], payload=dct)
+    print(res)
diff --git a/cli/simple-mask.py → cli/api-mask.py b/cli/simple-mask.py → cli/api-mask.py
@@ -73,7 +73,7 @@ def info(args): # pylint: disable=redefined-outer-name
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description = 'simple-info')
+    parser = argparse.ArgumentParser(description = 'api-mask')
     parser.add_argument('--input', required=True, help='input image')
     parser.add_argument('--mask', required=False, help='input mask')
     parser.add_argument('--type', required=False, help='output mask type')

diff --git a/cli/simple-preprocess.py → cli/api-preprocess.py b/cli/simple-preprocess.py → cli/api-preprocess.py
@@ -67,7 +67,7 @@ def info(args): # pylint: disable=redefined-outer-name
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description = 'simple-info')
+    parser = argparse.ArgumentParser(description = 'api-preprocess')
     parser.add_argument('--input', required=True, help='input image')
     parser.add_argument('--model', required=True, help='preprocessing model')
     parser.add_argument('--output', required=False, help='output image')

diff --git a/cli/idle.py → cli/api-progress.py b/cli/idle.py → cli/api-progress.py
diff --git a/cli/simple-txt2img.js → cli/api-txt2img.js b/cli/simple-txt2img.js → cli/api-txt2img.js
diff --git a/cli/simple-txt2img.py → cli/api-txt2img.py b/cli/simple-txt2img.py → cli/api-txt2img.py
@@ -48,7 +48,10 @@ def generate(args): # pylint: disable=redefined-outer-name
     options['sampler_name'] = args.sampler
     options['width'] = int(args.width)
     options['height'] = int(args.height)
-    options['restore_faces'] = args.faces
+    if args.faces:
+        options['restore_faces'] = args.faces
+        options['denoising_strength'] = 0.5
+        options['hr_sampler_name'] = args.sampler
     data = post('/sdapi/v1/txt2img', options)
     t1 = time.time()
     if 'images' in data:
@@ -65,7 +68,7 @@ def generate(args): # pylint: disable=redefined-outer-name
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description = 'simple-txt2img')
+    parser = argparse.ArgumentParser(description = 'api-txt2img')
     parser.add_argument('--prompt', required=False, default='', help='prompt text')
     parser.add_argument('--negative', required=False, default='', help='negative prompt text')
     parser.add_argument('--width', required=False, default=512, help='image width')

diff --git a/cli/simple-upscale.py → cli/api-upscale.py b/cli/simple-upscale.py → cli/api-upscale.py
@@ -80,7 +80,7 @@ def upscale(args): # pylint: disable=redefined-outer-name
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description = 'simple-upscale')
+    parser = argparse.ArgumentParser(description = 'api-upscale')
     parser.add_argument('--input', required=True, help='input image')
     parser.add_argument('--output', required=True, help='output image')
     parser.add_argument('--upscaler', required=False, default='Nearest', help='upscaler name')

diff --git a/cli/simple-vqa.py → cli/api-vqa.py b/cli/simple-vqa.py → cli/api-vqa.py
@@ -55,7 +55,7 @@ def info(args): # pylint: disable=redefined-outer-name
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description = 'simple-info')
+    parser = argparse.ArgumentParser(description = 'api-vqa')
     parser.add_argument('--input', required=True, help='input image')
     parser.add_argument('--model', required=False, help='vqa model')
     parser.add_argument('--question', required=False, help='question')

diff --git a/cli/image-encode.py b/cli/image-encode.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+import io
+import os
+import sys
+import base64
+from PIL import Image
+from rich import print # pylint: disable=redefined-builtin
+
+
+def encode(file: str):
+    image = Image.open(file) if os.path.exists(file) else None
+    print(f'Input: file={file} image={image}')
+    if image is None:
+        return None
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+    with io.BytesIO() as stream:
+        image.save(stream, 'JPEG')
+        image.close()
+        values = stream.getvalue()
+        encoded = base64.b64encode(values).decode()
+        return encoded
+
+
+if __name__ == "__main__":
+    sys.argv.pop(0)
+    fn = sys.argv[0] if len(sys.argv) > 0 else ''
+    b64 = encode(fn)
+    print('=== BEGIN ===')
+    print(f'{b64}')
+    print('=== END ===')
+