From afc6abc47862df3a75cba4ed0b22cca3fac4a8c9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 4 Jan 2026 22:31:33 +0000 Subject: [PATCH 1/7] Initial plan From 7d22505c1a494c0121754e3e853d84815b790b42 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 4 Jan 2026 22:47:26 +0000 Subject: [PATCH 2/7] Fix Z-Image VAE encode/decode to request working memory Co-authored-by: lstein <111189+lstein@users.noreply.github.com> --- .../invocations/z_image_image_to_latents.py | 18 +++++++++++++++++- .../invocations/z_image_latents_to_image.py | 17 ++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/invokeai/app/invocations/z_image_image_to_latents.py b/invokeai/app/invocations/z_image_image_to_latents.py index d74ce073b69..da8bf028132 100644 --- a/invokeai/app/invocations/z_image_image_to_latents.py +++ b/invokeai/app/invocations/z_image_image_to_latents.py @@ -20,6 +20,7 @@ from invokeai.backend.model_manager.load.load_base import LoadedModel from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor from invokeai.backend.util.devices import TorchDevice +from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux, estimate_vae_working_memory_sd3 # Z-Image can use either the Diffusers AutoencoderKL or the FLUX AutoEncoder ZImageVAE = Union[AutoencoderKL, FluxAutoEncoder] @@ -47,7 +48,22 @@ def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tenso "Ensure you are using a compatible VAE model." ) - with vae_info.model_on_device() as (_, vae): + # Estimate working memory needed for VAE encode + is_flux_vae = isinstance(vae_info.model, FluxAutoEncoder) + if is_flux_vae: + estimated_working_memory = estimate_vae_working_memory_flux( + operation="encode", + image_tensor=image_tensor, + vae=vae_info.model, + ) + else: + estimated_working_memory = estimate_vae_working_memory_sd3( + operation="encode", + image_tensor=image_tensor, + vae=vae_info.model, + ) + + with vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae): if not isinstance(vae, (AutoencoderKL, FluxAutoEncoder)): raise TypeError( f"Expected AutoencoderKL or FluxAutoEncoder, got {type(vae).__name__}. " diff --git a/invokeai/app/invocations/z_image_latents_to_image.py b/invokeai/app/invocations/z_image_latents_to_image.py index d29ec0998a4..094bbf7c880 100644 --- a/invokeai/app/invocations/z_image_latents_to_image.py +++ b/invokeai/app/invocations/z_image_latents_to_image.py @@ -21,6 +21,7 @@ from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt from invokeai.backend.util.devices import TorchDevice +from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux, estimate_vae_working_memory_sd3 # Z-Image can use either the Diffusers AutoencoderKL or the FLUX AutoEncoder ZImageVAE = Union[AutoencoderKL, FluxAutoEncoder] @@ -53,12 +54,26 @@ def invoke(self, context: InvocationContext) -> ImageOutput: is_flux_vae = isinstance(vae_info.model, FluxAutoEncoder) + # Estimate working memory needed for VAE decode + if is_flux_vae: + estimated_working_memory = estimate_vae_working_memory_flux( + operation="decode", + image_tensor=latents, + vae=vae_info.model, + ) + else: + estimated_working_memory = estimate_vae_working_memory_sd3( + operation="decode", + image_tensor=latents, + vae=vae_info.model, + ) + # FLUX VAE doesn't support seamless, so only apply for AutoencoderKL seamless_context = ( nullcontext() if is_flux_vae else SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes) ) - with seamless_context, vae_info.model_on_device() as (_, vae): + with seamless_context, vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae): context.util.signal_progress("Running VAE") if not isinstance(vae, (AutoencoderKL, FluxAutoEncoder)): raise TypeError( From 462182fe36e232747494a6fcd80259193e19842f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 4 Jan 2026 22:51:45 +0000 Subject: [PATCH 3/7] Add test for Z-Image working memory estimation Co-authored-by: lstein <111189+lstein@users.noreply.github.com> --- .../test_z_image_working_memory.py | 139 ++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 tests/app/invocations/test_z_image_working_memory.py diff --git a/tests/app/invocations/test_z_image_working_memory.py b/tests/app/invocations/test_z_image_working_memory.py new file mode 100644 index 00000000000..4173460417b --- /dev/null +++ b/tests/app/invocations/test_z_image_working_memory.py @@ -0,0 +1,139 @@ +"""Test that Z-Image VAE invocations properly estimate and request working memory.""" + +from unittest.mock import MagicMock, patch + +import pytest +import torch +from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL + +from invokeai.app.invocations.z_image_image_to_latents import ZImageImageToLatentsInvocation +from invokeai.app.invocations.z_image_latents_to_image import ZImageLatentsToImageInvocation +from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder + + +class TestZImageWorkingMemory: + """Test that Z-Image VAE invocations request working memory.""" + + @pytest.mark.parametrize("vae_type", [AutoencoderKL, FluxAutoEncoder]) + def test_z_image_latents_to_image_requests_working_memory(self, vae_type): + """Test that ZImageLatentsToImageInvocation estimates and requests working memory.""" + # Create mock VAE + mock_vae = MagicMock(spec=vae_type) + mock_vae.config.scaling_factor = 1.0 + mock_vae.config.shift_factor = None + + # Create mock parameter for dtype detection + mock_param = torch.zeros(1) + mock_vae.parameters.return_value = iter([mock_param]) + + # Create mock vae_info + mock_vae_info = MagicMock() + mock_vae_info.model = mock_vae + + # Create mock context manager return value + mock_cm = MagicMock() + mock_cm.__enter__ = MagicMock(return_value=(None, mock_vae)) + mock_cm.__exit__ = MagicMock(return_value=None) + mock_vae_info.model_on_device = MagicMock(return_value=mock_cm) + + # Mock the context + mock_context = MagicMock() + mock_context.models.load.return_value = mock_vae_info + + # Mock latents + mock_latents = torch.zeros(1, 16, 64, 64) + mock_context.tensors.load.return_value = mock_latents + + # Mock the appropriate estimation function + if vae_type == FluxAutoEncoder: + estimation_path = "invokeai.app.invocations.z_image_latents_to_image.estimate_vae_working_memory_flux" + else: + estimation_path = "invokeai.app.invocations.z_image_latents_to_image.estimate_vae_working_memory_sd3" + + with patch(estimation_path) as mock_estimate: + expected_memory = 1024 * 1024 * 500 # 500MB + mock_estimate.return_value = expected_memory + + # Mock VAE decode to avoid actual computation + if vae_type == FluxAutoEncoder: + mock_vae.decode.return_value = torch.zeros(1, 3, 512, 512) + else: + mock_vae.decode.return_value = (torch.zeros(1, 3, 512, 512),) + + # Mock image save + mock_image_dto = MagicMock() + mock_context.images.save.return_value = mock_image_dto + + # Create and invoke + invocation = ZImageLatentsToImageInvocation( + latents=MagicMock(latents_name="test_latents"), + vae=MagicMock(vae=MagicMock(), seamless_axes=MagicMock()), + ) + + try: + invocation.invoke(mock_context) + except Exception: + # We expect some errors due to mocking, but we just want to verify the working memory was requested + pass + + # Verify that working memory estimation was called + mock_estimate.assert_called_once() + # Verify that model_on_device was called with the estimated working memory + mock_vae_info.model_on_device.assert_called_once_with(working_mem_bytes=expected_memory) + + @pytest.mark.parametrize("vae_type", [AutoencoderKL, FluxAutoEncoder]) + def test_z_image_image_to_latents_requests_working_memory(self, vae_type): + """Test that ZImageImageToLatentsInvocation estimates and requests working memory.""" + # Create mock VAE + mock_vae = MagicMock(spec=vae_type) + mock_vae.config.scaling_factor = 1.0 + mock_vae.config.shift_factor = None + + # Create mock parameter for dtype detection + mock_param = torch.zeros(1) + mock_vae.parameters.return_value = iter([mock_param]) + + # Create mock vae_info + mock_vae_info = MagicMock() + mock_vae_info.model = mock_vae + + # Create mock context manager return value + mock_cm = MagicMock() + mock_cm.__enter__ = MagicMock(return_value=(None, mock_vae)) + mock_cm.__exit__ = MagicMock(return_value=None) + mock_vae_info.model_on_device = MagicMock(return_value=mock_cm) + + # Mock image tensor + mock_image_tensor = torch.zeros(1, 3, 512, 512) + + # Mock the appropriate estimation function + if vae_type == FluxAutoEncoder: + estimation_path = "invokeai.app.invocations.z_image_image_to_latents.estimate_vae_working_memory_flux" + else: + estimation_path = "invokeai.app.invocations.z_image_image_to_latents.estimate_vae_working_memory_sd3" + + with patch(estimation_path) as mock_estimate: + expected_memory = 1024 * 1024 * 250 # 250MB + mock_estimate.return_value = expected_memory + + # Mock VAE encode to avoid actual computation + if vae_type == FluxAutoEncoder: + mock_vae.encode.return_value = torch.zeros(1, 16, 64, 64) + else: + mock_latent_dist = MagicMock() + mock_latent_dist.sample.return_value = torch.zeros(1, 16, 64, 64) + mock_encode_result = MagicMock() + mock_encode_result.latent_dist = mock_latent_dist + mock_vae.encode.return_value = mock_encode_result + + # Call the static method directly + try: + ZImageImageToLatentsInvocation.vae_encode(mock_vae_info, mock_image_tensor) + except Exception: + # We expect some errors due to mocking, but we just want to verify the working memory was requested + pass + + # Verify that working memory estimation was called + mock_estimate.assert_called_once() + # Verify that model_on_device was called with the estimated working memory + mock_vae_info.model_on_device.assert_called_once_with(working_mem_bytes=expected_memory) From e13401bd647b0374e40b5e242be648383fc5cb1a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 5 Jan 2026 00:54:00 +0000 Subject: [PATCH 4/7] Fix unit test: only set config attribute for AutoencoderKL, not FluxAutoEncoder Co-authored-by: lstein <111189+lstein@users.noreply.github.com> --- .../app/invocations/test_z_image_working_memory.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/app/invocations/test_z_image_working_memory.py b/tests/app/invocations/test_z_image_working_memory.py index 4173460417b..0d65a5a9abd 100644 --- a/tests/app/invocations/test_z_image_working_memory.py +++ b/tests/app/invocations/test_z_image_working_memory.py @@ -19,8 +19,11 @@ def test_z_image_latents_to_image_requests_working_memory(self, vae_type): """Test that ZImageLatentsToImageInvocation estimates and requests working memory.""" # Create mock VAE mock_vae = MagicMock(spec=vae_type) - mock_vae.config.scaling_factor = 1.0 - mock_vae.config.shift_factor = None + + # Only set config for AutoencoderKL (FluxAutoEncoder doesn't use config) + if vae_type == AutoencoderKL: + mock_vae.config.scaling_factor = 1.0 + mock_vae.config.shift_factor = None # Create mock parameter for dtype detection mock_param = torch.zeros(1) @@ -86,8 +89,11 @@ def test_z_image_image_to_latents_requests_working_memory(self, vae_type): """Test that ZImageImageToLatentsInvocation estimates and requests working memory.""" # Create mock VAE mock_vae = MagicMock(spec=vae_type) - mock_vae.config.scaling_factor = 1.0 - mock_vae.config.shift_factor = None + + # Only set config for AutoencoderKL (FluxAutoEncoder doesn't use config) + if vae_type == AutoencoderKL: + mock_vae.config.scaling_factor = 1.0 + mock_vae.config.shift_factor = None # Create mock parameter for dtype detection mock_param = torch.zeros(1) From b9bc9159c87e857c23886a1948cc4ba835fde40e Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sun, 4 Jan 2026 19:59:11 -0500 Subject: [PATCH 5/7] style: fix ruff errors --- tests/app/invocations/test_z_image_working_memory.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/app/invocations/test_z_image_working_memory.py b/tests/app/invocations/test_z_image_working_memory.py index 0d65a5a9abd..1607ccf2c9a 100644 --- a/tests/app/invocations/test_z_image_working_memory.py +++ b/tests/app/invocations/test_z_image_working_memory.py @@ -32,7 +32,7 @@ def test_z_image_latents_to_image_requests_working_memory(self, vae_type): # Create mock vae_info mock_vae_info = MagicMock() mock_vae_info.model = mock_vae - + # Create mock context manager return value mock_cm = MagicMock() mock_cm.__enter__ = MagicMock(return_value=(None, mock_vae)) @@ -42,7 +42,7 @@ def test_z_image_latents_to_image_requests_working_memory(self, vae_type): # Mock the context mock_context = MagicMock() mock_context.models.load.return_value = mock_vae_info - + # Mock latents mock_latents = torch.zeros(1, 16, 64, 64) mock_context.tensors.load.return_value = mock_latents @@ -102,7 +102,7 @@ def test_z_image_image_to_latents_requests_working_memory(self, vae_type): # Create mock vae_info mock_vae_info = MagicMock() mock_vae_info.model = mock_vae - + # Create mock context manager return value mock_cm = MagicMock() mock_cm.__enter__ = MagicMock(return_value=(None, mock_vae)) From 3205bf6606d0351407de5e432a4a845e77774502 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 5 Jan 2026 01:05:15 +0000 Subject: [PATCH 6/7] Fix test: use model_construct to bypass Pydantic validation for mock fields Co-authored-by: lstein <111189+lstein@users.noreply.github.com> --- tests/app/invocations/test_z_image_working_memory.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/app/invocations/test_z_image_working_memory.py b/tests/app/invocations/test_z_image_working_memory.py index 1607ccf2c9a..40a0a4ce925 100644 --- a/tests/app/invocations/test_z_image_working_memory.py +++ b/tests/app/invocations/test_z_image_working_memory.py @@ -67,10 +67,12 @@ def test_z_image_latents_to_image_requests_working_memory(self, vae_type): mock_image_dto = MagicMock() mock_context.images.save.return_value = mock_image_dto - # Create and invoke - invocation = ZImageLatentsToImageInvocation( + # Import and create invocation using model_construct to bypass validation + from invokeai.app.invocations.z_image_latents_to_image import ZImageLatentsToImageInvocation + + invocation = ZImageLatentsToImageInvocation.model_construct( latents=MagicMock(latents_name="test_latents"), - vae=MagicMock(vae=MagicMock(), seamless_axes=MagicMock()), + vae=MagicMock(vae=MagicMock(), seamless_axes=["x", "y"]), ) try: From 47957c0a0604cf62ef467f8aa1d1a3a0c4e89908 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sun, 4 Jan 2026 22:13:55 -0500 Subject: [PATCH 7/7] chore(ruff): fix ruff errors --- tests/app/invocations/test_z_image_working_memory.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/app/invocations/test_z_image_working_memory.py b/tests/app/invocations/test_z_image_working_memory.py index 40a0a4ce925..2652a4d05ab 100644 --- a/tests/app/invocations/test_z_image_working_memory.py +++ b/tests/app/invocations/test_z_image_working_memory.py @@ -7,7 +7,6 @@ from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL from invokeai.app.invocations.z_image_image_to_latents import ZImageImageToLatentsInvocation -from invokeai.app.invocations.z_image_latents_to_image import ZImageLatentsToImageInvocation from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder @@ -19,7 +18,7 @@ def test_z_image_latents_to_image_requests_working_memory(self, vae_type): """Test that ZImageLatentsToImageInvocation estimates and requests working memory.""" # Create mock VAE mock_vae = MagicMock(spec=vae_type) - + # Only set config for AutoencoderKL (FluxAutoEncoder doesn't use config) if vae_type == AutoencoderKL: mock_vae.config.scaling_factor = 1.0 @@ -69,7 +68,7 @@ def test_z_image_latents_to_image_requests_working_memory(self, vae_type): # Import and create invocation using model_construct to bypass validation from invokeai.app.invocations.z_image_latents_to_image import ZImageLatentsToImageInvocation - + invocation = ZImageLatentsToImageInvocation.model_construct( latents=MagicMock(latents_name="test_latents"), vae=MagicMock(vae=MagicMock(), seamless_axes=["x", "y"]), @@ -91,7 +90,7 @@ def test_z_image_image_to_latents_requests_working_memory(self, vae_type): """Test that ZImageImageToLatentsInvocation estimates and requests working memory.""" # Create mock VAE mock_vae = MagicMock(spec=vae_type) - + # Only set config for AutoencoderKL (FluxAutoEncoder doesn't use config) if vae_type == AutoencoderKL: mock_vae.config.scaling_factor = 1.0