From afc6abc47862df3a75cba4ed0b22cca3fac4a8c9 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 4 Jan 2026 22:31:33 +0000
Subject: [PATCH 1/7] Initial plan


From 7d22505c1a494c0121754e3e853d84815b790b42 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 4 Jan 2026 22:47:26 +0000
Subject: [PATCH 2/7] Fix Z-Image VAE encode/decode to request working memory

Co-authored-by: lstein <111189+lstein@users.noreply.github.com>
---
 .../invocations/z_image_image_to_latents.py    | 18 +++++++++++++++++-
 .../invocations/z_image_latents_to_image.py    | 17 ++++++++++++++++-
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/invokeai/app/invocations/z_image_image_to_latents.py b/invokeai/app/invocations/z_image_image_to_latents.py
index d74ce073b69..da8bf028132 100644
--- a/invokeai/app/invocations/z_image_image_to_latents.py
+++ b/invokeai/app/invocations/z_image_image_to_latents.py
@@ -20,6 +20,7 @@
 from invokeai.backend.model_manager.load.load_base import LoadedModel
 from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
 from invokeai.backend.util.devices import TorchDevice
+from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux, estimate_vae_working_memory_sd3
 
 # Z-Image can use either the Diffusers AutoencoderKL or the FLUX AutoEncoder
 ZImageVAE = Union[AutoencoderKL, FluxAutoEncoder]
@@ -47,7 +48,22 @@ def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tenso
                 "Ensure you are using a compatible VAE model."
             )
 
-        with vae_info.model_on_device() as (_, vae):
+        # Estimate working memory needed for VAE encode
+        is_flux_vae = isinstance(vae_info.model, FluxAutoEncoder)
+        if is_flux_vae:
+            estimated_working_memory = estimate_vae_working_memory_flux(
+                operation="encode",
+                image_tensor=image_tensor,
+                vae=vae_info.model,
+            )
+        else:
+            estimated_working_memory = estimate_vae_working_memory_sd3(
+                operation="encode",
+                image_tensor=image_tensor,
+                vae=vae_info.model,
+            )
+
+        with vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae):
             if not isinstance(vae, (AutoencoderKL, FluxAutoEncoder)):
                 raise TypeError(
                     f"Expected AutoencoderKL or FluxAutoEncoder, got {type(vae).__name__}. "
diff --git a/invokeai/app/invocations/z_image_latents_to_image.py b/invokeai/app/invocations/z_image_latents_to_image.py
index d29ec0998a4..094bbf7c880 100644
--- a/invokeai/app/invocations/z_image_latents_to_image.py
+++ b/invokeai/app/invocations/z_image_latents_to_image.py
@@ -21,6 +21,7 @@
 from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder
 from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
 from invokeai.backend.util.devices import TorchDevice
+from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux, estimate_vae_working_memory_sd3
 
 # Z-Image can use either the Diffusers AutoencoderKL or the FLUX AutoEncoder
 ZImageVAE = Union[AutoencoderKL, FluxAutoEncoder]
@@ -53,12 +54,26 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
 
         is_flux_vae = isinstance(vae_info.model, FluxAutoEncoder)
 
+        # Estimate working memory needed for VAE decode
+        if is_flux_vae:
+            estimated_working_memory = estimate_vae_working_memory_flux(
+                operation="decode",
+                image_tensor=latents,
+                vae=vae_info.model,
+            )
+        else:
+            estimated_working_memory = estimate_vae_working_memory_sd3(
+                operation="decode",
+                image_tensor=latents,
+                vae=vae_info.model,
+            )
+
         # FLUX VAE doesn't support seamless, so only apply for AutoencoderKL
         seamless_context = (
             nullcontext() if is_flux_vae else SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes)
         )
 
-        with seamless_context, vae_info.model_on_device() as (_, vae):
+        with seamless_context, vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae):
             context.util.signal_progress("Running VAE")
             if not isinstance(vae, (AutoencoderKL, FluxAutoEncoder)):
                 raise TypeError(

From 462182fe36e232747494a6fcd80259193e19842f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 4 Jan 2026 22:51:45 +0000
Subject: [PATCH 3/7] Add test for Z-Image working memory estimation

Co-authored-by: lstein <111189+lstein@users.noreply.github.com>
---
 .../test_z_image_working_memory.py            | 139 ++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 tests/app/invocations/test_z_image_working_memory.py

diff --git a/tests/app/invocations/test_z_image_working_memory.py b/tests/app/invocations/test_z_image_working_memory.py
new file mode 100644
index 00000000000..4173460417b
--- /dev/null
+++ b/tests/app/invocations/test_z_image_working_memory.py
@@ -0,0 +1,139 @@
+"""Test that Z-Image VAE invocations properly estimate and request working memory."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+import torch
+from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
+
+from invokeai.app.invocations.z_image_image_to_latents import ZImageImageToLatentsInvocation
+from invokeai.app.invocations.z_image_latents_to_image import ZImageLatentsToImageInvocation
+from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder
+
+
+class TestZImageWorkingMemory:
+    """Test that Z-Image VAE invocations request working memory."""
+
+    @pytest.mark.parametrize("vae_type", [AutoencoderKL, FluxAutoEncoder])
+    def test_z_image_latents_to_image_requests_working_memory(self, vae_type):
+        """Test that ZImageLatentsToImageInvocation estimates and requests working memory."""
+        # Create mock VAE
+        mock_vae = MagicMock(spec=vae_type)
+        mock_vae.config.scaling_factor = 1.0
+        mock_vae.config.shift_factor = None
+
+        # Create mock parameter for dtype detection
+        mock_param = torch.zeros(1)
+        mock_vae.parameters.return_value = iter([mock_param])
+
+        # Create mock vae_info
+        mock_vae_info = MagicMock()
+        mock_vae_info.model = mock_vae
+        
+        # Create mock context manager return value
+        mock_cm = MagicMock()
+        mock_cm.__enter__ = MagicMock(return_value=(None, mock_vae))
+        mock_cm.__exit__ = MagicMock(return_value=None)
+        mock_vae_info.model_on_device = MagicMock(return_value=mock_cm)
+
+        # Mock the context
+        mock_context = MagicMock()
+        mock_context.models.load.return_value = mock_vae_info
+        
+        # Mock latents
+        mock_latents = torch.zeros(1, 16, 64, 64)
+        mock_context.tensors.load.return_value = mock_latents
+
+        # Mock the appropriate estimation function
+        if vae_type == FluxAutoEncoder:
+            estimation_path = "invokeai.app.invocations.z_image_latents_to_image.estimate_vae_working_memory_flux"
+        else:
+            estimation_path = "invokeai.app.invocations.z_image_latents_to_image.estimate_vae_working_memory_sd3"
+
+        with patch(estimation_path) as mock_estimate:
+            expected_memory = 1024 * 1024 * 500  # 500MB
+            mock_estimate.return_value = expected_memory
+
+            # Mock VAE decode to avoid actual computation
+            if vae_type == FluxAutoEncoder:
+                mock_vae.decode.return_value = torch.zeros(1, 3, 512, 512)
+            else:
+                mock_vae.decode.return_value = (torch.zeros(1, 3, 512, 512),)
+
+            # Mock image save
+            mock_image_dto = MagicMock()
+            mock_context.images.save.return_value = mock_image_dto
+
+            # Create and invoke
+            invocation = ZImageLatentsToImageInvocation(
+                latents=MagicMock(latents_name="test_latents"),
+                vae=MagicMock(vae=MagicMock(), seamless_axes=MagicMock()),
+            )
+
+            try:
+                invocation.invoke(mock_context)
+            except Exception:
+                # We expect some errors due to mocking, but we just want to verify the working memory was requested
+                pass
+
+            # Verify that working memory estimation was called
+            mock_estimate.assert_called_once()
+            # Verify that model_on_device was called with the estimated working memory
+            mock_vae_info.model_on_device.assert_called_once_with(working_mem_bytes=expected_memory)
+
+    @pytest.mark.parametrize("vae_type", [AutoencoderKL, FluxAutoEncoder])
+    def test_z_image_image_to_latents_requests_working_memory(self, vae_type):
+        """Test that ZImageImageToLatentsInvocation estimates and requests working memory."""
+        # Create mock VAE
+        mock_vae = MagicMock(spec=vae_type)
+        mock_vae.config.scaling_factor = 1.0
+        mock_vae.config.shift_factor = None
+
+        # Create mock parameter for dtype detection
+        mock_param = torch.zeros(1)
+        mock_vae.parameters.return_value = iter([mock_param])
+
+        # Create mock vae_info
+        mock_vae_info = MagicMock()
+        mock_vae_info.model = mock_vae
+        
+        # Create mock context manager return value
+        mock_cm = MagicMock()
+        mock_cm.__enter__ = MagicMock(return_value=(None, mock_vae))
+        mock_cm.__exit__ = MagicMock(return_value=None)
+        mock_vae_info.model_on_device = MagicMock(return_value=mock_cm)
+
+        # Mock image tensor
+        mock_image_tensor = torch.zeros(1, 3, 512, 512)
+
+        # Mock the appropriate estimation function
+        if vae_type == FluxAutoEncoder:
+            estimation_path = "invokeai.app.invocations.z_image_image_to_latents.estimate_vae_working_memory_flux"
+        else:
+            estimation_path = "invokeai.app.invocations.z_image_image_to_latents.estimate_vae_working_memory_sd3"
+
+        with patch(estimation_path) as mock_estimate:
+            expected_memory = 1024 * 1024 * 250  # 250MB
+            mock_estimate.return_value = expected_memory
+
+            # Mock VAE encode to avoid actual computation
+            if vae_type == FluxAutoEncoder:
+                mock_vae.encode.return_value = torch.zeros(1, 16, 64, 64)
+            else:
+                mock_latent_dist = MagicMock()
+                mock_latent_dist.sample.return_value = torch.zeros(1, 16, 64, 64)
+                mock_encode_result = MagicMock()
+                mock_encode_result.latent_dist = mock_latent_dist
+                mock_vae.encode.return_value = mock_encode_result
+
+            # Call the static method directly
+            try:
+                ZImageImageToLatentsInvocation.vae_encode(mock_vae_info, mock_image_tensor)
+            except Exception:
+                # We expect some errors due to mocking, but we just want to verify the working memory was requested
+                pass
+
+            # Verify that working memory estimation was called
+            mock_estimate.assert_called_once()
+            # Verify that model_on_device was called with the estimated working memory
+            mock_vae_info.model_on_device.assert_called_once_with(working_mem_bytes=expected_memory)

From e13401bd647b0374e40b5e242be648383fc5cb1a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 5 Jan 2026 00:54:00 +0000
Subject: [PATCH 4/7] Fix unit test: only set config attribute for
 AutoencoderKL, not FluxAutoEncoder

Co-authored-by: lstein <111189+lstein@users.noreply.github.com>
---
 .../app/invocations/test_z_image_working_memory.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/tests/app/invocations/test_z_image_working_memory.py b/tests/app/invocations/test_z_image_working_memory.py
index 4173460417b..0d65a5a9abd 100644
--- a/tests/app/invocations/test_z_image_working_memory.py
+++ b/tests/app/invocations/test_z_image_working_memory.py
@@ -19,8 +19,11 @@ def test_z_image_latents_to_image_requests_working_memory(self, vae_type):
         """Test that ZImageLatentsToImageInvocation estimates and requests working memory."""
         # Create mock VAE
         mock_vae = MagicMock(spec=vae_type)
-        mock_vae.config.scaling_factor = 1.0
-        mock_vae.config.shift_factor = None
+        
+        # Only set config for AutoencoderKL (FluxAutoEncoder doesn't use config)
+        if vae_type == AutoencoderKL:
+            mock_vae.config.scaling_factor = 1.0
+            mock_vae.config.shift_factor = None
 
         # Create mock parameter for dtype detection
         mock_param = torch.zeros(1)
@@ -86,8 +89,11 @@ def test_z_image_image_to_latents_requests_working_memory(self, vae_type):
         """Test that ZImageImageToLatentsInvocation estimates and requests working memory."""
         # Create mock VAE
         mock_vae = MagicMock(spec=vae_type)
-        mock_vae.config.scaling_factor = 1.0
-        mock_vae.config.shift_factor = None
+        
+        # Only set config for AutoencoderKL (FluxAutoEncoder doesn't use config)
+        if vae_type == AutoencoderKL:
+            mock_vae.config.scaling_factor = 1.0
+            mock_vae.config.shift_factor = None
 
         # Create mock parameter for dtype detection
         mock_param = torch.zeros(1)

From b9bc9159c87e857c23886a1948cc4ba835fde40e Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Sun, 4 Jan 2026 19:59:11 -0500
Subject: [PATCH 5/7] style: fix ruff errors

---
 tests/app/invocations/test_z_image_working_memory.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/app/invocations/test_z_image_working_memory.py b/tests/app/invocations/test_z_image_working_memory.py
index 0d65a5a9abd..1607ccf2c9a 100644
--- a/tests/app/invocations/test_z_image_working_memory.py
+++ b/tests/app/invocations/test_z_image_working_memory.py
@@ -32,7 +32,7 @@ def test_z_image_latents_to_image_requests_working_memory(self, vae_type):
         # Create mock vae_info
         mock_vae_info = MagicMock()
         mock_vae_info.model = mock_vae
-        
+
         # Create mock context manager return value
         mock_cm = MagicMock()
         mock_cm.__enter__ = MagicMock(return_value=(None, mock_vae))
@@ -42,7 +42,7 @@ def test_z_image_latents_to_image_requests_working_memory(self, vae_type):
         # Mock the context
         mock_context = MagicMock()
         mock_context.models.load.return_value = mock_vae_info
-        
+
         # Mock latents
         mock_latents = torch.zeros(1, 16, 64, 64)
         mock_context.tensors.load.return_value = mock_latents
@@ -102,7 +102,7 @@ def test_z_image_image_to_latents_requests_working_memory(self, vae_type):
         # Create mock vae_info
         mock_vae_info = MagicMock()
         mock_vae_info.model = mock_vae
-        
+
         # Create mock context manager return value
         mock_cm = MagicMock()
         mock_cm.__enter__ = MagicMock(return_value=(None, mock_vae))

From 3205bf6606d0351407de5e432a4a845e77774502 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 5 Jan 2026 01:05:15 +0000
Subject: [PATCH 6/7] Fix test: use model_construct to bypass Pydantic
 validation for mock fields

Co-authored-by: lstein <111189+lstein@users.noreply.github.com>
---
 tests/app/invocations/test_z_image_working_memory.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/app/invocations/test_z_image_working_memory.py b/tests/app/invocations/test_z_image_working_memory.py
index 1607ccf2c9a..40a0a4ce925 100644
--- a/tests/app/invocations/test_z_image_working_memory.py
+++ b/tests/app/invocations/test_z_image_working_memory.py
@@ -67,10 +67,12 @@ def test_z_image_latents_to_image_requests_working_memory(self, vae_type):
             mock_image_dto = MagicMock()
             mock_context.images.save.return_value = mock_image_dto
 
-            # Create and invoke
-            invocation = ZImageLatentsToImageInvocation(
+            # Import and create invocation using model_construct to bypass validation
+            from invokeai.app.invocations.z_image_latents_to_image import ZImageLatentsToImageInvocation
+            
+            invocation = ZImageLatentsToImageInvocation.model_construct(
                 latents=MagicMock(latents_name="test_latents"),
-                vae=MagicMock(vae=MagicMock(), seamless_axes=MagicMock()),
+                vae=MagicMock(vae=MagicMock(), seamless_axes=["x", "y"]),
             )
 
             try:

From 47957c0a0604cf62ef467f8aa1d1a3a0c4e89908 Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Sun, 4 Jan 2026 22:13:55 -0500
Subject: [PATCH 7/7] chore(ruff): fix ruff errors

---
 tests/app/invocations/test_z_image_working_memory.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/app/invocations/test_z_image_working_memory.py b/tests/app/invocations/test_z_image_working_memory.py
index 40a0a4ce925..2652a4d05ab 100644
--- a/tests/app/invocations/test_z_image_working_memory.py
+++ b/tests/app/invocations/test_z_image_working_memory.py
@@ -7,7 +7,6 @@
 from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
 
 from invokeai.app.invocations.z_image_image_to_latents import ZImageImageToLatentsInvocation
-from invokeai.app.invocations.z_image_latents_to_image import ZImageLatentsToImageInvocation
 from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder
 
 
@@ -19,7 +18,7 @@ def test_z_image_latents_to_image_requests_working_memory(self, vae_type):
         """Test that ZImageLatentsToImageInvocation estimates and requests working memory."""
         # Create mock VAE
         mock_vae = MagicMock(spec=vae_type)
-        
+
         # Only set config for AutoencoderKL (FluxAutoEncoder doesn't use config)
         if vae_type == AutoencoderKL:
             mock_vae.config.scaling_factor = 1.0
@@ -69,7 +68,7 @@ def test_z_image_latents_to_image_requests_working_memory(self, vae_type):
 
             # Import and create invocation using model_construct to bypass validation
             from invokeai.app.invocations.z_image_latents_to_image import ZImageLatentsToImageInvocation
-            
+
             invocation = ZImageLatentsToImageInvocation.model_construct(
                 latents=MagicMock(latents_name="test_latents"),
                 vae=MagicMock(vae=MagicMock(), seamless_axes=["x", "y"]),
@@ -91,7 +90,7 @@ def test_z_image_image_to_latents_requests_working_memory(self, vae_type):
         """Test that ZImageImageToLatentsInvocation estimates and requests working memory."""
         # Create mock VAE
         mock_vae = MagicMock(spec=vae_type)
-        
+
         # Only set config for AutoencoderKL (FluxAutoEncoder doesn't use config)
         if vae_type == AutoencoderKL:
             mock_vae.config.scaling_factor = 1.0