Commit d1fda83
WIP: fixing scalars in jittables / precision
1 parent: 4498486
6 files changed: +153 -157 lines changed

python/turbine_models/custom_models/sd_inference/unet.py
Lines changed: 5 additions & 5 deletions

@@ -62,15 +62,15 @@
 class UnetModel(torch.nn.Module):
-    def __init__(self, hf_model_name, hf_auth_token):
+    def __init__(self, hf_model_name):
         super().__init__()
         self.unet = UNet2DConditionModel.from_pretrained(
             hf_model_name,
             subfolder="unet",
-            token=hf_auth_token,
         )

-    def forward(self, sample, timestep, encoder_hidden_states, guidance_scale):
+    def forward(self, sample, timestep, encoder_hidden_states):
+        guidance_scale = 7.5
         samples = torch.cat([sample] * 2)
         unet_out = self.unet.forward(
             samples, timestep, encoder_hidden_states, return_dict=False

@@ -127,10 +127,10 @@ def main(
         encoder_hidden_states=AbstractTensor(
             *encoder_hidden_states_sizes, dtype=dtype
         ),
-        guidance_scale=AbstractTensor(1, dtype=dtype),
+        #guidance_scale=AbstractTensor(1, dtype=dtype),
     ):
         return jittable(unet_model.forward)(
-            sample, timestep, encoder_hidden_states, guidance_scale
+            sample, timestep, encoder_hidden_states, # guidance_scale
         )

     import_to = "INPUT" if compile_to == "linalg" else "IMPORT"

python/turbine_models/custom_models/sd_inference/unet_runner.py
Lines changed: 8 additions & 8 deletions

@@ -52,7 +52,7 @@ def run_unet(
     sample,
     timestep,
     encoder_hidden_states,
-    guidance_scale,
+    # guidance_scale,
     vmfb_path,
     hf_model_name,
     hf_auth_token,

@@ -64,7 +64,7 @@ def run_unet(
         ireert.asdevicearray(runner.config.device, sample),
         ireert.asdevicearray(runner.config.device, timestep),
         ireert.asdevicearray(runner.config.device, encoder_hidden_states),
-        ireert.asdevicearray(runner.config.device, guidance_scale),
+        # ireert.asdevicearray(runner.config.device, guidance_scale),
     ]
     results = runner.ctx.modules.compiled_unet["main"](*inputs)
     return results

@@ -90,13 +90,13 @@ def __init__(self, hf_model_name, hf_auth_token):
             )
             self.guidance_scale = 7.5

-        def forward(self, sample, timestep, encoder_hidden_states, guidance_scale):
+        def forward(self, sample, timestep, encoder_hidden_states): #, guidance_scale):
             samples = torch.cat([sample] * 2)
             unet_out = self.unet.forward(
                 samples, timestep, encoder_hidden_states, return_dict=False
             )[0]
             noise_pred_uncond, noise_pred_text = unet_out.chunk(2)
-            noise_pred = noise_pred_uncond + guidance_scale * (
+            noise_pred = noise_pred_uncond + self.guidance_scale * (
                 noise_pred_text - noise_pred_uncond
             )
             return noise_pred

@@ -106,7 +106,7 @@ def forward(self, sample, timestep, encoder_hidden_states, guidance_scale):
         hf_auth_token,
     )
     results = unet_model.forward(
-        sample, timestep, encoder_hidden_states, guidance_scale
+        sample, timestep, encoder_hidden_states, #guidance_scale
    )
     np_torch_output = results.detach().cpu().numpy()
     return np_torch_output

@@ -118,7 +118,7 @@ def forward(self, sample, timestep, encoder_hidden_states, guidance_scale):
         args.batch_size, 4, args.height // 8, args.width // 8, dtype=torch.float32
     )
     timestep = torch.zeros(1, dtype=torch.float32)
-    guidance_scale = torch.Tensor([7.5], dtype=torch.float32)
+    # guidance_scale = torch.Tensor([7.5], dtype=torch.float32)
     if args.hf_model_name == "CompVis/stable-diffusion-v1-4":
         encoder_hidden_states = torch.rand(2, 77, 768, dtype=torch.float32)
     elif args.hf_model_name == "stabilityai/stable-diffusion-2-1-base":

@@ -129,7 +129,7 @@ def forward(self, sample, timestep, encoder_hidden_states, guidance_scale):
         sample,
         timestep,
         encoder_hidden_states,
-        guidance_scale,
+        # guidance_scale,
         args.vmfb_path,
         args.hf_model_name,
         args.hf_auth_token,

@@ -152,7 +152,7 @@ def forward(self, sample, timestep, encoder_hidden_states, guidance_scale):
         sample,
         timestep,
         encoder_hidden_states,
-        guidance_scale,
+        # guidance_scale,
     )
     print("TORCH OUTPUT:", torch_output, torch_output.shape, torch_output.dtype)
     err = utils.largest_error(torch_output, turbine_output)
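The final comparison relies on utils.largest_error, which is not shown in this diff. A minimal sketch of what such a metric typically computes (assumed implementation, names illustrative):

import numpy as np

def largest_error(expected, actual):
    # Largest absolute elementwise difference between the two outputs.
    return np.max(np.abs(np.asarray(expected) - np.asarray(actual)))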

python/turbine_models/custom_models/sd_inference/utils.py
Lines changed: 1 addition & 1 deletion

@@ -16,6 +16,7 @@ def save_external_weights(
     for name in mod_params:
         mapper["params." + name] = name
     if external_weight_file:
+        print("Saving params to", external_weight_file)
         safetensors.torch.save_file(mod_params, external_weight_file)
         print("Saved params to", external_weight_file)

@@ -35,7 +36,6 @@ def compile_to_vmfb(module_str, device, target_triple, max_alloc, safe_name):
         "--iree-llvmcpu-target-triple=x86_64-linux-gnu",
         "--iree-stream-resource-index-bits=64",
         "--iree-vm-target-index-bits=64",
-        "--iree-codegen-check-ir-before-llvm-conversion=false",
         "--iree-opt-const-expr-hoisting=False",
     ]
     if device == "cpu":

python/turbine_models/custom_models/sd_inference/vae.py
Lines changed: 1 addition & 1 deletion

@@ -63,7 +63,7 @@ def __init__(
         super().__init__()
         self.vae = None
         self.base_vae = False
-        if custom_vae == "":
+        if custom_vae in ["", None]:
             self.vae = AutoencoderKL.from_pretrained(
                 hf_model_name,
                 subfolder="vae",

python/turbine_models/custom_models/sd_inference/vae_runner.py
Lines changed: 2 additions & 2 deletions

@@ -51,7 +51,7 @@ def run_vae(device, example_input, vmfb_path, hf_model_name, external_weight_pat
     return results


-def run_torch_vae(hf_model_name, hf_auth_token, variant, example_input):
+def run_torch_vae(hf_model_name, variant, example_input):
     from diffusers import AutoencoderKL

     class VaeModel(torch.nn.Module):

@@ -89,7 +89,7 @@ def __init__(
             self.vae.load_state_dict(custom_vae)
             self.base_vae = base_vae

-        def decode_inp(self, inp):
+        def decode_inp(self, input):
             with torch.no_grad():
                 if not self.base_vae:
                     input = 1 / 0.18215 * input
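Background on the 1 / 0.18215 factor above: Stable Diffusion latents are multiplied by the VAE's scaling factor (approximately 0.18215) after encoding, so decoding has to undo that scaling first unless the raw/base VAE is used. A minimal sketch mirroring decode_inp (helper name and signature are illustrative):

import torch

def decode_latents(vae, latents, base_vae=False):
    with torch.no_grad():
        if not base_vae:
            latents = latents / 0.18215  # undo encode-time scaling
        # vae is a diffusers AutoencoderKL; decode returns the image tensor.
        return vae.decode(latents, return_dict=False)[0]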
