
Commit 669b145

Fix black formatting and keep decomps in separate file
1 parent 7ae597c commit 669b145

File tree

4 files changed: +102 -93 lines changed


python/shark_turbine/dynamo/passes.py

Lines changed: 2 additions & 90 deletions
@@ -1,15 +1,8 @@
 import torch
 from torch.fx.experimental.proxy_tensor import make_fx
-from torch._decomp import get_decompositions, register_decomposition
-from torch._prims_common.wrappers import out_wrapper
-from torch._prims_common import (
-    DeviceLikeType,
-    TensorLikeType,
-)
-import torch._refs as _refs
+from shark_turbine.dynamo import utils
 from torch.func import functionalize
-from torch import Tensor
-from typing import Dict, List, Tuple, Optional
+from typing import List
 
 # default decompositions pulled from SHARK / torch._decomp
 DEFAULT_DECOMPOSITIONS = [
@@ -59,87 +52,6 @@
 ]
 
 
-@register_decomposition(torch.ops.aten._scaled_dot_product_flash_attention.default)
-def scaled_dot_product_flash_attention(
-    query,
-    key,
-    value,
-    dropout_p: float = 0.0,
-    is_causal: bool = False,
-    return_debug_mask: bool = False,
-    *,
-    scale: float = None,
-) -> Tuple[Tensor, Tensor, Tensor, Tensor, int, int, Tensor, Tensor, Tensor]:
-    dtype = query.dtype
-    batchSize, num_head, qSize, headSize = (
-        query.shape[0],
-        query.shape[1],
-        query.shape[2],
-        query.shape[3],
-    )
-
-    logsumexp = torch.empty([batchSize, qSize, num_head, headSize], dtype=torch.float)
-    cum_seq_q, cum_seq_k = torch.empty([], dtype=torch.long), torch.empty(
-        [], dtype=torch.long
-    )
-    max_q, max_k = 0, 0
-    philox_seed, philox_offset = torch.empty([], dtype=torch.long), torch.empty(
-        [], dtype=torch.long
-    )
-    debug_attn_mask = torch.empty(
-        [],
-        dtype=query.dtype,
-        device="cpu",
-        requires_grad=query.requires_grad,
-    )
-    output, _ = torch.ops.aten._scaled_dot_product_attention_math.default(
-        query, key, value, None, dropout_p, is_causal, None, scale=scale
-    )
-    output = output.transpose(1, 2).contiguous(memory_format=torch.contiguous_format)
-    return (
-        output.transpose(1, 2),
-        logsumexp,
-        cum_seq_q,
-        cum_seq_k,
-        max_q,
-        max_k,
-        philox_seed,
-        philox_offset,
-        debug_attn_mask,
-    )
-
-
-# manually add decomposition to bypass the error that comes
-# from VAE encode(inp).latent_dist.sample() failing to symbolically
-# trace from torch fx.
-# diffusers side issue: https://github.com/huggingface/diffusers/issues/6239
-# temporary torch fix: https://github.com/pytorch/pytorch/issues/107170
-@register_decomposition(torch.ops.aten.randn.generator)
-@out_wrapper()
-def randn_generator(
-    *shape,
-    generator: Optional[torch.Generator] = None,
-    dtype: Optional[torch.dtype] = None,
-    device: Optional[DeviceLikeType] = None,
-    layout: Optional[torch.layout] = None,
-    requires_grad: bool = False,
-    pin_memory: bool = False,
-) -> TensorLikeType:
-    # We should eventually support the generator overload.
-    # However, if someone passes in a None generator explicitly,
-    # we can just fall back to randn.default
-    if generator is None:
-        return _refs.randn(
-            *shape,
-            dtype=dtype,
-            device=device,
-            layout=layout,
-            requires_grad=requires_grad,
-            pin_memory=pin_memory,
-        )
-    return NotImplemented
-
-
 def apply_decompositions(
     gm: torch.fx.GraphModule,
     example_inputs,
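
Because the decompositions are now registered in shark_turbine.dynamo.utils, passes.py only needs to import that module for the @register_decomposition decorators to run and populate torch's global decomposition registry. The sketch below is illustrative only (apply_decompositions is not shown in full in this hunk, so the exact in-tree usage may differ); it shows one way the registered decomps can be pulled into an FX trace:

import torch
from torch.fx.experimental.proxy_tensor import make_fx
from torch._decomp import get_decompositions

# Importing the module runs its @register_decomposition decorators,
# adding the ops it covers to torch's global decomposition registry.
from shark_turbine.dynamo import utils  # noqa: F401

decomp_table = get_decompositions(
    [
        torch.ops.aten._scaled_dot_product_flash_attention.default,
        torch.ops.aten.randn.generator,
    ]
)


def attn(q, k, v):
    # Call the flash-attention op directly so the decomposition is exercised.
    return torch.ops.aten._scaled_dot_product_flash_attention.default(q, k, v)[0]


q = k = v = torch.randn(1, 2, 8, 4)
gm = make_fx(attn, decomposition_table=decomp_table)(q, k, v)
print(gm.graph)  # flash attention is lowered to _scaled_dot_product_attention_math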

python/shark_turbine/dynamo/utils.py

Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
+import torch
+from torch._prims_common.wrappers import out_wrapper
+from torch._prims_common import (
+    DeviceLikeType,
+    TensorLikeType,
+)
+import torch._refs as _refs
+from torch._decomp import get_decompositions, register_decomposition
+from torch import Tensor
+from typing import Dict, List, Tuple, Optional
+
+
+@register_decomposition(torch.ops.aten._scaled_dot_product_flash_attention.default)
+def scaled_dot_product_flash_attention(
+    query,
+    key,
+    value,
+    dropout_p: float = 0.0,
+    is_causal: bool = False,
+    return_debug_mask: bool = False,
+    *,
+    scale: float = None,
+) -> Tuple[Tensor, Tensor, Tensor, Tensor, int, int, Tensor, Tensor, Tensor]:
+    dtype = query.dtype
+    batchSize, num_head, qSize, headSize = (
+        query.shape[0],
+        query.shape[1],
+        query.shape[2],
+        query.shape[3],
+    )
+
+    logsumexp = torch.empty([batchSize, qSize, num_head, headSize], dtype=torch.float)
+    cum_seq_q, cum_seq_k = torch.empty([], dtype=torch.long), torch.empty(
+        [], dtype=torch.long
+    )
+    max_q, max_k = 0, 0
+    philox_seed, philox_offset = torch.empty([], dtype=torch.long), torch.empty(
+        [], dtype=torch.long
+    )
+    debug_attn_mask = torch.empty(
+        [],
+        dtype=query.dtype,
+        device="cpu",
+        requires_grad=query.requires_grad,
+    )
+    output, _ = torch.ops.aten._scaled_dot_product_attention_math.default(
+        query, key, value, None, dropout_p, is_causal, None, scale=scale
+    )
+    output = output.transpose(1, 2).contiguous(memory_format=torch.contiguous_format)
+    return (
+        output.transpose(1, 2),
+        logsumexp,
+        cum_seq_q,
+        cum_seq_k,
+        max_q,
+        max_k,
+        philox_seed,
+        philox_offset,
+        debug_attn_mask,
+    )
+
+
+# manually add decomposition to bypass the error that comes
+# from VAE encode(inp).latent_dist.sample() failing to symbolically
+# trace from torch fx.
+# diffusers side issue: https://github.com/huggingface/diffusers/issues/6239
+# temporary torch fix: https://github.com/pytorch/pytorch/issues/107170
+@register_decomposition(torch.ops.aten.randn.generator)
+@out_wrapper()
+def randn_generator(
+    *shape,
+    generator: Optional[torch.Generator] = None,
+    dtype: Optional[torch.dtype] = None,
+    device: Optional[DeviceLikeType] = None,
+    layout: Optional[torch.layout] = None,
+    requires_grad: bool = False,
+    pin_memory: bool = False,
+) -> TensorLikeType:
+    # We should eventually support the generator overload.
+    # However, if someone passes in a None generator explicitly,
+    # we can just fall back to randn.default
+    if generator is None:
+        return _refs.randn(
+            *shape,
+            dtype=dtype,
+            device=device,
+            layout=layout,
+            requires_grad=requires_grad,
+            pin_memory=pin_memory,
+        )
+    return NotImplemented
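
The randn.generator decomposition matters when a traced model reaches torch.randn with an explicit generator argument, which is what diffusers' latent_dist.sample() does and what previously broke FX tracing. A small, hypothetical check (not part of this commit) of the fallback path when generator is None:

import torch
from shark_turbine.dynamo.utils import randn_generator

# With generator=None the decomposition falls through to the plain randn
# reference implementation, so tracing no longer trips over the generator overload.
out = randn_generator(2, 3, generator=None, dtype=torch.float32, device="cpu")
assert out.shape == (2, 3)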

python/turbine_models/custom_models/sd_inference/vae_runner.py

Lines changed: 1 addition & 1 deletion
@@ -74,7 +74,7 @@ def decode_inp(self, inp):
         with torch.no_grad():
             x = self.vae.decode(inp, return_dict=False)[0]
             return x
-
+
     def encode_inp(self, inp):
         latents = self.vae.encode(inp).latent_dist.sample()
         return 0.18215 * latents

python/turbine_models/tests/sd_test.py

Lines changed: 8 additions & 2 deletions
@@ -169,7 +169,10 @@ def testExportVaeModelDecode(self):
             arguments["external_weight_path"],
         )
         torch_output = vae_runner.run_torch_vae(
-            arguments["hf_model_name"], arguments["hf_auth_token"], "decode", example_input
+            arguments["hf_model_name"],
+            arguments["hf_auth_token"],
+            "decode",
+            example_input,
         )
         err = utils.largest_error(torch_output, turbine)
         assert err < 9e-5
@@ -211,7 +214,10 @@ def testExportVaeModelEncode(self):
             arguments["external_weight_path"],
         )
         torch_output = vae_runner.run_torch_vae(
-            arguments["hf_model_name"], arguments["hf_auth_token"], "encode", example_input
+            arguments["hf_model_name"],
+            arguments["hf_auth_token"],
+            "encode",
+            example_input,
        )
         err = utils.largest_error(torch_output, turbine)
         assert err < 2e-3
