@@ -670,9 +670,7 @@ def transform_module(self, model: thunder.ThunderModule):
                 }
                 model._overrides_buffers[n] = qb
 
-        def transform_traces_pre_prologue(
-            self, prologue_trace, computation_trace, epilogue_trace, **kwargs
-        ):
+        def transform_traces_pre_prologue(self, prologue_trace, computation_trace, epilogue_trace, **kwargs):
             tm = self.thunder_module
             from thunder.core.trace import tracectx
 
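For readers skimming the diff: `transform_traces_pre_prologue` is the hook a Thunder transform implements to rewrite the prologue, computation, and epilogue traces after tracing. A minimal no-op sketch of the hook's shape, assuming the `Transform` base class lives at `thunder.core.transform_common` (true for recent lightning-thunder versions; this sketch is not part of the commit):

```python
# Minimal no-op sketch, not this commit's code. Assumes
# thunder.core.transform_common.Transform is available.
from thunder.core.transform_common import Transform


class NoopTransform(Transform):
    def transform_module(self, model):
        # called once with the ThunderModule; a real transform would swap
        # parameters/buffers here (cf. model._overrides_buffers above)
        pass

    def transform_traces_pre_prologue(self, prologue_trace, computation_trace, epilogue_trace, **kwargs):
        # must hand back all three traces; a real transform returns rewritten copies
        return prologue_trace, computation_trace, epilogue_trace
```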
@@ -681,19 +679,15 @@ def transform_traces_pre_prologue(
             prologue_proxy_map = {
                 get_param_bsym.output.name: dict(
                     shape=self.cast_states[model_weight_name]["qb.shape"],
-                    dtype=thunder.dtypes.to_dtype(
-                        self.cast_states[model_weight_name]["qb.dtype"]
-                    ),
+                    dtype=thunder.dtypes.to_dtype(self.cast_states[model_weight_name]["qb.dtype"]),
                 )
                 for model_weight_name, (check_bsym, get_param_bsym) in checks.items()
                 if model_weight_name in self.cast_states
             }
 
             # here we switch the prologue_trace to a copy with new metadata
-            prologue_trace = (
-                thunder.transforms.quantization.trace_with_replaced_proxy_metadata(
-                    prologue_trace, prologue_proxy_map
-                )
+            prologue_trace = thunder.transforms.quantization.trace_with_replaced_proxy_metadata(
+                prologue_trace, prologue_proxy_map
             )
 
             checks = thunder.transforms.utils.get_checks(prologue_trace)
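The `prologue_proxy_map` built above keys on proxy names and supplies replacement `shape`/`dtype` metadata; `thunder.dtypes.to_dtype` bridges a `torch.dtype` into Thunder's own dtype object so the metadata has the form the trace expects. A hedged one-liner illustrating the bridging (assumes lightning-thunder is installed; not part of the commit):

```python
# Illustration only: thunder.dtypes.to_dtype maps torch dtypes to Thunder dtypes.
import torch
import thunder

assert thunder.dtypes.to_dtype(torch.bfloat16) == thunder.dtypes.bfloat16
```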
@@ -714,79 +708,58 @@ def transform_traces_pre_prologue(
                     shape=psym.shape,
                     dtype=psym.dtype,
                 )
-                for psym, csym in zip(
-                    prologue_trace.bound_symbols[-1].args[0][0], computation_trace.args
-                )
+                for psym, csym in zip(prologue_trace.bound_symbols[-1].args[0][0], computation_trace.args)
                 if psym.shape != csym.shape or psym.dtype != csym.dtype
             }
 
-            new_computation_trace = (
-                thunder.transforms.quantization.trace_with_replaced_proxy_metadata(
-                    computation_trace, computation_proxy_map
-                )
+            new_computation_trace = thunder.transforms.quantization.trace_with_replaced_proxy_metadata(
+                computation_trace, computation_proxy_map
             )
 
-            producers, consumers = thunder.core.utils.producers_and_consumers(
-                new_computation_trace
-            )
+            producers, consumers = thunder.core.utils.producers_and_consumers(new_computation_trace)
 
             bound_symbols = new_computation_trace.bound_symbols
             new_computation_trace.bound_symbols = []
 
-            new_computation_trace._siginfo.args = [
-                (a.name, None) for a in new_computation_trace.args
-            ]
+            new_computation_trace._siginfo.args = [(a.name, None) for a in new_computation_trace.args]
 
             computation_proxy_map = {}
             new_bound_symbols = []
             for bsym in bound_symbols:
-                if (
-                    bsym.sym == thunder.torch.to
-                    and producers[bsym.args[0]].sym == thunder.core.prims.unpack_trivial
-                ):
+                if bsym.sym == thunder.torch.to and producers[bsym.args[0]].sym == thunder.core.prims.unpack_trivial:
                     inp = bsym.args[0]
                     args = (inp, inp.dtype, *bsym.args[2:])
-                    computation_proxy_map[bsym.output.name] = dict(
-                        shape=inp.shape, dtype=inp.dtype
-                    )
+                    computation_proxy_map[bsym.output.name] = dict(shape=inp.shape, dtype=inp.dtype)
                     assert (
-                        len(bsym.subsymbols) == 1
-                        and bsym.subsymbols[0].sym
-                        == thunder.core.prims.convert_element_type
+                        len(bsym.subsymbols) == 1 and bsym.subsymbols[0].sym == thunder.core.prims.convert_element_type
                     )
                     subsymbols = [bsym.subsymbols[0].from_bsym(args=(inp, inp.dtype))]
-                    new_bound_symbols.append(
-                        bsym.from_bsym(args=args, subsymbols=subsymbols)
-                    )
+                    new_bound_symbols.append(bsym.from_bsym(args=args, subsymbols=subsymbols))
                 else:
                     new_bound_symbols.append(bsym.from_bsym())
 
             new_computation_trace.bound_symbols = new_bound_symbols
 
-            new_computation_trace = (
-                thunder.transforms.quantization.trace_with_replaced_proxy_metadata(
-                    new_computation_trace, computation_proxy_map
-                )
+            new_computation_trace = thunder.transforms.quantization.trace_with_replaced_proxy_metadata(
+                new_computation_trace, computation_proxy_map
             )
 
-            new_computation_trace.set_provenance(
-                thunder.core.trace.TraceProvenance("Dtype Convert")
-            )
+            new_computation_trace.set_provenance(thunder.core.trace.TraceProvenance("Dtype Convert"))
             return prologue_trace, new_computation_trace, epilogue_trace
 
     class cast(nn.Module):
         def __init__(
             self,
-            k_shape: Tuple[int, int, int, int],
-            v_shape: Tuple[int, int, int, int],
-            device: Optional[torch.device] = None,
-            dtype: Optional[torch.dtype] = None,
+            k_shape: tuple[int, int, int, int],
+            v_shape: tuple[int, int, int, int],
+            device: torch.device | None = None,
+            dtype: torch.dtype | None = None,
         ) -> None:
             super().__init__()
             self.register_buffer("k", torch.zeros(k_shape, device=device, dtype=dtype), persistent=False)
             self.register_buffer("v", torch.zeros(v_shape, device=device, dtype=dtype), persistent=False)
 
-        def forward(self, k: torch.Tensor, v: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        def forward(self, k: torch.Tensor, v: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
             # move the buffer to the activation dtype for when AMP is used
             self.k = self.k.to(k.dtype)
             self.v = self.v.to(v.dtype)
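The `cast` module above reassigns its non-persistent buffers to the incoming activation dtype, so the module follows the autocast dtype under AMP. That behavior is plain PyTorch and can be checked without Thunder; a self-contained sketch with an illustrative name (`BufferCast` is not from this commit):

```python
# Standalone PyTorch check of the buffer-casting idea; illustrative, not this
# commit's code. The non-persistent buffer starts in float32 and follows the
# activation dtype after the first forward call.
import torch
import torch.nn as nn


class BufferCast(nn.Module):
    def __init__(self, shape: tuple[int, ...]):
        super().__init__()
        self.register_buffer("k", torch.zeros(shape), persistent=False)

    def forward(self, k: torch.Tensor) -> torch.Tensor:
        self.k = self.k.to(k.dtype)  # buffer follows the activation dtype
        return self.k + k


m = BufferCast((2, 3))
out = m(torch.randn(2, 3, dtype=torch.half))
assert m.k.dtype == torch.half and out.dtype == torch.half
```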
@@ -795,23 +768,34 @@ def forward(self, k: torch.Tensor, v: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
 
     # BUG: issue: 1637
     class ParentModule(nn.Module):
-        def __init__(self, k_shape: Tuple[int, int, int, int], v_shape: Tuple[int, int, int, int], device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
+        def __init__(
+            self,
+            k_shape: tuple[int, int, int, int],
+            v_shape: tuple[int, int, int, int],
+            device: torch.device | None = None,
+            dtype: torch.dtype | None = None,
+        ):
             super().__init__()
             self.cast_module = cast(k_shape, v_shape, device=device, dtype=dtype)
 
-        def forward(self, k: torch.Tensor, v: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        def forward(self, k: torch.Tensor, v: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
             return self.cast_module(k, v)
 
     with torch.device("cpu"):
         k_shape = (2, 3, 4, 5)
         v_shape = (2, 3, 4, 5)
         device = torch.device("cpu")
         dtype = torch.float32
-        model = (ParentModule(k_shape, v_shape, device=device, dtype=dtype).eval().requires_grad_(False))
+        model = ParentModule(k_shape, v_shape, device=device, dtype=dtype).eval().requires_grad_(False)
 
         k = torch.randn(2, 3, 4, 5, device=device, dtype=torch.half)
         v = torch.randn(2, 3, 4, 5, device=device, dtype=torch.half)
-        cast_jit = thunder.jit(model, transforms=[CastBuffers(),])
+        cast_jit = thunder.jit(
+            model,
+            transforms=[
+                CastBuffers(),
+            ],
+        )
         output_k, output_v = cast_jit(k, v)
 
         def check_dtypes(bsym):
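The diff cuts off at `check_dtypes`; its body is not shown, and the truncation is left as-is. Purely as a hypothetical reconstruction (not this commit's code), a checker of that shape would typically recurse through each bound symbol of the final computation trace and assert that every tensor proxy carries the activation dtype:

```python
# Hypothetical reconstruction only -- the commit's check_dtypes body is not
# visible in the diff. Assumes thunder.core.proxies.TensorProxy and
# thunder.last_traces, both present in recent lightning-thunder releases.
from thunder.core.proxies import TensorProxy


def check_dtypes(bsym):
    # every tensor the symbol touches should be in the activation dtype (fp16 here)
    for a in bsym.flat_args:
        if isinstance(a, TensorProxy):
            assert a.dtype == thunder.dtypes.float16
    # recurse into the symbol's decomposition
    for sub in bsym.subsymbols:
        check_dtypes(sub)


for bsym in thunder.last_traces(cast_jit)[-1].bound_symbols:
    check_dtypes(bsym)
```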