@@ -232,33 +232,32 @@ def post_construct(self, module, output):
         return output[0] if is_tensor else tuple(output)


-class AllGatherFunction(ms.nn.Cell):
-    def __init__(self, dim, group):
-        super().__init__()
-        self.dim = dim
-        self.group = group
-        self.world_size = mint.distributed.get_world_size(group)
-        self.rank = mint.distributed.get_rank(group)
+class AllGatherFunction(ms.common._Function):
+    @staticmethod
+    def forward(ctx, tensor, dim, group):
+        ctx.dim = dim
+        ctx.group = group
+        ctx.world_size = mint.distributed.get_world_size(group)
+        ctx.rank = mint.distributed.get_rank(group)

-    def construct(self, tensor):
-        # return funcol.all_gather_tensor(tensor, dim, group=group)
         # mint.distributed.all_gather_into_tensor only supports dim=0
-        tensor_t = tensor.transpose(self.dim, 0) if self.dim != 0 else tensor
+        tensor_t = tensor.transpose(dim, 0) if dim != 0 else tensor

         out_shape = list(tensor_t.shape)
-        out_shape[0] *= self.world_size
+        out_shape[0] *= ctx.world_size
         output = mint.zeros(out_shape, dtype=tensor_t.dtype)

-        mint.distributed.all_gather_into_tensor(output, tensor_t.contiguous(), group=self.group)
+        mint.distributed.all_gather_into_tensor(output, tensor_t.contiguous(), group=group)

-        if self.dim != 0:
-            output = output.transpose(0, self.dim)
+        if dim != 0:
+            output = output.transpose(0, dim)

         return output

-    def bprop(self, tensor, out, dout):
-        grad_chunks = mint.chunk(dout, self.world_size, dim=self.dim)
-        return (grad_chunks[self.rank],)
+    @staticmethod
+    def backward(ctx, grad_output):
+        grad_chunks = mint.chunk(grad_output, ctx.world_size, dim=ctx.dim)
+        return grad_chunks[ctx.rank], None, None


 class EquipartitionSharder:
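
Note on the hunk above: the rewrite moves from a `Cell` with a `bprop` to a `torch.autograd.Function`-style pair of static methods, so `backward` must return one gradient per `forward` input, hence the trailing `None, None` for the non-tensor arguments `dim` and `group`. The gradient of an all-gather along `dim` is simply each rank's own slice of the upstream gradient. A minimal single-process sketch of that slicing (hypothetical shapes, with `world_size` and `rank` hard-coded so no process group is needed):

```python
import mindspore as ms
from mindspore import mint

# Pretend we are rank 1 of 4 and the forward all-gather ran along dim=1.
world_size, rank, dim = 4, 1, 1

# Upstream gradient has the *gathered* shape: 4 shards of width 2 along dim 1.
grad_output = mint.arange(0, 16, dtype=ms.float32).reshape(2, 8)

# backward keeps only this rank's slice, mirroring the mint.chunk call above.
grad_chunks = mint.chunk(grad_output, world_size, dim=dim)
grad_input = grad_chunks[rank]
print(grad_input.shape)  # (2, 2): the shape of the local shard given to forward
```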
@@ -278,7 +277,7 @@ def shard(cls, tensor: ms.Tensor, dim: int, mesh) -> ms.Tensor:
     @classmethod
     def unshard(cls, tensor: ms.Tensor, dim: int, mesh) -> ms.Tensor:
         tensor = tensor.contiguous()
-        tensor = AllGatherFunction(dim, mesh)(tensor)
+        tensor = AllGatherFunction.apply(tensor, dim, mesh)
         return tensor

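
Call-site note: with a `_Function`, the op is invoked through `.apply(...)` instead of instantiating a `Cell`, so `dim` and `mesh` become `forward` arguments. A hedged usage sketch (hypothetical shard shape; assumes the script runs under a MindSpore distributed launcher such as `msrun`, and that passing `None` as the mesh selects the default group):

```python
import mindspore as ms
from mindspore import mint

mint.distributed.init_process_group()  # default communication group
rank = mint.distributed.get_rank()

# Each rank holds one equal shard; unshard reassembles the full tensor.
local_shard = mint.ones((2, 4), dtype=ms.float32) * rank
full = EquipartitionSharder.unshard(local_shard, dim=0, mesh=None)
# full has shape (2 * world_size, 4); on the backward pass,
# AllGatherFunction.backward routes only this rank's slice of the
# gradient back to local_shard.
```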