Commit

reformat and correct minor errors
siddharth9820 committed Oct 24, 2023
1 parent a2b10aa commit 2e74e9f
Showing 2 changed files with 23 additions and 7 deletions.
23 changes: 18 additions & 5 deletions axonn/intra_layer/fully_connected.py
@@ -3,6 +3,7 @@
 import torch
 from .communication import Drop
 from torch.autograd import Function
+import math


 def divide(a, b):
@@ -61,6 +62,10 @@ def backward(ctx, grad_output):
         return grad_input, grad_weight, None, None, None


+def default_init_method(weight):
+    return torch.nn.init.kaiming_uniform_(weight, a=math.sqrt(5))
+
+
 class Linear(torch.nn.Module):
     def __init__(
         self,
@@ -70,7 +75,7 @@ def __init__(
         transpose=False,
         skip_bias_add=False,
         init_method=None,
-        async_comm_in_backward_pass=True
+        async_comm_in_backward_pass=True,
         **kwargs
     ):
         super(Linear, self).__init__()
@@ -79,10 +84,10 @@ def __init__(

         self.inner_group_size = dist.get_world_size(self.inner_group)
         self.outer_group_size = dist.get_world_size(self.outer_group)
-        self.async_comm_in_backward_pass=async_comm_in_backward_pass
+        self.async_comm_in_backward_pass = async_comm_in_backward_pass

         if init_method is None:
-            init_method = lambda weight : torch.nn.init.kaiming_uniform_(weight, a=math.sqrt(5))
+            init_method = default_init_method

         if not transpose:
             assert in_features % self.inner_group_size == 0
@@ -125,11 +130,19 @@ def get_output_feature_size(self):
     def forward(self, x):
         if not self.transpose:
             x = AsyncLinear.apply(
-                x, self.weight, self.inner_group, self.outer_group, self.async_comm_in_backward_pass
+                x,
+                self.weight,
+                self.inner_group,
+                self.outer_group,
+                self.async_comm_in_backward_pass,
             )
         else:
             x = AsyncLinear.apply(
-                x, self.weight, self.outer_group, self.inner_group, self.async_comm_in_backward_pass
+                x,
+                self.weight,
+                self.outer_group,
+                self.inner_group,
+                self.async_comm_in_backward_pass,
             )
         if self.skip_bias_add:
             return x, self.bias
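The `default_init_method` helper added above reproduces PyTorch's stock `nn.Linear` weight initialization and replaces the earlier inline lambda (presumably to keep the default lint-clean and picklable). A minimal standalone sketch of what it does; the tensor shape below is made up for illustration and is not from the commit:

```python
import math

import torch


def default_init_method(weight):
    # Same scheme torch.nn.Linear.reset_parameters uses:
    # in-place Kaiming-uniform initialization with a = sqrt(5),
    # i.e. values drawn uniformly from [-1/sqrt(fan_in), 1/sqrt(fan_in)].
    return torch.nn.init.kaiming_uniform_(weight, a=math.sqrt(5))


# Hypothetical weight shard; real shapes come from the tensor-parallel Linear.
weight = torch.empty(256, 128)
default_init_method(weight)
print(weight.abs().max().item())  # < 1 / sqrt(128) ≈ 0.088
```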
7 changes: 5 additions & 2 deletions axonn/tests/test_intra_layer_fc.py
@@ -52,7 +52,7 @@ def test_fw_pass(G_intra_r, G_intra_c, B, H):
 @pytest.mark.parametrize("B, H", [(32, 64), (16, 128), (2, 256)])
 @pytest.mark.parametrize("G_intra_r, G_intra_c", [(1, 2), (2, 1)])
 @pytest.mark.parametrize("async_comm_in_backward_pass", [True, False])
-def test_bw_pass(G_intra_r, G_intra_c, B, H):
+def test_bw_pass(G_intra_r, G_intra_c, B, H, async_comm_in_backward_pass):
     # These tests are in fp-32
     torch.manual_seed(42)
     ax.init(
@@ -69,7 +69,10 @@ def test_bw_pass(G_intra_r, G_intra_c, B, H):

     # parallel backward pass
     layer = Tensor_Parallel_Linear(
-        in_features=H, out_features=H, skip_bias_add=True, async_comm_in_backward_pass=async_comm_in_backward_pass
+        in_features=H,
+        out_features=H,
+        skip_bias_add=True,
+        async_comm_in_backward_pass=async_comm_in_backward_pass,
     ).cuda()
     X_local = (
         _drop(X, 1, inner_group).detach().clone()
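The test-side change threads the `async_comm_in_backward_pass` parametrization into the signature of `test_bw_pass`, so the matching `@pytest.mark.parametrize` decorator has an argument to inject; without it, pytest errors out at collection time. A hypothetical, self-contained sketch of that mechanism (not code from the repository):

```python
import pytest


@pytest.mark.parametrize("async_comm_in_backward_pass", [True, False])
def test_flag_is_injected(async_comm_in_backward_pass):
    # pytest generates one test per parametrized value and passes it
    # through the argument of the same name, which is why the name
    # must also appear in the test function's signature.
    assert isinstance(async_comm_in_backward_pass, bool)
```

The real test presumably still requires a multi-process GPU launch (the `G_intra_r, G_intra_c` grids of (1, 2) and (2, 1) imply at least two workers); the launcher command is not part of this diff.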
