Commit f2816dd
Initialize layers on the GPU (#51)
siddharth9820 authored Nov 7, 2023
1 parent f8773ee commit f2816dd
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions axonn/intra_layer/fully_connected.py
@@ -12,12 +12,13 @@ def divide(a, b):
     return a // b


+@torch.no_grad()
 def extract_local_params_from_full_params(
-    full_params, out_features_group, in_features_group, depth_group
+    params, out_features_group, in_features_group, depth_group
 ):
-    params = Drop.apply(torch.t(full_params).contiguous(), out_features_group)
-    params = torch.t(params).contiguous()
     params = Drop.apply(params, in_features_group)
+    params = Drop.apply(torch.t(params).contiguous(), out_features_group)
+    params = torch.t(params).contiguous()
     params = Drop.apply(params.reshape(-1), depth_group)  # create 1D view
     return params
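For context, here is a minimal sketch of what this sharding helper computes, assuming `Drop.apply(tensor, group)` keeps this rank's equal slice of the tensor along its last dimension within the given process group (the apparent semantics of axonn's `Drop` autograd function, which the transposes above work around: transpose to shard output features, slice input features directly, then shard the flattened 1-D view across the depth group). The new `@torch.no_grad()` decorator keeps these slicing ops out of the autograd graph during initialization. The helper name and the explicit rank/size arguments below are hypothetical, written with plain slicing instead of `Drop.apply`:

```python
import torch

def shard_weight_sketch(full, out_rank, out_size, in_rank, in_size,
                        depth_rank, depth_size):
    # Hypothetical equivalent of extract_local_params_from_full_params:
    # keep this rank's block of a (out_features, in_features) weight,
    # then this rank's chunk of the block's flattened 1-D view.
    out_features, in_features = full.shape
    rows = out_features // out_size   # rows kept by this out-features rank
    cols = in_features // in_size     # cols kept by this in-features rank
    local = full[out_rank * rows:(out_rank + 1) * rows,
                 in_rank * cols:(in_rank + 1) * cols].contiguous()
    flat = local.reshape(-1)          # 1-D view, as in the real helper
    chunk = flat.numel() // depth_size
    return flat[depth_rank * chunk:(depth_rank + 1) * chunk]
```

Since the row and column slices select an independent block, the reordering of the two `Drop` calls in this commit does not change which elements each rank keeps.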

@@ -30,12 +31,13 @@ def initialize_params(
     in_features_group,
     depth_group,
     init_method,
+    init_device="cuda",
 ):
-    params = torch.empty((out_features, in_features))
+    params = torch.empty((out_features, in_features), device=init_device)
     init_method(params)
     params = extract_local_params_from_full_params(
         params, out_features_group, in_features_group, depth_group
-    )
+    ).cpu()
     return params


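The practical effect of the change: the full (out_features, in_features) weight is now materialized and initialized on the GPU, which is typically much faster than CPU initialization for large layers, and only the rank-local shard is moved back to host memory by `.cpu()`. A hypothetical usage sketch follows; the process-group handles are placeholders from a distributed setup, not part of the commit:

```python
import math
import torch.nn.init as init
from axonn.intra_layer.fully_connected import initialize_params

# out_group / in_group / depth_group are assumed torch.distributed process
# groups obtained from axonn's tensor-parallel setup; placeholders here.
local_shard = initialize_params(
    out_features=4096,
    in_features=1024,
    out_features_group=out_group,
    in_features_group=in_group,
    depth_group=depth_group,
    init_method=lambda w: init.kaiming_uniform_(w, a=math.sqrt(5)),
    init_device="cuda",  # new argument: run init_method on the GPU
)
# local_shard is this rank's flattened slice of the weight, back on the CPU.
```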
