
Commit a2e0701
Refactoring to use controller
Parent: e942ca4

File tree: 4 files changed (+186, -5 lines)
File renamed without changes.

loraw/loraw_controller.py (new file, +61 lines)

from torch import optim
from ema_pytorch import EMA

from .loraw_network import LoRAWNetwork
from .loraw_module import LoRAWModule


class LoRAWController:
    def __init__(self, target_model, target_config) -> None:
        self.target_model = target_model
        self.target_config = target_config

        self.lr = 0
        self.lora_ema = None

    def create_diffuser_lora(
        self,
        lora_dim=16,
        alpha=1,
        dropout=None,
    ):
        self.lora = LoRAWNetwork(
            net=self.target_model,
            target_subnets=["downsamples", "upsamples"],
            target_modules=["Attention"],
            lora_dim=lora_dim,
            alpha=alpha,
            dropout=dropout,
            multiplier=1.0,
            module_class=LoRAWModule,
            verbose=False,
        )

    def configure_optimizer_patched(self):
        return optim.Adam([*self.lora.parameters()], lr=self.lr)

    def on_before_zero_grad_patched(self, *args, **kwargs):
        self.lora_ema.update()

    def activate(self, training_wrapper=None):
        # self.lora.to(device=self.target_model.device)
        self.lora.activate()

        if training_wrapper is not None:
            # Freeze main diffusion model
            self.target_model.requires_grad_(False)
            self.lora.requires_grad_(True)

            # Replace optimizer to use lora parameters
            self.lr = training_wrapper.lr
            training_wrapper.configure_optimizers = self.configure_optimizer_patched

            # Replace ema update
            self.lora_ema = EMA(
                self.lora,
                beta=0.9999,
                power=3/4,
                update_every=1,
                update_after_step=1
            )
            training_wrapper.on_before_zero_grad = self.on_before_zero_grad_patched
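
For context, a minimal usage sketch of the new controller. It assumes a diffusion model object whose .model attribute exposes downsamples/upsamples subnets and a training wrapper exposing lr, configure_optimizers, and on_before_zero_grad (the attributes the controller patches above); build_model, build_training_wrapper, and model_config are hypothetical placeholders, not part of this commit.

from loraw.loraw_controller import LoRAWController  # assumes the repo root is on the path

# Hypothetical setup: build_model / build_training_wrapper / model_config stand in
# for whatever the surrounding training script provides.
model = build_model(model_config)                      # exposes model.model.downsamples / .upsamples
training_wrapper = build_training_wrapper(model, lr=1e-4)

controller = LoRAWController(target_model=model, target_config=model_config)
controller.create_diffuser_lora(lora_dim=16, alpha=1, dropout=None)

# Injects the LoRAW modules, freezes the base model, and patches the wrapper so
# only the LoRA parameters are optimized and EMA tracks the LoRA network.
controller.activate(training_wrapper=training_wrapper)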

models/loraw_module.py renamed to loraw/loraw_module.py (+5, -5)

@@ -2,6 +2,7 @@
 import torch
 from torch import nn
 
+
 class LoRAWModule(nn.Module):
     def __init__(
         self,
@@ -28,7 +29,7 @@ def __init__(
 
         module_type = orig_module.__class__.__name__
 
-        if module_type == "Conv1d":
+        if module_type == "Conv1d":
             in_dim = orig_module.in_channels
             out_dim = orig_module.out_channels
             kernel_size = orig_module.kernel_size
@@ -38,7 +39,7 @@ def __init__(
                 in_dim, self.lora_dim, kernel_size, stride, padding, bias=False
             )
             self.lora_up = torch.nn.Conv1d(self.lora_dim, out_dim, 1, 1, bias=False)
-        else:
+        elif module_type == "Linear":
             in_dim = orig_module.in_features
             out_dim = orig_module.out_features
             self.lora_down = torch.nn.Linear(in_dim, self.lora_dim, bias=False)
@@ -50,11 +51,10 @@ def __init__(
         self.scale = alpha / self.lora_dim
         self.register_buffer("alpha", torch.tensor(alpha))
 
-        # same as microsoft's
        torch.nn.init.kaiming_uniform_(self.lora_down.weight, a=math.sqrt(5))
        torch.nn.init.zeros_(self.lora_up.weight)
 
-    def apply_to(self):
+    def activate(self):
         self.orig_forward = self.orig_module.forward
         self.orig_module.forward = self.forward
         del self.orig_module
@@ -95,4 +95,4 @@ def forward(self, x):
 
         lx = self.lora_up(lx)
 
-        return orig_forwarded + lx * self.multiplier * scale
+        return orig_forwarded + lx * self.multiplier * scale
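
For readers skimming the diff, this is what the renamed hook does at run time: activate() (formerly apply_to()) swaps the wrapped layer's forward for one that adds a trainable low-rank correction on top of the original output. Below is a condensed sketch of that mechanism, assuming a plain nn.Linear target; TinyLoRA is an illustrative name, and the real LoRAWModule also handles Conv1d, dropout, and cleanup of the original reference.

import torch
from torch import nn

class TinyLoRA(nn.Module):
    # Minimal illustration of the LoRAW bypass, not the full LoRAWModule.
    def __init__(self, orig_module: nn.Linear, lora_dim=4, alpha=1, multiplier=1.0):
        super().__init__()
        self.orig_module = orig_module
        self.lora_down = nn.Linear(orig_module.in_features, lora_dim, bias=False)
        self.lora_up = nn.Linear(lora_dim, orig_module.out_features, bias=False)
        self.multiplier = multiplier
        self.scale = alpha / lora_dim
        nn.init.kaiming_uniform_(self.lora_down.weight, a=5 ** 0.5)
        nn.init.zeros_(self.lora_up.weight)  # bypass starts as a no-op

    def activate(self):
        # Same trick as LoRAWModule.activate(): hijack the original forward
        self.orig_forward = self.orig_module.forward
        self.orig_module.forward = self.forward

    def forward(self, x):
        return self.orig_forward(x) + self.lora_up(self.lora_down(x)) * self.multiplier * self.scale

layer = nn.Linear(64, 64)
lora = TinyLoRA(layer, lora_dim=4)
lora.activate()
y = layer(torch.randn(2, 64))  # now routed through TinyLoRA.forward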

loraw/loraw_network.py (new file, +120 lines)

import torch
from torch import nn
from typing import List

from .loraw_module import LoRAWModule


class LoRAWNetwork(nn.Module):
    def __init__(
        self,
        net,
        target_subnets=None,
        target_modules=["SelfAttention1d"],
        multiplier=1.0,
        lora_dim=4,
        alpha=1,
        dropout=None,
        module_class=LoRAWModule,
        verbose=False,
    ):
        super().__init__()

        self.lora_map = {}
        self.multiplier = multiplier
        self.lora_dim = lora_dim
        self.alpha = alpha
        self.dropout = dropout

        def create_modules(
            root_name, root_module: nn.Module, target_replace_modules
        ) -> nn.ModuleList:
            loras = nn.ModuleList()
            skipped = nn.ModuleList()
            for name, module in root_module.named_modules():
                if module.__class__.__name__ in target_replace_modules:
                    for child_name, child_module in module.named_modules():
                        is_linear = child_module.__class__.__name__ == "Linear"
                        is_conv1d = child_module.__class__.__name__ == "Conv1d"

                        if is_linear or is_conv1d:
                            # f-string prefix added so the name is actually interpolated
                            lora_name = f"lora.{root_name}.{name}.{child_name}"
                            lora_name = lora_name.replace(".", "_")

                            lora = module_class(
                                lora_name,
                                child_module,
                                multiplier=self.multiplier,
                                lora_dim=self.lora_dim,
                                alpha=self.alpha,
                                dropout=self.dropout
                            )
                            loras.append(lora)
            return loras, skipped

        for subnet_name in target_subnets:
            if hasattr(net.model, subnet_name):
                subnet = getattr(net.model, subnet_name)
                self.lora_map[subnet_name], _ = create_modules(subnet_name, subnet, target_modules)
                print(f"Created LoRAW for {subnet_name}: {len(self.lora_map[subnet_name])} modules.")

                '''
                if verbose and len(skipped) > 0:
                    print(
                        f"because block_lr_weight is 0 or dim (rank) is 0, {len(skipped)} LoRA modules are skipped:"
                    )
                    for name in skipped:
                        print(f"\t{name}")

                self.up_lr_weight: List[float] = None
                self.down_lr_weight: List[float] = None
                self.mid_lr_weight: float = None
                self.block_lr = False

                # assertion
                names = set()
                for lora in self.unet_loras:
                    assert (
                        lora.lora_name not in names
                    ), f"duplicated lora name: {lora.lora_name}"
                    names.add(lora.lora_name)
                '''
            else:
                print(f"Skipping {subnet_name}: not present in this network")

    def set_multiplier(self, multiplier):
        self.multiplier = multiplier
        # Iterate over the subnet map (the refactor replaced the old unet_loras list)
        for subnet in self.lora_map.values():
            for lora in subnet:
                lora.multiplier = self.multiplier

    def activate(self):
        for subnet_name, subnet in self.lora_map.items():
            for lora in subnet:
                lora.activate()
                self.add_module(lora.lora_name, lora)
            print(f"Injected {len(subnet)} LoRAW modules into {subnet_name}")

    def is_mergeable(self):
        return True

    def save_weights(self, file, dtype=torch.float16):
        state_dict = self.state_dict()

        if dtype is not None:
            for key in list(state_dict.keys()):
                v = state_dict[key]
                v = v.detach().clone().to("cpu").to(dtype)
                state_dict[key] = v

        torch.save(state_dict, file)

    def load_weights(self, file):
        weights_sd = torch.load(file, map_location="cpu")
        info = self.load_state_dict(weights_sd, strict=False)
        return info
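
A sketch of how the new save_weights/load_weights pair might be used around training. It assumes the controller flow shown earlier, so activate() has already registered the LoRA submodules (which is what places them into state_dict()); new_model, model_config, and the checkpoint path are placeholders, not names defined by this commit.

import torch
from loraw.loraw_controller import LoRAWController

# After training: persist only the low-rank weights, cast to fp16 on CPU.
controller.lora.save_weights("loraw_checkpoint.pt", dtype=torch.float16)

# Reuse: rebuild the same LoRA topology on a fresh model, inject it, then restore.
new_controller = LoRAWController(target_model=new_model, target_config=model_config)
new_controller.create_diffuser_lora(lora_dim=16, alpha=1)
new_controller.activate()
info = new_controller.lora.load_weights("loraw_checkpoint.pt")
print(info)  # load_state_dict(strict=False) report: missing / unexpected keys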
