 from thunder.core import utils
 from thunder.core import prims
 import torch
+import math

 from .utils import (
     get_orig_and_thunder_module_proxies_from_prologue,
@@ -294,3 +295,76 @@ def transform_traces_pre_prologue(self, prologue_trace, computation_trace, epilo

         new_computation_trace.set_provenance(thunder.core.trace.TraceProvenance("quant pass"))
         return prologue_trace, new_computation_trace, epilogue_trace
+
+
+class LORATransform(Transform):
+    def __init__(
+        self,
+        r: int = 0,
+        lora_alpha: int = 1,
+        lora_dropout: float = 0.0,
+        **kwargs,
+    ):
+        self.r = r
+        self.lora_alpha = lora_alpha
+        self.lora_dropout = lora_dropout
+        self.lora_linear_names = set()
+
+    def lora_linear(self, x):
+        # x is an nn.Linear weight, stored as (out_features, in_features)
+        out_features, in_features = x.shape[0], x.shape[1]
+
+        if self.lora_dropout > 0.0:
+            dropout = torch.nn.Dropout(p=self.lora_dropout)
+        else:
+            dropout = lambda x: x
+
+        linear = torch.nn.Linear(in_features, out_features)
+        # low-rank factors: A is (r, in_features), B is (out_features, r);
+        # B is zero-initialized, so the LoRA term contributes nothing initially
+        lora_A = torch.nn.Parameter(torch.empty((self.r, in_features)))
+        lora_B = torch.nn.Parameter(torch.empty((out_features, self.r)))
+        torch.nn.init.kaiming_uniform_(lora_A, a=math.sqrt(5))
+        torch.nn.init.zeros_(lora_B)
+        scaling = self.lora_alpha / self.r
+
+        pretrained = linear(x)
+        lora = (dropout(x) @ lora_A.transpose(0, 1) @ lora_B.transpose(0, 1)) * scaling
+        return pretrained + lora
+
+    def transform_module(self, model: thunder.ThunderModule):
+        self.thunder_module = model
+        shared_names = model._get_shared_names()
+        processed_names = set()
+
+        def convert_linear_submodule(tm, name):
+            self.lora_linear_names.add(name)
+            weight_name = f"{name}.weight"
+            processed_copies = shared_names[weight_name] & processed_names
+            if processed_copies:
+                # a shared copy of this weight was already transformed; reuse its override
+                copy_name = next(iter(processed_copies))
+                tm._overrides_parameters[weight_name] = tm._overrides_parameters[copy_name]
+                processed_names.add(weight_name)
+                return
+
+            w = tm.get_parameter(weight_name)
+            qw = self.lora_linear(w)
+            tm._overrides_parameters[weight_name] = qw.to(w.device)
+            processed_names.add(weight_name)
+
+        for n, submodule in model._model.named_modules():
+            if isinstance(submodule, torch.nn.Linear):
+                convert_linear_submodule(model, n)
+
+    def transform_state_dict_for_submodule(
+        self, model: thunder.ThunderModule, submodule_name: str, state_dict: dict
+    ) -> dict:
+        if submodule_name not in self.lora_linear_names:
+            return state_dict
+
+        w = state_dict["weight"]
+        qw = self.lora_linear(w)
+
+        state_dict = state_dict.copy()
+        state_dict["weight"] = qw.to(w.device)
+
+        return state_dict
+
+    def transform_traces_pre_prologue(self, prologue_trace, computation_trace, epilogue_trace, **kwargs):
+        return prologue_trace, computation_trace, epilogue_trace
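
For review context, a minimal usage sketch of the new transform (assuming thunder.jit's transforms= argument; the import path, toy model, and r/lora_alpha values below are illustrative, not part of this diff):

    import torch
    import thunder
    from thunder.transforms import LORATransform  # import path assumed; the class is added in this diff

    # toy model, illustrative only
    model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU(), torch.nn.Linear(64, 64))

    # hypothetical hyperparameters; r must be > 0 because scaling divides by self.r
    lora = LORATransform(r=8, lora_alpha=16, lora_dropout=0.0)

    # transform_module replaces each nn.Linear weight with its LoRA-adjusted override
    jitted = thunder.jit(model, transforms=[lora])
    out = jitted(torch.randn(2, 64))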