From 2d6e6badb75380737db0490453a917a2b8d8bd16 Mon Sep 17 00:00:00 2001
From: Cal Mitchell <33095819+calmitchell617@users.noreply.github.com>
Date: Sun, 10 Mar 2024 09:07:34 +0000
Subject: [PATCH] pytorch layernorm now takes bias arg

---
 model.py | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/model.py b/model.py
index c698f8b601..ff000e3521 100644
--- a/model.py
+++ b/model.py
@@ -15,17 +15,6 @@
 import torch.nn as nn
 from torch.nn import functional as F
 
-class LayerNorm(nn.Module):
-    """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
-
-    def __init__(self, ndim, bias):
-        super().__init__()
-        self.weight = nn.Parameter(torch.ones(ndim))
-        self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
-
-    def forward(self, input):
-        return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
-
 class CausalSelfAttention(nn.Module):
 
     def __init__(self, config):
@@ -95,9 +84,9 @@ class Block(nn.Module):
 
     def __init__(self, config):
         super().__init__()
-        self.ln_1 = LayerNorm(config.n_embd, bias=config.bias)
+        self.ln_1 = nn.LayerNorm(config.n_embd, bias=config.bias)
         self.attn = CausalSelfAttention(config)
-        self.ln_2 = LayerNorm(config.n_embd, bias=config.bias)
+        self.ln_2 = nn.LayerNorm(config.n_embd, bias=config.bias)
         self.mlp = MLP(config)
 
     def forward(self, x):
@@ -128,7 +117,7 @@ def __init__(self, config):
             wpe = nn.Embedding(config.block_size, config.n_embd),
             drop = nn.Dropout(config.dropout),
             h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
-            ln_f = LayerNorm(config.n_embd, bias=config.bias),
+            ln_f = nn.LayerNorm(config.n_embd, bias=config.bias),
         ))
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
         # with weight tying when using torch.compile() some warnings get generated:
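
Note: this patch relies on the bias argument that torch.nn.LayerNorm gained in PyTorch 2.1; on older versions the nn.LayerNorm(..., bias=False) call raises an unexpected-keyword error. A minimal sketch of the equivalence the change assumes (the ndim value and tensor shapes are illustrative, not from the patch):

import torch
import torch.nn as nn
from torch.nn import functional as F

ndim = 768                                   # illustrative embedding size
x = torch.randn(2, 16, ndim)

ln = nn.LayerNorm(ndim, bias=False)          # requires PyTorch >= 2.1
assert ln.bias is None                       # no bias parameter is registered
# matches what the removed custom LayerNorm computed via F.layer_norm
ref = F.layer_norm(x, ln.weight.shape, ln.weight, None, 1e-5)
assert torch.allclose(ln(x), ref)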