From 2d6e6badb75380737db0490453a917a2b8d8bd16 Mon Sep 17 00:00:00 2001
From: Cal Mitchell <33095819+calmitchell617@users.noreply.github.com>
Date: Sun, 10 Mar 2024 09:07:34 +0000
Subject: [PATCH] pytorch layernorm now takes bias arg

---
 model.py | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/model.py b/model.py
index c698f8b601..ff000e3521 100644
--- a/model.py
+++ b/model.py
@@ -15,17 +15,6 @@
 import torch.nn as nn
 from torch.nn import functional as F
 
-class LayerNorm(nn.Module):
-    """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
-
-    def __init__(self, ndim, bias):
-        super().__init__()
-        self.weight = nn.Parameter(torch.ones(ndim))
-        self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
-
-    def forward(self, input):
-        return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
-
 class CausalSelfAttention(nn.Module):
 
     def __init__(self, config):
@@ -95,9 +84,9 @@ class Block(nn.Module):
 
     def __init__(self, config):
         super().__init__()
-        self.ln_1 = LayerNorm(config.n_embd, bias=config.bias)
+        self.ln_1 = nn.LayerNorm(config.n_embd, bias=config.bias)
         self.attn = CausalSelfAttention(config)
-        self.ln_2 = LayerNorm(config.n_embd, bias=config.bias)
+        self.ln_2 = nn.LayerNorm(config.n_embd, bias=config.bias)
         self.mlp = MLP(config)
 
     def forward(self, x):
@@ -128,7 +117,7 @@ def __init__(self, config):
             wpe = nn.Embedding(config.block_size, config.n_embd),
             drop = nn.Dropout(config.dropout),
             h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
-            ln_f = LayerNorm(config.n_embd, bias=config.bias),
+            ln_f = nn.LayerNorm(config.n_embd, bias=config.bias),
         ))
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
         # with weight tying when using torch.compile() some warnings get generated:
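
Note: this patch relies on the bias argument that torch.nn.LayerNorm gained in PyTorch 2.1; on older versions the nn.LayerNorm(..., bias=False) call raises an unexpected-keyword error. A minimal sketch of the equivalence the change assumes (the ndim value and tensor shapes are illustrative, not from the patch):

import torch
import torch.nn as nn
from torch.nn import functional as F

ndim = 768                                   # illustrative embedding size
x = torch.randn(2, 16, ndim)

ln = nn.LayerNorm(ndim, bias=False)          # requires PyTorch >= 2.1
assert ln.bias is None                       # no bias parameter is registered
# matches what the removed custom LayerNorm computed via F.layer_norm
ref = F.layer_norm(x, ln.weight.shape, ln.weight, None, 1e-5)
assert torch.allclose(ln(x), ref)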