-
Notifications
You must be signed in to change notification settings - Fork 91
/
Copy pathmodel.py
83 lines (67 loc) · 3.11 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import torch
import argparse
from collections import defaultdict
import lightning as L
from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast
from transformers.optimization import AdamW, get_linear_schedule_with_warmup
from loguru import logger
class KoBARTConditionalGeneration(L.LightningModule):
def __init__(
self,
hparams,
**kwargs):
super().__init__()
self.save_hyperparameters(hparams)
self.model = BartForConditionalGeneration.from_pretrained('gogamza/kobart-base-v1')
self.model.train()
self.bos_token = '<s>'
self.eos_token = '</s>'
self.tokenizer = PreTrainedTokenizerFast.from_pretrained('gogamza/kobart-base-v1')
self.pad_token_id = self.tokenizer.pad_token_id
self.outputs = defaultdict(list)
def configure_optimizers(self):
# Prepare optimizer
param_optimizer = list(self.model.named_parameters())
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
{'params': [p for n, p in param_optimizer if not any(
nd in n for nd in no_decay)], 'weight_decay': 0.01},
{'params': [p for n, p in param_optimizer if any(
nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters,
lr=self.hparams.lr, correct_bias=False)
num_workers = self.hparams.num_workers
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=int(self.trainer.estimated_stepping_batches * 0.1),
num_training_steps=self.trainer.estimated_stepping_batches,
)
lr_scheduler = {'scheduler': scheduler,
'monitor': 'loss', 'interval': 'step',
'frequency': 1}
return [optimizer], [lr_scheduler]
def forward(self, inputs):
attention_mask = inputs['input_ids'].ne(self.pad_token_id).float()
decoder_attention_mask = inputs['decoder_input_ids'].ne(self.pad_token_id).float()
return self.model(input_ids=inputs['input_ids'],
attention_mask=attention_mask,
decoder_input_ids=inputs['decoder_input_ids'],
decoder_attention_mask=decoder_attention_mask,
labels=inputs['labels'], return_dict=True)
def training_step(self, batch, batch_idx):
outs = self(batch)
loss = outs.loss
self.log('train_loss', loss, prog_bar=True)
return loss
def validation_step(self, batch, batch_idx, dataloader_idx=0):
outs = self(batch)
loss = outs['loss']
self.outputs[dataloader_idx].append({"loss": loss})
def on_validation_epoch_end(self):
flat_outputs = []
for lst in self.outputs.values():
flat_outputs.extend(lst)
loss = torch.stack([x["loss"] for x in flat_outputs]).mean()
self.log("val_loss", loss, prog_bar=True)
self.outputs.clear()