finetune.py
import os
import json

import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from huggingface_hub import login
from datasets import load_dataset
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)


def print_trainable_parameters(model):
    """Print how many parameters are trainable vs. total after applying LoRA."""
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )


def get_model(model_name):
    # Load the base model and its tokenizer.
    # model_name = "tiiuae/falcon-40b-instruct"
    print(torch.cuda.is_available())
    # 4-bit NF4 quantization config; if you are using an AMD GPU you need to
    # install bitsandbytes by hand.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        trust_remote_code=True,
        # Pass quantization_config=bnb_config to actually load in 4-bit; it is
        # left commented out here, so bnb_config above is currently unused.
        # quantization_config=bnb_config,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Falcon defines no pad token; reuse the EOS token for padding.
    tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer


def tokenize_data(datapair):
    # Format one CUDA/HIP pair into the training text and tokenize it.
    # Relies on the module-level `tokenizer` created in the __main__ block.
    formatted_data = f"""<cuda>: {datapair["prompt"]}
<hip>: {datapair["completion"]}
"""
    token = tokenizer(
        formatted_data,
        padding=False,
        truncation=True,
    )
    return token
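

# Optional, hedged sketch (not part of the training run below): one way to reload
# the LoRA adapter this script saves/pushes and use it for CUDA->HIP translation
# at inference time, using the PeftConfig/PeftModel classes imported above. The
# function names, the default adapter id (taken from the push_to_hub() call in
# __main__), and the generation settings are illustrative assumptions, not values
# the original script defines.
def load_adapter_for_inference(adapter_id="jozzy/falcon-40b-instruct-hipify"):
    # Read the adapter config to discover which base model it was trained on.
    peft_config = PeftConfig.from_pretrained(adapter_id)
    base_model = AutoModelForCausalLM.from_pretrained(
        peft_config.base_model_name_or_path,
        device_map="auto",
        trust_remote_code=True,
    )
    tok = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
    tok.pad_token = tok.eos_token
    # Attach the trained LoRA weights on top of the frozen base model.
    model = PeftModel.from_pretrained(base_model, adapter_id)
    model.config.use_cache = True  # re-enable the KV cache for generation
    model.eval()
    return model, tok


def hipify_snippet(model, tok, cuda_code, max_new_tokens=256):
    # Build the same "<cuda>: ... <hip>:" prompt layout that tokenize_data() uses.
    prompt = f"<cuda>: {cuda_code}\n<hip>:"
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tok.decode(output[0], skip_special_tokens=True)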


if __name__ == "__main__":
    # Log in to the Hugging Face Hub (prompts for a token).
    #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
    login()
    # Get the base model and tokenizer.
    model_name = "tiiuae/falcon-40b-instruct"
    model, tokenizer = get_model(model_name)
    # Prepare the model for k-bit / LoRA training.
    model.gradient_checkpointing_enable()
    model = prepare_model_for_kbit_training(model)
    # Process the training dataset.
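    # Assumed record layout (not stated in this snippet): each JSON object in
    # train.json should provide a "prompt" field holding CUDA source and a
    # "completion" field holding the matching HIP source, e.g.
    #   {"prompt": "cudaMalloc(&d_a, n);", "completion": "hipMalloc(&d_a, n);"}
    # since tokenize_data() reads exactly those two keys.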
    data = load_dataset("json", data_files="train.json")
    print(data)
    data = data["train"].shuffle().map(tokenize_data)
    print(data)
    # LoRA configuration: adapt only Falcon's fused attention projection.
    config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["query_key_value"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
    # Apply LoRA adapters to the model.
    model = get_peft_model(model, config)
    print_trainable_parameters(model)
    # Training!
    training_args = transformers.TrainingArguments(
        auto_find_batch_size=True,
        gradient_accumulation_steps=4,
        num_train_epochs=3,
        learning_rate=2e-5,
        fp16=True,
        save_total_limit=4,
        logging_steps=5,
        output_dir="./outputs",
        save_strategy="epoch",
        optim="paged_adamw_32bit",
        lr_scheduler_type="cosine",
        warmup_ratio=0.05,
    )
    # TODO: turn DeepSpeed on (not configured in these TrainingArguments yet).
    trainer = transformers.Trainer(
        model=model,
        train_dataset=data,
        args=training_args,
        data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    )
    model.config.use_cache = False  # silence the warnings; re-enable for inference!
    trainer.train()
    model.save_pretrained("hipifyPlus")
    model.push_to_hub("jozzy/falcon-40b-instruct-hipify", use_auth_token=True)