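"""hf_sanity_test.py

Sanity-check script: loads one of several instruction-tuned LLMs, wraps a set of
trivial "Is the last step correct? (A)/(B)" questions in the model-specific prompt
format, and prints the relative probability the model assigns to the (A) and (B)
option tokens at the first generated position.
"""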
import os
os.environ["TRANSFORMERS_CACHE"] = "cache/"
os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1"
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import time
import math
device = "cuda:0"
model_class = "LLAMA30_instruct"
print("model_class: ", model_class)
model2path = {
    "WizardVicunaLLAMA30": "../Wizard-Vicuna-30B-Uncensored-fp16/",
    "MPT30": "../mpt-30b",
    "LLAMA2_70": "../Llama-2-70b-hf",
    "LLAMA2_13_chat": "../Llama-2-13b-chat-hf",
    "LLAMA30_instruct": "../llama-30b-instruct-2048",
    "Falcon40_instruct": "../falcon-40b-instruct",
    "WizardFalcon40": "../WizardLM-Uncensored-Falcon-40b",
}
checkpoint = model2path[model_class]
def select_opt_id(idx):
    """Return the id of the token that immediately follows "(" in an encoded option such as "(A)".

    Uses the module-level `tokenizer`; falls back to the first id if no "(" token is found.
    """
    flag = False
    for token_id in idx:
        if flag:
            return token_id
        else:
            token = tokenizer.decode(token_id)
            if token == "(":
                flag = True
    return idx[0]
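# Load the tokenizer, then resolve the token ids that follow "(" in "(A)" and "(B)".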
init_time = time.perf_counter()
tokenizer = AutoTokenizer.from_pretrained(checkpoint, cache_dir="cache/", use_fast=False)
tokenizer.pad_token = tokenizer.eos_token
final_time = time.perf_counter()
print("tokenizer loading time: ", final_time - init_time)
init_time = time.perf_counter()
Bidx = tokenizer.encode("(B)")
Aidx = tokenizer.encode("(A)")
Aid = select_opt_id(Aidx)
Bid = select_opt_id(Bidx)
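# Load the model in fp16, sharded across the visible GPUs via device_map="auto".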
model = AutoModelForCausalLM.from_pretrained(checkpoint,
                                             cache_dir="cache/",
                                             device_map="auto",
                                             torch_dtype=torch.float16,
                                             trust_remote_code=True)
model.half()
final_time = time.perf_counter()
print("model loading time: ", final_time - init_time)
print(model.hf_device_map)
model.eval()
model = torch.compile(model)
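# Sanity-check questions: each pairs a trivially correct or incorrect step
# (arithmetic, or modus ponens vs. affirming the consequent) with a Yes/No (A)/(B) choice.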
question_prompts = [
    "2+2=4.\nIs the last step correct?\n(A) Yes\n(B) No\n",
    "2+2=5.\nIs the last step correct?\n(A) Yes\n(B) No\n",
    "2+2=4.\nIs the last reasoning step incorrect?\n(A) Yes\n(B) No\n",
    "2+2=5.\nIs the last reasoning step incorrect?\n(A) Yes\n(B) No\n",
    "2+2=4.\nIs the last calculation step correct?\n(A) Yes\n(B) No\n",
    "2+2=5.\nIs the last calculation step correct?\n(A) Yes\n(B) No\n",
    "2+2=4.\nIs the last calculation step incorrect?\n(A) Yes\n(B) No\n",
    "2+2=5.\nIs the last calculation step incorrect?\n(A) Yes\n(B) No\n",
    "2+2=4.\nIs the last calculation/reasoning step correct?\n(A) Yes\n(B) No\n",
    "2+2=5.\nIs the last calculation/reasoning step correct?\n(A) Yes\n(B) No\n",
    "2+2=4.\nIs the last calculation/reasoning step incorrect?\n(A) Yes\n(B) No\n",
    "2+2=5.\nIs the last calculation/reasoning step incorrect?\n(A) Yes\n(B) No\n",
    "Given: P, P => Q. Therefore: Q.\nIs the last step correct?\n(A) Yes\n(B) No\n",
    "Given: Q, P => Q. Therefore: P.\nIs the last step correct?\n(A) Yes\n(B) No\n",
    "Given: P, P => Q. Therefore: Q.\nIs the last reasoning step incorrect?\n(A) Yes\n(B) No\n",
    "Given: Q, P => Q. Therefore: P.\nIs the last reasoning step incorrect?\n(A) Yes\n(B) No\n",
    "Given: P, P => Q. Therefore: Q.\nIs the last calculation step correct?\n(A) Yes\n(B) No\n",
    "Given: Q, P => Q. Therefore: P.\nIs the last calculation step correct?\n(A) Yes\n(B) No\n",
    "Given: P, P => Q. Therefore: Q.\nIs the last calculation step incorrect?\n(A) Yes\n(B) No\n",
    "Given: Q, P => Q. Therefore: P.\nIs the last calculation step incorrect?\n(A) Yes\n(B) No\n",
    "Given: P, P => Q. Therefore: Q.\nIs the last calculation/reasoning step correct?\n(A) Yes\n(B) No\n",
    "Given: Q, P => Q. Therefore: P.\nIs the last calculation/reasoning step correct?\n(A) Yes\n(B) No\n",
    "Given: P, P => Q. Therefore: Q.\nIs the last calculation/reasoning step incorrect?\n(A) Yes\n(B) No\n",
    "Given: Q, P => Q. Therefore: P.\nIs the last calculation/reasoning step incorrect?\n(A) Yes\n(B) No\n",
]
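# The answer prefix ends with "(" so the first generated token should be the option letter.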
answer_prompt = "The answer is ("
prompt_templates = []
if model_class == "WizardVicunaLLAMA30":
    for question_prompt in question_prompts:
        prompt_template = f"USER: {question_prompt}\nASSISTANT: {answer_prompt}"
        prompt_templates.append(prompt_template)
elif model_class == "MPT30":
    for question_prompt in question_prompts:
        prompt_templates.append(question_prompt + answer_prompt)
elif model_class == "LLAMA2_70":
    for question_prompt in question_prompts:
        prompt_templates.append(question_prompt + answer_prompt)
elif model_class == "LLAMA2_13_chat":
for question_prompt in question_prompts:
system = "You are an AI assistant that helps people find information. " \
"User will you give you a question. Your task is to answer as faithfully as you can. "
prompt_template = f"SYSTEM: {system}\nUSER: {question_prompt}\nASSISTANT: {answer_prompt}"
prompt_templates.append(prompt_template)
elif model_class == "LLAMA30_instruct":
for question_prompt in question_prompts:
system = "You are an AI assistant that helps people find information. " \
"User will you give you a question. Your task is to answer as faithfully as you can. "
prompt_template = f"{system}\n\n### Instruction: {question_prompt}\n\n### Response: {answer_prompt}"
prompt_templates.append(prompt_template)
elif model_class == "Falcon40_instruct":
for question_prompt in question_prompts:
prompt_template = f"User: {question_prompt}\nAssistant: {answer_prompt}"
prompt_templates.append(prompt_template)
elif model_class == "WizardFalcon40":
for question_prompt in question_prompts:
prompt_template = f"{question_prompt}\n### Response: {answer_prompt}"
prompt_templates.append(prompt_template)
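# Greedy generation; the first-token logits for the two option ids are converted into relative probabilities.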
with torch.no_grad():
    for prompt_template in prompt_templates:
        #print("prompt_template: ", prompt_template)
        inputs = tokenizer(prompt_template, return_tensors="pt", max_length=1024, truncation=True, padding=True)
        input_ids = inputs["input_ids"].to(device)
        attention_mask = inputs["attention_mask"].to(device)
        init_time = time.perf_counter()
        output = model.generate(input_ids=input_ids,
                                attention_mask=attention_mask,
                                max_new_tokens=100,
                                bos_token_id=tokenizer.bos_token_id,
                                eos_token_id=tokenizer.eos_token_id,
                                pad_token_id=tokenizer.eos_token_id,
                                output_scores=True,
                                do_sample=False,
                                return_dict_in_generate=True,
                                use_cache=True)
        #print("output: ", output)
        # Logits of the first generated token for the single sequence in the batch.
        scores = output.scores[0][0]
        A_score = math.exp(scores[Aid].item())
        B_score = math.exp(scores[Bid].item())
        Aprob = 0 if A_score == 0 else A_score / (A_score + B_score)
        Bprob = 0 if B_score == 0 else B_score / (A_score + B_score)
        print("sequence: ", tokenizer.decode(output.sequences[0]))
        print("Aprob: ", Aprob)
        print("Bprob: ", Bprob)
        final_time = time.perf_counter()
        print("Generation Time: ", final_time - init_time)
        print("\n\n")