JukaAItest.py
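# Quick smoke test: generate Android Java code locally with CodeLlama
# via Hugging Face transformers.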
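# Earlier attempt using the base (non-instruct) CodeLlama-7b checkpoint,
# kept commented out for reference: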
#from transformers import AutoTokenizer, AutoModelForCausalLM
#import transformers
#import torch
#model_id = "codellama/CodeLlama-7b-hf"
#tokenizer = AutoTokenizer.from_pretrained(model_id)
#model = AutoModelForCausalLM.from_pretrained(
# model_id,
# torch_dtype=torch.float16,
# device_map="auto",
#)
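# Load the instruction-tuned CodeLlama 7B model in fp16; device_map="auto"
# lets Accelerate place the weights on the available GPU(s).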
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
model_id = "codellama/CodeLlama-7b-Instruct-hf"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)
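# Build a Llama-2-style [INST]/<<SYS>> instruction prompt. The <s> BOS token
# is written into the string by hand, since add_special_tokens=False below
# stops the tokenizer from adding it.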
system = "Provide answers in Android Java. Do not include comments."
user = "Write a simple pong game"
prompt = f"<s>[INST] <<SYS>>\n{system}\n<</SYS>>\n\n{user} [/INST]"
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
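# Sample up to 200 new tokens with low-temperature nucleus sampling,
# which keeps the generated code fairly deterministic.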
output = model.generate(
    inputs["input_ids"],
    max_new_tokens=200,
    do_sample=True,
    top_p=0.9,
    temperature=0.1,
)
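# Move the generated token ids back to the CPU and decode them to text.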
output = output[0].to("cpu")
print(tokenizer.decode(output))