AIModel.py
import numpy as np
from transformers import AutoModelForCausalLM, AutoProcessor
from PIL import Image


class AIModel:
    def __init__(self) -> None:
        # Loads Phi-3-Vision onto the GPU, with remote code enabled
        self.model_id = "microsoft/Phi-3-vision-128k-instruct"
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_id,
            cache_dir="/notebooks/my_models/phi_3_vision",
            device_map="cuda",
            trust_remote_code=True,
            torch_dtype="auto",
            _attn_implementation="eager",
        )
        self.processor = AutoProcessor.from_pretrained(self.model_id, trust_remote_code=True)
        # Greedy decoding: temperature is ignored when do_sample is False
        self.generation_args = {
            "max_new_tokens": 2048,
            "temperature": 0.0,
            "do_sample": False,
        }

    def get_response(self, message: str, image: Image.Image) -> str:
        # Gets a text response from Phi-3-Vision given a message and an image
        # Converts a NumPy array into a PIL image if needed
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        # Builds a single-turn chat context containing the image placeholder token
        context = [
            {"role": "user", "content": "<|image_1|>\n" + message}
        ]
        # Renders the context into a prompt string via the chat template
        prompt = self.processor.tokenizer.apply_chat_template(
            context, tokenize=False, add_generation_prompt=True
        )
        # Tokenizes the prompt and preprocesses the image into model inputs
        inputs = self.processor(prompt, [image], return_tensors="pt").to("cuda:0")
        # Generates the output token IDs
        generate_ids = self.model.generate(
            **inputs,
            eos_token_id=self.processor.tokenizer.eos_token_id,
            **self.generation_args,
        )
        # Strips the input prompt tokens, keeping only the generated continuation
        generate_ids = generate_ids[:, inputs["input_ids"].shape[1]:]
        # Decodes the generated tokens into a string
        response = self.processor.batch_decode(
            generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )[0]
        return response
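

# A minimal usage sketch, not part of the original file: it assumes a CUDA GPU
# is available and that "example.png" is a hypothetical local image path.
if __name__ == "__main__":
    model = AIModel()
    image = Image.open("example.png")
    answer = model.get_response("Describe this image.", image)
    print(answer)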