-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel.py
More file actions
30 lines (25 loc) · 1.2 KB
/
model.py
File metadata and controls
30 lines (25 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import re

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
# Run inference on the GPU when CUDA is available, otherwise on the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

# The model weights and the processor are loaded from the same local directory.
_CHECKPOINT_DIR = "./model/content/sample_data/save"
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT_DIR)
model = model.to(device)
processor = AutoProcessor.from_pretrained(_CHECKPOINT_DIR)
def _strip_question(generated_text, ques):
    """Return ``generated_text`` with the question removed, if it appears in it.

    The question is matched case-insensitively and with any run of whitespace
    between its words, so the match does not depend on how the decoded text
    is cased or spaced. Only the text after the first occurrence is kept.
    """
    text = generated_text.strip()
    words = ques.split()
    if not words:
        # Nothing to remove; splitting on an empty separator would raise ValueError.
        return text
    pattern = r"\s+".join(re.escape(word) for word in words)
    match = re.search(pattern, text, flags=re.IGNORECASE)
    if match is None:
        return text
    return text[match.end():].strip()


def getAnswer(img, ques):
    """Generate an answer to ``ques`` for the image at ``img``.

    Args:
        img: Path fragment of the image; it is appended to "." before the
            file is opened (presumably a root-relative path such as
            "/uploads/x.png" -- confirm against callers).
        ques: The question text, used as the generation prompt.

    Returns:
        The decoded model output with the question removed (when present)
        and surrounding whitespace stripped.
    """
    # Open the image in a context manager so the file handle is released as
    # soon as the processor has read the pixels.
    with Image.open("." + img) as image:
        pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)

    # Prompt = CLS token id followed by the question tokens, as a batch of one.
    question_ids = processor(text=ques, add_special_tokens=False).input_ids
    input_ids = torch.tensor([processor.tokenizer.cls_token_id] + question_ids)
    input_ids = input_ids.unsqueeze(0).to(device)

    generated_ids = model.generate(pixel_values=pixel_values, input_ids=input_ids, max_length=50)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return _strip_question(generated_text, ques)