-
Notifications
You must be signed in to change notification settings - Fork 0
/
sentiment_analysis.py
54 lines (26 loc) · 1.7 KB
/
sentiment_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
import torch
from transformers import BertTokenizer, BertForSequenceClassification, AutoModelForSequenceClassification, AutoTokenizer
class SentimentAnalyser:
    """Score a text with two pretrained sequence-classification models.

    Two Hugging Face checkpoints are loaded once at construction time and
    moved to the GPU when one is available, otherwise to the CPU:

    * ``IDEA-CCNL/Erlangshen-Roberta-330M-Sentiment``
      (``model1`` / ``tokenizer1``)
    * ``liam168/c2-roberta-base-finetuned-dianping-chinese``
      (``model2`` / ``tokenizer2``)

    Call :meth:`analyze_text` to obtain the softmax class probabilities
    that each model assigns to a piece of text.
    """

    def __init__(self):
        """Load both models and tokenizers and place the models on the device."""
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        model1 = BertForSequenceClassification.from_pretrained('IDEA-CCNL/Erlangshen-Roberta-330M-Sentiment')
        self.model1 = model1.to(self.device)
        self.tokenizer1 = BertTokenizer.from_pretrained('IDEA-CCNL/Erlangshen-Roberta-330M-Sentiment')

        model2 = AutoModelForSequenceClassification.from_pretrained("liam168/c2-roberta-base-finetuned-dianping-chinese", num_labels=2)
        self.model2 = model2.to(self.device)
        self.tokenizer2 = AutoTokenizer.from_pretrained("liam168/c2-roberta-base-finetuned-dianping-chinese")

    def _class_probabilities(self, tokenizer, model, text):
        """Tokenize ``text``, run ``model`` on it and return softmax probabilities."""
        # Cap the sequence at the model's position-embedding limit so that an
        # over-long text is truncated instead of failing inside the model.
        input_ids = tokenizer.encode(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=model.config.max_position_embeddings,
        ).to(self.device)

        # Pure inference: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            logits = model(input_ids).logits

        return torch.nn.functional.softmax(logits, dim=-1).tolist()

    def analyze_text(self, text: str) -> tuple:
        """Return the class probabilities both models assign to ``text``.

        Args:
            text: The text to analyse. Input longer than a model's maximum
                sequence length is truncated for that model.

        Returns:
            A 2-tuple ``(probabilities1, probabilities2)``. Element 0 comes
            from ``model1`` and element 1 from ``model2``. Each element is
            the softmax over the model's logits converted to a nested list,
            presumably ``[[p_class0, p_class1]]`` for a single input.
            NOTE(review): which class index means "positive" is defined by
            each checkpoint's label mapping; confirm against the model cards.
        """
        probabilities1 = self._class_probabilities(self.tokenizer1, self.model1, text)
        probabilities2 = self._class_probabilities(self.tokenizer2, self.model2, text)
        return probabilities1, probabilities2