emotion.py
import pathlib
import json
import random

import torch
from transformers import pipeline

import config
import common

# Device selection plus module-level state: the BERT emotion classifier is
# loaded lazily, and the sentiment analysis file is cached in memory.
use_device = 'cuda' if torch.cuda.is_available() else 'cpu'
classifier = None
analysis_file = {}


def load_bert_classifier():
    """Load the text-classification pipeline on first use and cache it."""
    global classifier
    if classifier is None:
        classifier = pipeline(
            "text-classification",
            model="bhadresh-savani/bert-base-uncased-emotion",
            device=use_device)


def classify_emotion(text):
    """Return the top emotion label predicted for the given text."""
    load_bert_classifier()
    results = classifier(text)
    return results[0]['label']


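# Illustrative call (the input string is a made-up example):
#   classify_emotion("I can't stop smiling today!")  # -> e.g. "joy"
# The returned value is one of the six labels listed in initialize_analysis_file().

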
def load_analysis_file():
    """Read the sentiment analysis file into memory, creating it if missing."""
    global analysis_file
    if not pathlib.Path(config.sentiment_analysis_dest).exists():
        initialize_analysis_file()
    analysis_file = json.loads(pathlib.Path(
        config.sentiment_analysis_dest).read_text())
    return analysis_file


def initialize_analysis_file():
    """Create an empty per-character, per-emotion analysis structure on disk."""
    global analysis_file
    file_struct = {}
    dataset = json.loads(pathlib.Path(
        config.dataset_manifest_file_dest).read_text())
    for char in dataset:
        common.log(f"Initializing analysis file for {char}")
        file_struct[char] = {
            # six emotion labels: sadness, joy, love, anger, fear, surprise
            "sadness": [],
            "joy": [],
            "love": [],
            "anger": [],
            "fear": [],
            "surprise": []
        }
    pathlib.Path(config.sentiment_analysis_dest).write_text(
        json.dumps(file_struct))
    analysis_file = file_struct


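# Illustrative shape of the analysis file after classification (character name
# and values are hypothetical; the lists start out empty here and are filled in
# by do_batch_classification):
# {
#   "alice": {
#     "sadness": [],
#     "joy": [{"text": "...", "hash_id": "...", "score": 0.98, "dest": "..."}],
#     "love": [], "anger": [], "fear": [], "surprise": []
#   }
# }

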
def save_analysis_file():
    """Persist the in-memory analysis structure to disk."""
    pathlib.Path(config.sentiment_analysis_dest).write_text(
        json.dumps(analysis_file))


def do_batch_classification(char):
    """Classify every clip for one character and bucket it by emotion label."""
    load_bert_classifier()
    dataset = json.loads(pathlib.Path(
        config.dataset_manifest_file_dest).read_text())
    if char not in dataset:
        common.log(f"Character {char} not found in dataset")
        return
    for hash_id in dataset[char]:
        try:
            entry = dataset[char][hash_id]
            text = entry["text"]
            emotion = classifier(text)[0]
            analysis_file[char][emotion['label']].append({
                "text": text,
                "hash_id": hash_id,
                "score": emotion['score'],
                "dest": entry["dest"]
            })
        except RuntimeError as e:
            common.log(f"Error processing {hash_id} for {char}: {e}, ignoring")
    common.log(f"Classification of {char} complete")


def choose_a_voice_by_text(char: str, text: str, randomed: bool = True) -> dict[str, str] | None:
    """
    Chooses the most representative voice clip for the given text based on the
    sentiment analysis file.

    Args:
        char (str): The character to dub
        text (str): The text to analyze
        randomed (bool): If True, pick randomly among suitable clips; otherwise
            return the highest-scoring one.

    Returns:
        dict[str, str] | None: A dictionary containing the hash_id, text, score,
        and destination of the chosen voice clip for the given text. If no
        suitable clip is found, returns None.
    """
    load_bert_classifier()
    result = classifier(text)[0]
    label = result['label']
    # Rank the analyses for this emotion label by classifier score.
    ranked_analysis = sorted(analysis_file[char][label], key=lambda x: x['score'], reverse=True)

    def resolver(ranked_analysis):
        # Keep only clips whose audio does not exceed 10 seconds; skip clips
        # that fail the check.
        result = []
        for analysis in ranked_analysis:
            try:
                if not common.check_if_audio_exceeds_10s(analysis['dest']):
                    result.append(analysis)
            except Exception as e:
                common.log(f"Error processing {analysis['hash_id']}: {e}, ignoring")
        return result

    ranked_analysis = common.cached_data(f"ranked_analysis_{char}_{label}", lambda: resolver(ranked_analysis))
    if not ranked_analysis:
        return None
    return random.choice(ranked_analysis) if randomed else ranked_analysis[0]


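# Illustrative usage (character name and line are hypothetical):
#   load_analysis_file()
#   clip = choose_a_voice_by_text("alice", "How could you do this to me?")
# The text is classified (here presumably as "anger"), and the returned dict
# points at a clip from that emotion bucket no longer than 10 seconds, or None
# if no such clip exists.

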
def do_classification():
    """
    Run sentiment analysis on every character in the dataset and save the results.
    """
    initialize_analysis_file()
    dataset = json.loads(pathlib.Path(
        config.dataset_manifest_file_dest).read_text())
    common.log(f"Starting classification of {len(dataset)} characters")
    for char in dataset:
        do_batch_classification(char)
    common.log("Classification of all characters complete")
    save_analysis_file()


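# Minimal entry-point sketch, assuming config.dataset_manifest_file_dest and
# config.sentiment_analysis_dest are set up as the rest of this module expects:
if __name__ == "__main__":
    do_classification()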