-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathutils.py
65 lines (47 loc) · 1.54 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import torch
import torch.nn as nn
def pad_collate_reddit(batch):
    """Collate a list of samples into padded batch tensors.

    Each sample is indexed positionally: element 0 is the target and
    element 1 is the sequence (it is handed to ``pad_sequence``, so it is
    expected to be a tensor — verify against the dataset class).

    Returns:
        A list ``[target, tweet, lens]`` where ``target`` is a tensor of the
        per-sample targets, ``tweet`` is the batch-first, zero-padded stack
        of sequences, and ``lens`` holds each sequence's unpadded length.
    """
    labels = []
    sequences = []
    for sample in batch:
        labels.append(sample[0])
        sequences.append(sample[1])

    # Record the true lengths before padding hides them.
    lengths = list(map(len, sequences))

    padded = nn.utils.rnn.pad_sequence(
        sequences,
        batch_first=True,
        padding_value=0,
    )
    return [torch.tensor(labels), padded, torch.tensor(lengths)]
def class_FScore(op, t, expt_type):
    """Compute a per-class F-score over labels ``0 .. expt_type - 1``.

    For each class, only the samples whose true label equals that class are
    considered. Within that subset an exact match counts as a hit, a
    prediction below the true label counts as a false negative and a
    prediction above it counts as a false positive.

    Args:
        op: Predicted labels. Assumed to be a tensor/array that supports
            boolean-mask indexing and has the same shape as ``t`` — confirm
            against callers.
        t: True labels.
        expt_type: Number of classes to iterate over.

    Returns:
        A list with one F-score per class, in class order.
    """
    eps = 1e-8  # keeps every ratio finite when a class has no samples or hits
    scores = []
    for label in range(expt_type):
        in_class = t == label
        preds = op[in_class]
        truth = t[in_class]

        hits = (preds == truth).sum()
        under = (truth > preds).sum()
        over = (truth < preds).sum()

        precision = hits / (hits + over + eps)
        recall = hits / (hits + under + eps)
        scores.append(2 * precision * recall / (precision + recall + eps))
    return scores
def gr_metrics(op, t):
    """Compute precision, recall, F-score and an under-prediction rate.

    An exact match counts as a hit, a prediction below the true label counts
    as a false negative and a prediction above it counts as a false positive.
    The fourth value is the fraction of samples whose true label exceeds the
    prediction by more than one.

    A small epsilon is added to every denominator (the same guard
    ``class_FScore`` uses) so that a batch with no exact matches yields 0
    instead of NaN; an empty batch likewise yields a rate of 0.

    Args:
        op: Predicted labels. Assumed to be a tensor/array with a ``shape``
            attribute and the same shape as ``t`` — confirm against callers.
        t: True labels.

    Returns:
        A tuple ``(GP, GR, FS, OE)``: precision, recall, F-score and the
        under-prediction rate described above.
    """
    eps = 1e-8

    TP = (op == t).sum()
    FN = (t > op).sum()
    FP = (t < op).sum()

    GP = TP / (TP + FP + eps)
    GR = TP / (TP + FN + eps)
    # GP + GR is zero whenever TP is zero, hence the guard here as well.
    FS = 2 * GP * GR / (GP + GR + eps)

    # Fraction of samples under-predicted by more than one level.
    OE = (t - op > 1).sum()
    OE = OE / max(op.shape[0], 1)
    return GP, GR, FS, OE
def splits(df, dist_values):
    """Split a labelled DataFrame into a remainder set and a per-class test set.

    The rows are shuffled, then sorted by the ``'label'`` column, and for each
    class ``i`` the first ``dist_values[i]`` rows with that label are moved
    into the test set. The caller's DataFrame is not modified.

    The test rows are picked by index rather than with ``DataFrame.append``,
    which no longer exists in pandas 2.x. The number of classes is taken
    from ``len(dist_values)`` instead of being fixed at five.

    Args:
        df: DataFrame with an integer-like ``'label'`` column.
        dist_values: ``dist_values[i]`` is the number of test rows to take
            for label ``i``, for ``i`` in ``range(len(dist_values))``.

    Returns:
        A tuple ``(df, df_test)``: the remaining rows (still sorted by label)
        and the selected test rows (grouped in label order), both with a
        fresh 0-based index.
    """
    # No random_state is passed, so the shuffle differs from call to call
    # unless the caller controls the global seed.
    shuffled = df.sample(frac=1).reset_index(drop=True)
    shuffled = shuffled.sort_values(by='label').reset_index(drop=True)

    # Collect, class by class, the index labels of the rows that go to test.
    test_idx = []
    for i in range(len(dist_values)):
        class_idx = shuffled[shuffled['label'] == i].index
        test_idx.extend(class_idx[0:dist_values[i]])

    df_test = shuffled.loc[test_idx].reset_index(drop=True)
    df_rest = shuffled.drop(index=test_idx).reset_index(drop=True)
    return df_rest, df_test
def make_31(five_class):
    """Shift every non-zero label down by one, leaving zero unchanged.

    Labels 0 and 1 therefore both map to 0, and any higher label ``k`` maps
    to ``k - 1``.
    """
    if five_class == 0:
        return five_class
    return five_class - 1