-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfind_spcorr.py
89 lines (75 loc) · 3.32 KB
/
find_spcorr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from scipy.stats.stats import spearmanr
import csv
# function to read the ground truth or benchmark rank list for the corresponding word list.
def log_ranklist():
    """Read the benchmark (ground-truth) rank list from ``./ranking_jp.csv``.

    The first row of the file is skipped as a header. For every following
    non-empty row, the second column is parsed as a float.

    Returns:
        list[float]: The second-column values, in file order.

    Raises:
        OSError: If the CSV file cannot be opened.
        IndexError: If a non-empty row has fewer than two columns.
        ValueError: If a second-column value is not a valid float.
    """
    # newline='' lets the csv module do its own newline handling, as the
    # csv documentation recommends for files passed to csv.reader.
    with open('./ranking_jp.csv', 'r', newline='') as f:
        csvreader = csv.reader(f, delimiter=',')
        # Skip the header row; the default keeps an empty file from raising.
        next(csvreader, None)
        # csv.reader yields [] for blank lines; skip those instead of
        # failing with IndexError on row[1].
        return [float(row[1]) for row in csvreader if row]
# function to read the rank list of the words by Metric 1 proposed by us.
def metric1_ranklist():
    """Read the Metric 1 rank list from the sorted-rank output file.

    Reads ``./outputfolder/sortedRankedWordByMetric1.txt``. Each non-blank
    line is split on single spaces and its second field is parsed as an
    integer.

    Returns:
        list[int]: The second-field values, in file order.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line contains no space-separated
            second field.
        ValueError: If the second field is not a valid integer.
    """
    with open('./outputfolder/sortedRankedWordByMetric1.txt', 'r') as rank_file:
        # Whitespace-only lines (e.g. a trailing blank line) carry no rank;
        # skip them rather than fail with IndexError. int() ignores the
        # trailing newline on the last field.
        return [
            int(line.split(" ")[1])
            for line in rank_file
            if line.strip()
        ]
# function to read the rank list of the words by Metric 2 proposed by us.
def metric2_ranklist():
    """Read the Metric 2 rank list from the sorted-rank output file.

    Reads ``./outputfolder/sortedRankedWordByMetric2.txt``. Each non-blank
    line is split on single spaces and its second field is parsed as an
    integer.

    Returns:
        list[int]: The second-field values, in file order.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line contains no space-separated
            second field.
        ValueError: If the second field is not a valid integer.
    """
    with open('./outputfolder/sortedRankedWordByMetric2.txt', 'r') as rank_file:
        # Whitespace-only lines (e.g. a trailing blank line) carry no rank;
        # skip them rather than fail with IndexError. int() ignores the
        # trailing newline on the last field.
        return [
            int(line.split(" ")[1])
            for line in rank_file
            if line.strip()
        ]
# function to read the rank list of the words by Metric 3 proposed by us.
def metric3_ranklist():
    """Read the Metric 3 rank list from the sorted-rank output file.

    Reads ``./outputfolder/sortedRankedWordByMetric3.txt``. Each non-blank
    line is split on single spaces and its second field is parsed as an
    integer.

    Returns:
        list[int]: The second-field values, in file order.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line contains no space-separated
            second field.
        ValueError: If the second field is not a valid integer.
    """
    with open('./outputfolder/sortedRankedWordByMetric3.txt', 'r') as rank_file:
        # Whitespace-only lines (e.g. a trailing blank line) carry no rank;
        # skip them rather than fail with IndexError. int() ignores the
        # trailing newline on the last field.
        return [
            int(line.split(" ")[1])
            for line in rank_file
            if line.strip()
        ]
#function to read the rank list of the words by Metric 4 proposed by us.
def metric4_ranklist():
    """Read the Metric 4 rank list from the sorted-rank output file.

    Reads ``./outputfolder/sortedRankedWordByMetric4.txt``. Each non-blank
    line is split on single spaces and its second field is parsed as an
    integer.

    Returns:
        list[int]: The second-field values, in file order.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line contains no space-separated
            second field.
        ValueError: If the second field is not a valid integer.
    """
    with open('./outputfolder/sortedRankedWordByMetric4.txt', 'r') as rank_file:
        # Whitespace-only lines (e.g. a trailing blank line) carry no rank;
        # skip them rather than fail with IndexError. int() ignores the
        # trailing newline on the last field.
        return [
            int(line.split(" ")[1])
            for line in rank_file
            if line.strip()
        ]
def main():
    """Correlate each metric's rank list with the benchmark and record the results.

    For Metrics 1 through 4, computes the Spearman correlation between the
    benchmark list returned by ``log_ranklist()`` and the corresponding
    ``metricN_ranklist()`` list. The correlation coefficient is appended to
    ``./outputfolder/correlation.txt`` as ``Metric N: <value>`` on its own
    line, and the full ``spearmanr`` result is printed to stdout.

    Exceptions raised by the reader functions, by ``spearmanr``, or while
    opening/writing the output file are not caught here.
    """
    # The benchmark list is the same for every comparison, so read and parse
    # the CSV once instead of once per metric.
    benchmark_ranks = log_ranklist()

    metric_readers = (
        (1, metric1_ranklist),
        (2, metric2_ranklist),
        (3, metric3_ranklist),
        (4, metric4_ranklist),
    )

    # Append mode: results from earlier runs are kept in the file.
    with open('./outputfolder/correlation.txt', 'a') as cf:
        for metric_number, read_metric_ranks in metric_readers:
            # Spearman correlation between the ground-truth rank list and
            # the rank list produced by this metric.
            # NOTE(review): spearmanr pairs the two lists by position, so
            # this assumes the CSV rows and the metric file lines list the
            # words in the same order -- TODO confirm.
            corr = spearmanr(benchmark_ranks, read_metric_ranks())
            # Index 0 of the result is the correlation coefficient.
            cf.write('Metric ' + str(metric_number) + ': ' + str(corr[0]))
            cf.write('\n')
            print(corr)
# Run the correlation report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()