-
Notifications
You must be signed in to change notification settings - Fork 0
/
edgesQuality.py
executable file
·123 lines (96 loc) · 2.98 KB
/
edgesQuality.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# coding=utf-8
# Read tagged edges file and compute rand index by comparing to "ground truth"
# gene cluster read count.
import sys
import os
from collections import defaultdict as dd
def computeIsoQuality(edgeList):
    """Count TP/FP/TN/FN over edges, using the integer tag in field 5 as the prediction.

    Each edge is a sequence of string fields. Only three are read here:
    ``edge[0]`` and ``edge[2]`` are the two read names and ``edge[5]`` is an
    integer tag. The text after the last ``_`` of a read name is used as
    that read's reference label (presumably a gene identifier -- confirm
    against the read-naming scheme of the input).

    An edge is a positive call when its tag is >= 2. It is "true" when both
    reads carry the same label.

    Args:
        edgeList: iterable of edges, each indexable with at least six fields.

    Returns:
        A tuple ``(TP, FP, TN, FN)`` of integer counts.

    Raises:
        IndexError: if an edge has fewer than six fields.
        ValueError: if ``edge[5]`` is not an integer literal.
    """
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    for edge in edgeList:
        # Only the label suffix of each read name matters for scoring; the
        # length/orientation/position fields are intentionally not parsed.
        ref_gene = edge[0].rsplit("_", 1)[-1]
        tgt_gene = edge[2].rsplit("_", 1)[-1]
        same_label = ref_gene == tgt_gene
        same_iso = int(edge[5]) >= 2
        if same_iso:
            if same_label:
                TP += 1
            else:
                FP += 1
        elif same_label:
            FN += 1
        else:
            TN += 1
    return (TP, FP, TN, FN)
def computeClustQuality(edgeList):
    """Count edges whose two reads share the same label (TP) or not (FP).

    Each edge is a sequence of string fields. Only ``edge[0]`` and
    ``edge[2]`` (the two read names) are read. The text after the last ``_``
    of a read name is used as that read's reference label (presumably a
    gene identifier -- confirm against the read-naming scheme of the input).

    Every edge in the list counts as a positive call, so this function never
    produces negatives: TN and FN are always returned as 0.

    Args:
        edgeList: iterable of edges, each indexable with at least three fields.

    Returns:
        A tuple ``(TP, FP, TN, FN)`` where ``TN == FN == 0``.

    Raises:
        IndexError: if an edge has fewer than three fields.
    """
    TP = 0
    FP = 0
    for edge in edgeList:
        # Only the label suffix of each read name matters for scoring; the
        # length/orientation/position fields are intentionally not parsed.
        ref_gene = edge[0].rsplit("_", 1)[-1]
        tgt_gene = edge[2].rsplit("_", 1)[-1]
        if ref_gene == tgt_gene:
            # True positive if edge is right
            TP += 1
        else:
            # False positive if edge is wrong
            FP += 1
    # Negatives cannot be observed from an edge list alone.
    return (TP, FP, 0, 0)
def parse_edge(edgeFile):
    """Read a tab-separated edge file into a list of field lists.

    The first two lines of the file are skipped (they are not treated as
    edges). Every following non-empty line is split on tab characters.

    Args:
        edgeFile: path of the file to read.

    Returns:
        A list with one entry per edge line, each entry being the list of
        that line's tab-separated string fields.

    Raises:
        OSError: if the file cannot be opened.
    """
    edgeList = []
    with open(edgeFile) as f:
        for i, line in enumerate(f):
            if i < 2:
                continue
            line = line.rstrip("\n")
            # An empty line (e.g. a trailing one) would yield [""] and make
            # the scoring functions fail with IndexError, so it is skipped.
            if not line:
                continue
            edgeList.append(line.split("\t"))
    return edgeList
def _percentage(part, total):
    """Return 100 * part / total rounded to 2 decimals, or None when total is 0."""
    if total == 0:
        return None
    return round(float(100 * part) / total, 2)


def compute_stats(edgeList, quality_func=None):
    """Print precision, recall, f-measure and odds ratio for a list of edges.

    Each metric is computed independently, so one undefined metric (zero
    denominator) no longer hides the others: undefined metrics are printed
    as ``NA``, and the line "One of the value is null." is printed first
    whenever at least one metric is undefined. The raw TP/FP/TN/FN counts
    are always printed last.

    Args:
        edgeList: edges as accepted by ``quality_func``.
        quality_func: callable taking ``edgeList`` and returning
            ``(TP, FP, TN, FN)``. Defaults to ``computeClustQuality``.
            Because that function always reports TN = FN = 0, the odds
            ratio is always undefined with the default.

    Returns:
        None. All results are written to standard output.
    """
    if quality_func is None:
        quality_func = computeClustQuality
    TP, FP, TN, FN = quality_func(edgeList)

    precision = _percentage(TP, TP + FP)
    recall = _percentage(TP, TP + FN)

    # Harmonic mean of precision and recall; undefined if either is missing
    # or if both are zero.
    if precision is None or recall is None or precision + recall == 0:
        fmeasure = None
    else:
        fmeasure = round((2 * precision * recall) / (precision + recall), 2)

    # Undefined as soon as FP, TN or FN is zero.
    try:
        oddratio = round((TP / FP) / (FN / TN), 2)
    except ZeroDivisionError:
        oddratio = None

    metrics = (
        ("Precision: ", precision),
        ("Recall : ", recall),
        ("fmeasure : ", fmeasure),
        ("DOG : ", oddratio),
    )
    if any(value is None for _, value in metrics):
        print("One of the value is null.")
    for label, value in metrics:
        print(label, "NA" if value is None else value)

    print("TP", "FP", "TN", "FN", sep="\t")
    print(TP, FP, TN, FN, sep="\t")
# Script driver: the single command-line argument is either one edge file or
# a directory whose entries are scanned (non-recursively) for edge files.
if len(sys.argv) < 2:
    # Fail with a usage message instead of an IndexError traceback.
    sys.exit("usage: " + os.path.basename(sys.argv[0]) + " <edge_file | directory>")
current_path = sys.argv[1]
edge_files = []
# if we have a single file work on this one only
if os.path.isfile(current_path):
    edge_files.append(current_path)
# if we get a path, work on all edge files
elif os.path.isdir(current_path):
    # os.listdir returns entries in arbitrary order; sorting makes the
    # report order reproducible between runs.
    for element in sorted(os.listdir(current_path)):
        # ".edge" has to occur within the last six characters, so names
        # ending in ".edge" or in ".edge" plus one more character match.
        if ".edge" in element[-6:]:
            edge_files.append(os.path.join(current_path, element))
else:
    # Neither a file nor a directory: report it rather than exit silently.
    sys.exit(current_path + ": no such file or directory")
for edge_file in edge_files:
    print(os.path.basename(edge_file))
    el = parse_edge(edge_file)
    print("Parsed " + str(len(el)) + " edges")
    compute_stats(el)