-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmotifs_parser.py
executable file
·48 lines (43 loc) · 1.42 KB
/
motifs_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import csv
import os
from collections import Counter
from pprint import pprint as pp
from string import join
# Summarise significant motifs found in the "MAT" result tables below
# MOTIF_ROOT: print how often each motif id occurs overall and per directory,
# then write a per-motif recurrence table to RECURRENCES_FILE.

out_dir = "graphs"

# Directory tree that is scanned for result tables.
MOTIF_ROOT = 'graphs/ec_hs_culled_30each_smallest'
# A row is kept only when its z-score column is strictly above this value.
ZSCORE_THRESHOLD = 2.0
# Destination of the per-motif recurrence table.
RECURRENCES_FILE = "recurrences_4.txt"

# Column layout of a result row, as read by this script.  Columns 6 and 7 were
# previously parsed into "uniq" and "mfact" but never used (an "mfact > 1.10"
# condition was commented out), so they are no longer read.
MOTIF_ID_COLUMN = 0
ZSCORE_COLUMN = 4


def is_motif_table(filename):
    """Return True if *filename* is a result table this script should read.

    A file qualifies when its name contains "MAT" and contains neither
    "_5_" nor "_3_".
    NOTE(review): the excluded markers presumably tag runs for other motif
    sizes -- confirm against the naming scheme of the result files.
    """
    return "MAT" in filename and "_5_" not in filename and "_3_" not in filename


def collect_significant_motifs(root, zscore_threshold=ZSCORE_THRESHOLD):
    """Walk *root* and tally motif ids whose z-score exceeds the threshold.

    Every file accepted by ``is_motif_table`` is parsed as a space-delimited
    table.  For each row whose z-score column is strictly greater than
    *zscore_threshold*, the motif id (first column) is recorded.

    Args:
        root: directory to walk recursively.
        zscore_threshold: exclusive lower bound for the z-score column.

    Returns:
        A ``(ids, motif_counts)`` tuple.  ``ids`` lists every accepted motif
        id, one entry per accepted row.  ``motif_counts`` maps each motif id
        to a ``Counter`` keyed by the base name of the directory that
        contained the file.

    Raises:
        ValueError: if the z-score column of a row is not a number.
        IndexError: if a non-empty row has fewer than five columns.
    """
    found_ids = []
    # Kept as a plain dict (not a defaultdict) so its pprint output is
    # unchanged.
    per_motif = dict()
    for path, _dirs, files in os.walk(root):
        group = os.path.basename(path)
        for name in files:
            if not is_motif_table(name):
                continue
            with open(os.path.join(path, name), 'r') as csvf:
                for row in csv.reader(csvf, delimiter=' '):
                    if not row:
                        # csv.reader yields [] for blank lines; skip them
                        # instead of failing with IndexError.
                        continue
                    if float(row[ZSCORE_COLUMN]) > zscore_threshold:
                        motif_id = row[MOTIF_ID_COLUMN]
                        found_ids.append(motif_id)
                        # Direct dict membership is O(1); ".keys()" would
                        # build a list on Python 2 for every row.
                        if motif_id not in per_motif:
                            per_motif[motif_id] = Counter()
                        per_motif[motif_id][group] += 1
    return found_ids, per_motif


def write_recurrences(counts, out_path, template="ec_?_hs",
                      group_numbers=tuple(range(1, 7))):
    """Write one line per motif: its id followed by its count in each group.

    Group names are produced by replacing "?" in *template* with each entry
    of *group_numbers*.  A group in which the motif never occurred is
    written as 0.  Fields are separated by single spaces.

    Args:
        counts: mapping of motif id to a ``Counter`` of group name to count.
        out_path: path of the text file to (over)write.
        template: group-name pattern containing a "?" placeholder.
        group_numbers: values substituted for the placeholder, in column order.
    """
    with open(out_path, 'w') as out_f:
        for motif in counts:
            fields = [motif]
            for number in group_numbers:
                group_name = template.replace("?", str(number))
                fields.append(str(counts[motif][group_name]))
            out_f.write(" ".join(fields) + "\n")


ids, motif_counts = collect_significant_motifs(MOTIF_ROOT)
id_counts = Counter(ids)
pp(id_counts.most_common())
pp(motif_counts)
templ = "ec_?_hs"
write_recurrences(motif_counts, RECURRENCES_FILE, templ)