forked from fjruizruano/ngs-protocols
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrexp_get_contigs.py
executable file
·62 lines (48 loc) · 1.12 KB
/
rexp_get_contigs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/python
import re
import operator
from Bio import SeqIO
import os
from subprocess import call
subfol = os.walk('.').next()[1]
com = "cat "
for num in range(1,len(subfol)+1):
n = str(num)
while len(n) < 4:
n = "0" + n
com = com + "./dir_CL%s/contigs_CL%s " % (n, num)
com = com + "> out.txt"
print com
call(com, shell=True)
data = SeqIO.parse("out.txt", "fasta")
id_dict = {}
for s in data:
info = re.split("CL|Contig|\055| ", s.description)
CL = info[1]
contig = info[2]
cov = float(info[4])
if CL in id_dict:
id_dict[CL][contig] = cov
else:
id_dict[CL] = {contig : cov}
sel = {}
for el in id_dict:
info = id_dict[el]
info_s = sorted(info.items(), key=operator.itemgetter(1))
info_s.reverse()
sel[int(el)] = []
i = 0
for x in info_s:
i += x[1]
ii = 1.0*i/2
j = 0
for x in info_s:
if j < ii:
sel[int(el)].append(x[0])
j += x[1]
sel_s = sorted(sel.items(), key=operator.itemgetter(0))
w = open("out.list","w")
for l in sel_s:
for con in l[1]:
w.write("CL%sContig%s\n" % (str(l[0]),con))
w.close()