-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHit_retainer.py
97 lines (81 loc) · 2.58 KB
/
Hit_retainer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import argparse
import os
# Command-line interface: one positional BLAST output file plus up to two
# (column number, cutoff) filter pairs.
parser = argparse.ArgumentParser()
parser.add_argument("input", help="input the Blast outfmt 7 file")
parser.add_argument(
    "-f",
    "--filter_parameter_1",
    type=int,
    # argparse %-formats help strings, so a literal percent sign has to be
    # written as "%%"; a bare "%" breaks the generated help output.
    help="Input the column number you wish to filter your Blast outputs by\nquick reference: 3 %% identity , 4 alignmnet length , 5 mismatches, 6 gap opens, 11 e value, 12 bit score ",
)
parser.add_argument(
    "-c",
    "--cutoff_1",
    type=str,
    help="The cutoff value you wish to use, all retained hits will be greater than or equal to this value (or less than in the case of e value",
)
parser.add_argument(
    "-f2",
    "--filter_parameter_2",
    type=int,
    default=0,
    # The default of 0 is the sentinel the script checks to skip the second
    # filtering pass.
    help="Optional second column number to filter by (0, the default, disables the second filter)",
)
parser.add_argument(
    "-c2",
    "--cutoff_2",
    type=str,
    help="The cutoff value you wish to use, all retained hits will be greater than or equal to this value (or less than in the case of e value",
    default=0,
)
args = parser.parse_args()
# Name the output file by prefixing the input's *file name* only, and place
# it in the same directory as the input. Prefixing the whole path would turn
# "dir/hits.txt" into the unusable "Retained_hits_dir/hits.txt". For a bare
# file name the result is unchanged.
_input_dir, _input_name = os.path.split(args.input)
Retained_hits_output = os.path.join(_input_dir, 'Retained_hits_' + _input_name)

filter_parameter = args.filter_parameter_1

# Columns for which a hit is retained when its value is <= the cutoff
# (per the -f help text: 5 mismatches, 6 gap opens, 11 e value; column 9 is
# also treated this way). Every other column retains values >= the cutoff.
if filter_parameter in (5, 6, 9, 11):
    G_OR_L = '<'
else:
    G_OR_L = '>'
def hit_retain(hit, parameter, cutoff, greater_or_less):
    """Decide whether one BLAST hit passes a numeric cutoff.

    Args:
        hit: Sequence holding the column values of a single hit row.
        parameter: 1-based number of the column to test.
        cutoff: Threshold value; anything ``float()`` accepts.
        greater_or_less: ``'>'`` keeps values >= cutoff, ``'<'`` keeps
            values <= cutoff.

    Returns:
        ``['keep']`` or ``['omit']``. Rows whose tested column is missing or
        not numeric are printed and omitted. ``None`` is returned when
        ``greater_or_less`` is neither ``'>'`` nor ``'<'``.
    """
    column = int(parameter) - 1
    try:
        value = float(hit[column])
    except (IndexError, ValueError, TypeError):
        # A short or non-numeric row cannot be compared. Report it and drop
        # it instead of falling through to an unbound ``value``.
        print(hit)
        return ['omit']
    float_cut = float(cutoff)
    if greater_or_less == '>':
        return ['keep'] if value >= float_cut else ['omit']
    if greater_or_less == '<':
        return ['keep'] if value <= float_cut else ['omit']
    # Unknown comparison direction: callers treat anything but ['keep'] as omit.
    return None
# First pass: read the input file and keep every data row that satisfies the
# primary (column, cutoff) filter.
Blast_hits = []
with open(args.input) as file:
    for line in file:
        dat = line.split()
        # Skip blank lines (which have no columns to test) and '#' comment
        # lines; only data rows are handed to hit_retain.
        if not dat or dat[0].startswith('#'):
            continue
        ruling = hit_retain(dat, args.filter_parameter_1, args.cutoff_1, G_OR_L)
        if ruling == ['keep']:
            Blast_hits.append(dat)
# Below: repeat the filtering on the surviving hits if a second parameter was
# given at input. A second column number of 0 means "no second filter".
Blast_filtered_twice = []
if args.filter_parameter_2 == 0:
    Retained_hits = Blast_hits
else:
    # Use the same set of "value must be <= cutoff" columns as the primary
    # filter, so a column behaves identically whether it is passed via -f or
    # -f2 (column 9 was previously handled differently here).
    if args.filter_parameter_2 in (5, 6, 9, 11):
        G_OR_L_2 = '<'
    else:
        G_OR_L_2 = '>'
    Blast_filtered_twice = [
        line
        for line in Blast_hits
        if hit_retain(line, args.filter_parameter_2, args.cutoff_2, G_OR_L_2) == ['keep']
    ]
    Retained_hits = Blast_filtered_twice
# Serialise the retained hits as tab-separated rows, one hit per line, and
# write them to the output file. The context manager closes the file even if
# the write fails.
Retained_hits_outstring = ''.join('\t'.join(x) + '\n' for x in Retained_hits)
with open(Retained_hits_output, 'w') as out_file:
    out_file.write(Retained_hits_outstring)