-
Notifications
You must be signed in to change notification settings - Fork 1
/
run_hmmsearch_parallel.py
70 lines (60 loc) · 1.91 KB
/
run_hmmsearch_parallel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
'''
'''
import glob
import os
import multiprocessing
import time
def timeit(start):
end = time.time()
t = end-start
if t < 60:
print('{:.2f} seconds elapsed'.format(t))
elif t < 3600:
print('{:.2f} minutes elapsed'.format(t/60))
else:
print('{:.2f} hours elapsed'.format(t/3600))
return end
#
#
# summaries = glob.glob("*.summary")
# n_seqs = list()
# for file in summaries:
# # reading the file and discarding the "#" starting sequences returns this kind of line:
# # 1 CL_1001 39 338 275 0.80 0.589
# lines = open(file).readlines()
# for line in lines:
# if not line.startswith("#") and not line.startswith("\n"):
# split = line.split(" ")
# while "" in split:
# split.remove("")
#
# # split is now like this:
# # ['1', 'CL_485', '30', '108', '90', '0.99', '0.635', '\n']
# n_seqs.append([f"{split[1]}.hmm", int(split[2])])
#
# order = sorted(n_seqs, key=lambda CL: int(CL[1]), reverse=True)
# print(order[:10])
##
hmms = sorted(glob.glob("*.hmm"))
# hmms_to_run = list()
#
# for CL in order:
# # In case we want to run only .hmm with a certain number of sequences
# #if CL[0] in hmms and CL[1] > 20:
# if CL[0] in hmms:
# hmms_to_run.append(CL[0])
#
# print(hmms_to_run[:10])
def run_hmm_hmmsearch(hmm_file):
outdir = "../0_hmmsearch_raw"
start = time.time()
CL_id = os.path.basename(hmm_file).split(".hmm")[0]
os.system(f"hmmsearch -E 0.01 --domE 0.01 --incE 0.01 --cpu 5 --noali --notextw -o {outdir}/{CL_id}.hmmsearch --domtblout {outdir}/{CL_id}.domtblout {hmm_file} ../nr")
print(CL_id)
timeit(start)
print()
print(f"{len(hmms)} profiles will be analyzed")
pool = multiprocessing.Pool(processes=14)
shared_content = pool.map(run_hmm_hmmsearch,hmms ) #hmms_to_run
pool.close()
pool.join()