-
Notifications
You must be signed in to change notification settings - Fork 2
/
combineNCBIGenome.py
executable file
·68 lines (47 loc) · 1.79 KB
/
combineNCBIGenome.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import sys, re
# Help text printed by main() before exiting when the script is started
# without any input files.  The single %s placeholder is filled in with
# sys.argv[0] (the name the script was invoked under) when the module is
# loaded.  The \x1b[1m ... \x1b[0m pairs are ANSI escape sequences that
# switch bold text on and off around the section titles.
Usage = """
combineGenome - Combine chromosomes from NCBI and convert the
NC_ code to chrX code
=============================================================
\x1b[1mUSAGE:\x1b[0m
%s chr1.fasta chr2.fasta chr3.fasta... > outFile.fa
\x1b[1mDATE:\x1b[0m
2018-12-23
\x1b[1mAUTHOR:\x1b[0m
Li Pan
""" % (sys.argv[0], )
# Compiled once: captures the chromosome label ("1", "X", ...) that follows
# the word "chromosome" in the free-text part of a header.
_CHROMOSOME_RE = re.compile(r"chromosome (\w+)")


def _extract_accession(fields):
    """Return the field that follows the last "ref" tag in *fields*, or ""."""
    accession = ""
    # Pairing every field with its successor cannot index past the end of
    # the list, even when "ref" is the final field of the header.
    for tag, value in zip(fields, fields[1:]):
        if tag == "ref":
            accession = value
    return accession


def _convert_header(line):
    """Translate one FASTA header line into its chr-style replacement.

    *line* is a header line starting with '>'.  The '|'-separated fields
    are searched for a "ref" tag to obtain the accession code, and the last
    field is searched for "chromosome <label>".

    Returns:
        ">chrM <accession>\\n"  for headers containing "mitocho";
        ">chr<label> <accession>\\n"  for headers naming a chromosome;
        "><accession>\\n"  for unplaced/unlocalized headers and for any
        other header that carries an accession;
        the original line, unchanged, when none of the above applies, so
        that a record is never emitted with an empty or stale name.
    """
    fields = line[1:].rstrip().split('|')
    accession = _extract_accession(fields)

    # Unplaced/unlocalized scaffolds often mention a chromosome in their
    # description, so they must be recognised before the chromosome rules.
    is_scaffold = 'unplaced' in line or 'unlocalized' in line
    if not is_scaffold:
        if 'mitocho' in line:
            return ">chrM %s\n" % (accession, )
        chr_match = _CHROMOSOME_RE.search(fields[-1])
        if chr_match:
            return ">chr%s %s\n" % (chr_match.group(1), accession)

    if accession:
        return ">%s\n" % (accession, )
    # Nothing recognisable: keep the header exactly as it was.
    return line


def main():
    """Concatenate the FASTA files named on the command line to stdout.

    Every header line is rewritten by _convert_header(); sequence lines are
    copied verbatim.  A progress message per input file goes to stderr.

    With no arguments the usage text is printed and the process exits with
    status -1.  With "-h" or "--help" as the first argument the usage text
    is printed and the process exits with status 0.
    """
    in_files = sys.argv[1:]
    if not in_files:
        print(Usage)
        sys.exit(-1)
    if in_files[0] in ("--help", "-h"):
        print(Usage)
        sys.exit(0)

    for infile in in_files:
        sys.stderr.write("Process " + infile + "..." + "\n")
        last_written = "\n"
        with open(infile) as handle:
            for line in handle:
                if line.startswith('>'):
                    last_written = _convert_header(line)
                else:
                    last_written = line
                sys.stdout.write(last_written)
        # A file without a trailing newline would otherwise glue the next
        # file's header onto its last sequence line.
        if not last_written.endswith("\n"):
            sys.stdout.write("\n")
# Run the conversion only when this file is executed as a script; importing
# it as a module must not start reading files named in sys.argv.
if __name__ == "__main__":
    main()