-
Notifications
You must be signed in to change notification settings - Fork 0
/
Create_Corrected_AllLRReads.py
executable file
·72 lines (53 loc) · 1.37 KB
/
Create_Corrected_AllLRReads.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#This program adds the uncorrected LR reads (not in Pileup file)
#to the output file of HECIL
import fileinput, os, sys
#USAGE: python Create_Corrected_AllLRReads.py Orig_LR.fa Corrected_LR.fa
ref_file=sys.argv[1]
#num_LRread=int(sys.argv[2])
corrref_file=sys.argv[2]
#num_corrLRread=int(sys.argv[4])
# Open input files
fref=open(ref_file,'r')
fcorrref=open(corrref_file,'r')
# Count the number of sequences in ref_file
num_LRread = 0.0
for line in fref:
num_LRread += 0.5
num_LRread = int(num_LRread)
fref.seek(0)
# Count the number of sequences in corref_file
num_corrLRread = 0.0
for line in fcorrref:
num_corrLRread += 0.5
num_corrLRread = int(num_corrLRread)
fcorrref.seek(0)
out='Corrected_'+ref_file
fout=open(out,'w')
dict_ref={}
# Store all pre-corrected reads
for c in range(int(num_LRread)):
# Check for header
l1=fref.readline()
l2=fref.readline()
l1=l1.rstrip('\n')
l2=l2.rstrip('\n')
dict_ref[l1]=l2
dict_corrref={}
# Store all corrected reads (subset of original list)
for c1 in range(int(num_corrLRread)):
lc1=fcorrref.readline()
lc2=fcorrref.readline()
lc1=lc1.rstrip('\n')
lc2=lc2.rstrip('\n')
dict_corrref[lc1]=lc2
for ref in dict_ref.keys():
if (len(ref)>2):
if ref in dict_corrref.keys():
header=ref+'\n'
read=dict_corrref[ref]+'\n'
else:
header=ref+'\n'
read=dict_ref[ref]+'\n'
fout.write(header)
fout.write(read)
fout.close()