-
Notifications
You must be signed in to change notification settings - Fork 1
/
clean_vcf.py
executable file
·48 lines (42 loc) · 1.76 KB
/
clean_vcf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#author=Tyler Fording
import argparse
def main(vcf_filepath, out_filepath):
    """
    Copy a VCF file, dropping every record that contains a null call.

    Lines starting with '#' are copied to the output unchanged. Every other
    line is split on tabs and its last nine fields are inspected: if any of
    them begins with './.' the whole line is skipped, otherwise the line is
    written to the output exactly as it was read. The running line count is
    printed to stdout every 100000 lines as a progress indicator.

    A final line without a trailing newline is handled like any other line,
    and empty fields are tolerated.

    :param vcf_filepath: PATH to vcf
    :param out_filepath: PATH to output file (created or overwritten)
    :return: None
    """
    # Number of trailing tab-separated fields that are checked for null calls.
    # NOTE(review): presumably the number of sample columns in the expected
    # input -- confirm against the data this script is run on.
    checked_fields = 9
    null_call = './.'

    # Context managers make sure both handles are flushed and closed, even
    # when an error interrupts the loop.
    with open(vcf_filepath, 'r') as fh, open(out_filepath, 'w') as fh_out:
        for counter, line in enumerate(fh, 1):
            if counter % 100000 == 0:
                # Parenthesised form works on both Python 2 and Python 3.
                print(counter)

            if line.startswith('#'):
                fh_out.write(line)
                continue

            fields = line.split('\t')
            if any(field.startswith(null_call)
                   for field in fields[-checked_fields:]):
                continue

            # Splitting and re-joining on tabs would reproduce the same text,
            # so the original line is written as-is.
            fh_out.write(line)
if __name__ == "__main__":
    # Command-line entry point: clean_vcf.py <vcf_filepath> <out_filepath>
    parser = argparse.ArgumentParser(description="Clean VCF")
    # Both paths are required: main() passes them straight to open(), so a
    # missing argument must be reported by argparse as a usage error rather
    # than reaching open() as None.
    parser.add_argument("vcf_filepath", type=str, help="Enter file path to the input vcf file")
    parser.add_argument("out_filepath", type=str, help="Enter file path for output file")
    args = parser.parse_args()
    main(args.vcf_filepath, args.out_filepath)