-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvcf2genotype.pl
executable file
·82 lines (66 loc) · 2.03 KB
/
vcf2genotype.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/perl -w
#
# vcf2genotype.pl
#
# Convert each individual's VCF file (*.vcf) into a genotype file (<prefix>.gens).
# Each site is renamed to "<stacks tag#>.<position>" to match the coverage file,
# and a genotype string is built for each individual from the DP4 read counts.
#
# September 24, 2013
# Liz Cooper
############################################################
# RUN THIS SCRIPT IN THE DIRECTORY WITH THE VCF FILES!!!!
############################################################
use strict;
# Build the output site name from a VCF CHROM/POS pair.
#
# The first "|*|<digits>" portion of the chromosome name is removed so that
# only the stacks tag# remains, and ".<pos>" is appended.
#
#   $chrom - CHROM column of the VCF record
#   $pos   - POS column of the VCF record
#
# Returns the edited name, e.g. ("12|*|345", 77) -> "12.77".
sub site_name {
    my ($chrom, $pos) = @_;
    $chrom =~ s/\|\*\|[0-9]{1,}//;
    return $chrom . "." . $pos;
}

# Build the genotype string for one VCF record from its DP4 read depths.
#
# DP4 holds four comma-separated counts; the first two are summed as the
# number of reference reads and the last two as the number of alternate reads.
# The result is the reference symbol repeated once per reference read followed
# by the alternate symbol repeated once per alternate read.  For records whose
# INFO column starts with "INDEL" the symbols are '0' and '1' instead of the
# REF and ALT alleles.
#
#   $ref  - REF column
#   $alt  - ALT column
#   $info - INFO column (semicolon-separated fields)
#
# Returns the genotype string, or undef when INFO has no DP4 field.
sub genotype_string {
    my ($ref, $alt, $info) = @_;

    # Figure out which field has the read depths (the last DP4 field wins).
    my $dp4;
    foreach my $field (split(/\;/, $info)) {
        $dp4 = $field if $field =~ /^DP4/;
    }
    return unless defined $dp4;

    $dp4 =~ s/^DP4\=//;
    my @depths = split(/,/, $dp4);

    # Treat missing or empty counts as zero rather than tripping
    # "uninitialized value" warnings on a short DP4 list.
    foreach my $i (0 .. 3) {
        $depths[$i] = 0 unless defined $depths[$i] && length $depths[$i];
    }

    my $num_ref = $depths[0] + $depths[1];
    my $num_alt = $depths[2] + $depths[3];

    # Indel genotypes are written as 0 and 1 instead of the allele strings.
    my ($ref_symbol, $alt_symbol) = ($ref, $alt);
    if ($info =~ /^INDEL/) {
        ($ref_symbol, $alt_symbol) = ('0', '1');
    }

    return ($ref_symbol x $num_ref) . ($alt_symbol x $num_alt);
}

# Convert one VCF file into a genotype file.
#
# The output file is named after everything before the first "." of the
# input name, plus ".gens".  One tab-separated row is written per record:
# site name, REF, ALT, genotype string.  Header lines (starting with "#")
# and blank lines are skipped.  Records with fewer than 8 columns are skipped
# with a warning; records without a DP4 field are written with an empty
# genotype string and a warning.
#
#   $file - path of the VCF file to read
#
# Dies if either file cannot be opened or the output cannot be flushed.
sub convert_vcf {
    my ($file) = @_;

    my $prefix = (split(/\./, $file))[0];
    my $new    = $prefix . "." . "gens";

    # Open the input first so an unreadable VCF does not leave an empty
    # .gens file behind.  Three-argument open keeps odd characters in the
    # file name from being interpreted as an open mode.
    open(my $vcf, '<', $file) or die "\nUnable to open the file $file: $!\n";
    open(my $out, '>', $new)  or die "\nUnable to open the file $new: $!\n";

    while (my $line = <$vcf>) {
        chomp $line;
        next if $line =~ /^\#/;
        next if $line !~ /\S/;

        # Only CHROM, POS, REF, ALT and INFO are needed.
        my ($chrom, $pos, $ref, $alt, $info) =
            (split(/\s{1,}/, $line))[0, 1, 3, 4, 7];
        unless (defined $info) {
            warn "$file line $.: fewer than 8 columns, record skipped\n";
            next;
        }

        my $gen_string = genotype_string($ref, $alt, $info);
        unless (defined $gen_string) {
            warn "$file line $.: no DP4 field, genotype left empty\n";
            $gen_string = '';
        }

        print {$out} join("\t", site_name($chrom, $pos), $ref, $alt, $gen_string), "\n";
    }

    close($vcf);
    # Buffered write errors only surface at close, so check it.
    close($out) or die "\nUnable to finish writing the file $new: $!\n";
    return;
}

# Convert every VCF file found in the current directory.
foreach my $file (glob("*.vcf")) {
    convert_vcf($file);
}
# All VCF files have been processed; end the script explicitly (exit status 0).
exit;