-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathmarkercsv2linkage.pl
executable file
·85 lines (72 loc) · 2.08 KB
/
markercsv2linkage.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/perl
# csv2merlin.pl -- Copy genotype data and pedigree information
# to create MERLIN-compatible QTDT files
# Author: David Eccles (gringer), 2013 <bioinformatics@gringer.org>
use warnings;
use strict;
use Text::CSV;
sub usage {
print("usage: ./csv2merlin.pl <PED file> <genotype file> [options]\n");
print("\nConvert CSV format to MERLIN-QTDT\n");
print("\nOther Options:\n");
print("-help : Only display this help message\n");
print("\n");
}
my @files = ();
my $pedFileName = "";
my $gtFileName = "";
# extract command line arguments
while(@ARGV){
my $argument = shift @ARGV;
if(-f $argument){ # file existence check
if(!$pedFileName){
$pedFileName = $argument;
#printf(STDERR "Setting PED file name to '%s'\n", $pedFileName);
} elsif (!$gtFileName) {
$gtFileName = $argument;
#printf(STDERR "Setting genotype file ".
# "name to '%s'\n", $gtFileName);
} else {
print(STDERR "Error: only two files can be specified\n");
usage();
exit(1);
}
} else {
if($argument eq "-help"){
usage();
exit(0);
}
}
}
if(!$pedFileName || !$gtFileName){
print(STDERR "Error: *two* files must be specified, cannot continue\n");
usage();
exit(1);
}
my $csv = Text::CSV->new ( { binary => 1 } ) # should set binary attribute.
or die "Cannot use CSV: ".Text::CSV->error_diag ();
my $maxGTCount = 0;
my %gtLines = ();
open(my $gtFile, "<", $gtFileName) or die("Cannot open genotype file");
while(my $row = $csv->getline($gtFile)){
my @F = @{$row};
my $id = shift(@F);
if(scalar(@F) > $maxGTCount){
$maxGTCount = scalar(@F);
}
map{$_ = ($_)?$_:"0/0"} @F;
$gtLines{$id} = join(" ",@F)."\n";
}
close($gtFile);
my $blankLine = " 0/0" x $maxGTCount . "\n";
open(my $pedFile, "<", $pedFileName) or die("Cannot open PED file");
while(<$pedFile>){
chomp;
print $_." ";
my @F = split(/\s+/, $_, 3);
if($gtLines{$F[1]}){
print($gtLines{$F[1]});
} else {
print($blankLine);
}
}