-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfqQualProfiling.pl
executable file
·111 lines (99 loc) · 2.67 KB
/
fqQualProfiling.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env perl
=hey
Author: Shijian Sky Zhang
E-mail: zhangsjsky@pku.edu.cn
=cut
use 5.010;
use warnings;
use strict;
use Getopt::Long;
use File::Basename;
use lib dirname $0;
use pm::common;
sub usage{
my $scriptName = basename $0;
print <<HELP;
Usage: perl $scriptName OPTION read.fq(.gz) >OUTPUT.tsv
If read.fq(.gz) isn't specified, input from STDIN
Option:
-2 --read2 FQ The read2 file
-h --help Print this help information
HELP
}
my $read2;
GetOptions(
'2|read2=s' => \$read2,
'h|help' => sub{usage(); exit}
) || usage();
if(defined $ARGV[0]){
if(`file -L $ARGV[0]` =~ /gzip/){
open IN,"gzip -dc $ARGV[0]|" or die "Can't read file ($ARGV[0]): $!";
}else{
open IN,"$ARGV[0]" or die "Can't read file ($ARGV[0]): $!";
}
}else{
open IN, "-" or die "Can't read file ($ARGV[0]): $!";
}
if(defined $read2){
if(`file -L $read2` =~ /gzip/){
open READ2,"gzip -dc $read2|" or die "Can't read file ($read2): $!";
}else{
open READ2,"$read2" or die "Can't read file ($read2): $!";
}
}
my ($baseCount1, $totalQual1, $baseCount2, $totalQual2);
my (%profile1, %profile2);
while(<IN>){
<IN>;
<IN>;
my $qualLine = <IN>;
chomp $qualLine;
for my $base(split '', $qualLine){
my $qual = ord($base) - 32;
$totalQual1 += $qual;
my $tile = int($qual/10) * 10;
$profile1{$tile}++;
}
$baseCount1 += length $qualLine;
}
my $maxTile = (sort{$b<=>$a}keys %profile1)[0];
if(defined $read2){
while(<READ2>){
<READ2>;
<READ2>;
my $qualLine = <READ2>;
chomp $qualLine;
for my $base(split '', $qualLine){
my $qual = ord($base) - 32;
$totalQual2 += $qual;
my $tile = int($qual/10) * 10;
$profile2{$tile}++;
}
$baseCount2 += length $qualLine;
}
my $tmp = (sort{$b<=>$a}keys %profile2)[0];
$maxTile = $tmp if $tmp > $maxTile;
}
print "Base\t$baseCount1";
if(defined $read2){
say "\t$baseCount2"
}else{
print "\n";
}
my ($cumPercent1, $cumPercent2) = (0, 0);
for(my $tile = $maxTile; $tile >= 0; $tile-=10){
$cumPercent1 += $profile1{$tile}/$baseCount1*100 if defined $profile1{$tile};
print join "\t", ("Q$tile", sprintf "%.2f", $cumPercent1);
if(defined $read2){
$cumPercent2 += $profile2{$tile}/$baseCount2*100 if defined $profile2{$tile};
say join "\t", ("", sprintf "%.2f", $cumPercent2);
}else{
print "\n";
}
}
print join "\t", ("Mean", sprintf "%.2f", $totalQual1/$baseCount1);
if(defined $read2){
say "\t" . sprintf "%.2f", $totalQual2/$baseCount2;
}else{
print "\n";
}