-
Notifications
You must be signed in to change notification settings - Fork 0
/
getNonHuman.pl
81 lines (69 loc) · 2.16 KB
/
getNonHuman.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/perl -w
#
# Author: Emily Norris, Lavanya Rishishwar
# Creation Date : Jan 5, 2020
# Modified/Updated for Myc_16 runs : April 25, 2020
#
#############################################################
use strict;
use Getopt::Long;
#############################################################
my $usage = "Retrieve Non-human reads based on kraken output.\nUsage instructions:\n$0 -in <input kraken output file name> [-out <output FASTQ files. Default:nonHum-[inputFileName]>] [-help <FLAG. Prints the help>]\n";
my $in = ""; # Input file name
my $out = ""; # Output File
my $help = 0; # Print help
# Get the arguments
my $args = GetOptions ("in=s" => \$in,
"help" => \$help,
"out=s" => \$out);
#############################################################
if($help > 0){
print STDERR $usage;
exit 0;
}
die "Please specify input file name!\n$usage\n" if ($in eq "");
die "Input file doesn't exist!\n$usage\n" if (! -e $in);
$out = $in;
$out =~ s/.output.gz//;
my $r1 = "../rawReads/".$out.".R1.fastq.gz";
my $r2 = "../rawReads/".$out.".R2.fastq.gz";
$out = "nonHum-$out";
#############################################################
my %reads;
open FILE, "gunzip -c $in |" or die "ERROR: Cannot read input file $in: $!\n";
while (<FILE>){
chomp $_;
my @line = split(/\t/,$_);
if ($line[2] !~ /Homo sapiens.*/){
$reads{$line[1]} = 1;
}
}
close FILE;
open R1IN, "gunzip -c $r1 |" or die "ERROR: Cannot read input file $r1: $!\n";
open R1OUT, ">$out.R1.fa" or die "ERROR: Cannot create output file $out.R1.fa: $!\n";
while(<R1IN>){
chomp $_;
my($part1, $part2) = split(/\s+/,$_);
$part1 =~ s/@//;
$part2 =~ s/:.*//;
my $seq = <R1IN>;
print R1OUT ">$part1-$part2\n$seq" if (exists $reads{$part1});
<R1IN>;
<R1IN>;
}
close R1OUT;
close R1IN;
open R2IN, "gunzip -c $r2 |" or die "ERROR: Cannot read input file $r2: $!\n";
open R2OUT, ">$out.R2.fa" or die "ERROR: Cannot create output file $out.R2.fa: $!\n";
while(<R2IN>){
chomp $_;
my($part1, $part2) = split(/\s+/,$_);
$part1 =~ s/@//;
$part2 =~ s/:.*//;
my $seq = <R2IN>;
print R2OUT ">$part1-$part2\n$seq" if (exists $reads{$part1});
<R2IN>;
<R2IN>;
}
close R2OUT;
close R2IN;