-
Notifications
You must be signed in to change notification settings - Fork 14
/
hitMask.pl
executable file
·69 lines (59 loc) · 1.42 KB
/
hitMask.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env perl
#
#
use warnings;
use strict;
use FAlite;
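# Mask a FASTA file per base: every position covered by an alignment hit is
# replaced with "N". Tab-separated hits are read from STDIN; $ARGV[0] and
# $ARGV[1] name the two FASTA files the script reads.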
# Read tab-separated alignment hits from STDIN and remember, for every hit,
# its best bit score per aligned position.
#
# Layout of the result:
#   $bitScores->{ id with the ";coords..." suffix removed }->{ full id } = score/length
#
# Fields used (0-based): 0 = id, 3 = alignment length, 11 = bit score.
# NOTE(review): this matches BLAST-style 12-column tabular output -- confirm
# against whatever produces the input.
my $bitScores = {};
while (my $line = <STDIN>) {
    chomp $line;
    next if $line =~ /\A\s*\z/;    # tolerate blank lines

    my @parts = split /\t/, $line;
    my ($hitId, $length, $bitScore) = @parts[0, 3, 11];

    # A short line or a zero/non-numeric length would otherwise trigger
    # "uninitialized value" warnings or a fatal division by zero below.
    unless (defined $bitScore and $length =~ /\A\d+\z/ and $length > 0) {
        warn "hitMask: skipping malformed hit on input line $.: $line\n";
        next;
    }

    # Group hits under the id they were cut from (everything before ";coords").
    my $origDef = $hitId;
    $origDef =~ s/;coords.*//g;

    my $bitPerLength = $bitScore / $length;

    # Keep only the highest score seen for this exact hit id.
    my $best = $bitScores->{$origDef}->{$hitId};
    if (not defined $best or $best < $bitPerLength) {
        $bitScores->{$origDef}->{$hitId} = $bitPerLength;
    }
}
# Load the sequences that will be masked from the FASTA file named by
# $ARGV[0]:  $seqs->{ definition line without the leading ">" } = sequence.
defined $ARGV[0]
    or die "usage: $0 SEQUENCES.fasta HITS.fasta < hits.tsv\n";

# Three-argument open: the file name is never interpreted as a mode or a
# pipe, and a missing/unreadable file is reported instead of silently
# producing empty output.
open my $seqFh, '<', $ARGV[0]
    or die "hitMask: cannot open '$ARGV[0]' for reading: $!\n";

my $fasta_file = FAlite->new($seqFh);
my $seqs = {};
while (my $entry = $fasta_file->nextEntry) {
    my $def = $entry->def;
    $def =~ s/^>//;    # FAlite keeps the ">" marker on the definition line
    $seqs->{$def} = $entry->seq;
}
close $seqFh;
# Record the length of every entry in the FASTA file named by $ARGV[1]:
#   $lengths->{ definition line without the leading ">" } = sequence length.
# These lengths are later looked up by hit id to decide how many bases to mask.
defined $ARGV[1]
    or die "usage: $0 SEQUENCES.fasta HITS.fasta < hits.tsv\n";

# Three-argument open with an explicit failure message; the handle is lexical
# and is closed as soon as the file has been consumed.
open my $lenFh, '<', $ARGV[1]
    or die "hitMask: cannot open '$ARGV[1]' for reading: $!\n";

my $lengthReader = FAlite->new($lenFh);
my $lengths = {};
while (my $entry = $lengthReader->nextEntry) {
    my $def = $entry->def;
    $def =~ s/^>//;    # drop the ">" marker so keys match the hit ids
    $lengths->{$def} = length($entry->seq);
}
close $lenFh;
# Print every loaded sequence as FASTA on STDOUT. Sequences that have hits get
# each hit's span overwritten with "N"; sequences without hits are printed
# unchanged. Keys are sorted so the output order is reproducible between runs
# (plain hash order is not).
foreach my $def (sort keys %$seqs) {
    my $masked = $seqs->{$def};

    if (exists $bitScores->{$def}) {
        my $seqLength = length $masked;

        foreach my $hit (keys %{ $bitScores->{$def} }) {
            # The start offset is embedded in the hit id as "coords=<digits>".
            # Check the match explicitly: after a failed match $1 still holds
            # the capture from an earlier hit and would mask the wrong place.
            unless ($hit =~ m/coords=(\d+)/) {
                warn "hitMask: no coords= in hit id '$hit'; not masked\n";
                next;
            }
            my $start = $1;

            # The span length comes from the entry with the same id in the
            # second FASTA file; without it there is nothing to mask.
            my $length = $lengths->{$hit};
            unless (defined $length) {
                warn "hitMask: no length known for hit '$hit'; not masked\n";
                next;
            }

            # Clamp the span to the sequence so that masking can never make
            # the sequence longer than it was.
            # NOTE(review): coords is used directly as a 0-based string
            # offset, as the original indexing did -- confirm with the
            # producer of these ids.
            my $end = $start + $length;
            $end = $seqLength if $end > $seqLength;
            next if $start >= $end;

            my $span = $end - $start;
            substr($masked, $start, $span, "N" x $span);
        }
    }

    print ">$def\n";
    print "$masked\n";
}