-
Notifications
You must be signed in to change notification settings - Fork 8
/
site_bls.pl
executable file
·57 lines (47 loc) · 1.3 KB
/
site_bls.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/perl
use DBI;
use Bio::AlignIO;
use allfxns;
$home = $ENV{DIR};
$species = shift;
$region = shift;
$klen = shift;
$bin = shift;
$seqpos = shift;
$file = shift;
$code = (split /\./, (split /\//, $file)[-1])[0];
$klen--;
$refspec = "dm6";
if ($bin eq "all"){
open TREE, "<$tree";
$treestring = do { local($/); <TREE> }; #slurp up nh tree file
close TREE;
} else{
$treestring = `tail -1 ../Fig2/$species/$region/trees/bin$bin.REV.mod | tail -c+7`;
}
$mmnewick = parse_tree($treestring);
$in = Bio::AlignIO->new(-file => $file, -format => "fasta");
$aln = $in->next_aln;
$refseq = $aln->get_seq_by_id($refspec)->seq; $refseq =~ tr/-//d;
$len = length($refseq);
$diff = 1;
$i=$seqpos;
$pos = $aln->column_from_residue_number($refspec,$i);
$endpos = $aln->column_from_residue_number($refspec,$i+$klen);
$endloc = $aln->column_from_residue_number($refspec,$len);
$refbase = $aln->get_seq_by_id($refspec)->subseq($pos, $endpos);
$refbase =~ s/[^ACTGN]//g;
(%leaves, %totleaves, $numspec) = ((), (), 0);
foreach $seq ($aln->each_seq){
$spec = $seq->id;
$base = $seq->subseq(max($pos-10,1), min($endpos+10,$endloc));
$base =~ s/[^ACTGN]//g;
if ($base =~ $refbase){
$leaves{$spec} = 1;
$numspec+=1;
$lastspec = $spec;
}
}
$bls = 0;
$bls = sprintf("%.3f", BLS($mmnewick, \%leaves));
print "$bls\t$refbase\n";