-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathfilter_insertion.pl
executable file
·130 lines (111 loc) · 2.57 KB
/
filter_insertion.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/perl
use warnings; use strict;
use Getopt::Std;
my $help = "$0
-i : insertion list in bed format
-l : postion list of te homo in ref genome
-n : in the form /t=3/TS=1/TE=1/ , the minimum requried:
t:total reads supporting insertion /3/
CS:clipped reads cover TE start site /0/
CE:clipped reads cover TE end site /0/
cs:cross reads cover TE start /0/
ce:cross reads cover TE end /0/
TS:total reads cover TE start /1/
TE:total reads cover TE end /1/
-q : degault <1>, the minimum required average mapping quality
-d : default <2,200>, the reads depth range
-b : the treshhold of NB tag default : 100;
-h : help message
";
die $help unless ( @ARGV);
my %opt;
getopts("i:l:b:c:n:q:d:r:h",\%opt);
die $help if($opt{h});
######## parameters ###########
my $nb = $opt{b}?$opt{b}:100;
my $ins_file = $opt{i};
my $lst = $opt{l};
my $sr = $opt{n}? $opt{n} : '/t=3/TS=1/TE=1/';
my %paras = parse_sr($sr);
my $MQ = defined($opt{q})?$opt{q}:1;
my ($DP_L,$DP_H) = $opt{d}?(split ',',$opt{d}):(split ',',"2,200");
open INS, "$ins_file" or die $!;
## all homo in hash %homos
my %homos;
if($lst){
open LST, $lst or die $!;
while(<LST>){
chomp;
my($chr,$s,$e,$te) = split /\t/;
$s = $s - $nb;
$e = $e + $nb;
foreach my $i ($s..$e){
$homos{$te}{$chr}{$i} = 1;
}
}
}
while(<INS>){
my $boo = 1;
my($chr,$s,$e,$t,$rest) = (split /\t/, $_,5);
my@tags = split /;/, $t;
### parse the rest key and values ###
#####################################
my %other;
foreach my $r (@tags){
my($k,$v) = split /=/,$r;
$other{$k} = $v;
}
# filter support reads number
if(exists $other{SR}){
my($cf,$tot,$r1,$r2,$r3,$r4) = split /,/,$other{SR};
if($tot < $paras{t} or $r1 < $paras{CS} or $r2 < $paras{CE} or $r3 < $paras{cs} or $r4 < $paras{ce}
or ($r1+$r3) < $paras{TS} or ($r2+$r4) < $paras{TE}){
$boo = 0;
}
}
# filter eveage mapping valeu
if(exists $other{MQ} and $other{MQ} < $MQ){
$boo = 0;
}
# filter bg depth
if(exists $other{DP} ){
my $dp = $other{DP};
if ($dp < $DP_L or $dp > $DP_H){
$boo = 0;
}
}
# mark ins near known site
if ($lst){
my $near = 0 ;
for my $i ($s..$e){
if ($i ~~ %{$homos{$other{NM}}{$chr}}){
$near = 1;
}
}
if($near){
$t .= ";NB=Y";
}else{
$t .= ";NB=N";
}
}
print join "\t", ($chr,$s,$e,$t,$rest) if $boo;
}
sub parse_sr{
my $p = shift @_;
my %paras = (
t => 0,
CS => 0,
CE => 0,
cs => 0,
ce => 0,
TS => 0,
TE => 0,
);
my @ps = split /\//,$p;
foreach my $t (@ps){
next unless($t);
my($k,$v) = split /=/,$t;
$paras{$k} = $v;
}
return %paras;
}