-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_ring_ref.pl
executable file
·104 lines (88 loc) · 3.07 KB
/
create_ring_ref.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/perl
use strict;
use utf8;
use Getopt::Long;
my $bam_file = '';
my $gtf_file = '';
my $output_file = '';
my $samtools = "samtools";
my $genome= '';
my $help = '';
my $taglen = 30;
my $result = GetOptions("gtf=s" => \$gtf_file,
"out=s" => \$output_file,
"sam=s" => \$samtools,
"help" => \$help,
"ref=s"=> \$genome,
"taglen=i"=>\$taglen,
);
if ($help){print "\n[-g|--gtf] - input GTF file\n[-o|--out] - output FASTA file\n[-r|--ref] Reference genome sequence in FASTA format\n[-h|--help] - print this help\n[-s|--sam] samtools binary you use\n[-t|--taglen] integer parameter that describes tag length (default = 30)\n\n";exit(0)}
open my $gtf, '<', "$gtf_file" or die "GTF File doesn't exist!";
sub reverse_read {my @read = reverse (split ('', $_[0]));
foreach (@read){if ($_ eq 'A') {$_ = 'T'}
elsif ($_ eq 'T') {$_ = 'A'}
elsif ($_ eq 'G') {$_ = 'C'}
elsif ($_ eq 'C') {$_ = 'G'}
else {}
}
return join ('',@read)
}
#variables
##
my $tag;
my $previous_tag;
my ($chr, $start,$end, $chain,$tag);
my %tags;
my $i;
while (my $line = <$gtf>){
if ($line =~ /^[^#]/){
$line =~/^(\S+)\s+\S+\s+exon\s+(\d+)\s+(\d+)\s+\S+\s+([+-]).*transcript_id\s+"([^"]+)";/;
#base parameters
$chr = $1;
$start = $2;
$end = $3;
$chain = $4;
#$tag = $start."-".$end.".".$chain;
$tag = $5;
$tags{"$tag"}{'chain'} = $chain if $tags{"$tag"}{'chain'} eq '';
my $j = 1;
my $exon_exists = 1;
while ($exon_exists){
if ($tags{"$tag"}{$j} =~/[AGTCagtc]/){$j++;
next}
else {$exon_exists = 0;
my @string = split ("\n",`$samtools faidx $genome ${chr}:${start}-${end}`);shift @string;
$tags{"$tag"}{$j} = join('',@string);
}
}
}
}
#write output
my @transcript_ids = keys %tags;
open my $output, ">","$output_file";
foreach my $id (@transcript_ids){
my $exon_number = scalar(keys %{$tags{$id}}) - 1;
if ($tags{$id}{'chain'} eq '+'){
for (my $i =$exon_number ; $i > 0;$i--){#>/>=
my $start = substr $tags{$id}{$i}, -$taglen;
for (my $j = $i ; $j>0;$j--){
my $end = substr $tags{$id}{$j},0,$taglen;
print $output ">$id"."_exon"."$i"."_exon"."$j\n";
print $output $start.$end."\n";
}
}
}
elsif ($tags{$id}{'chain'} eq '-'){
for (my $i =1 ; $i <= $exon_number;$i++){ #1/0
my $start = reverse_read(substr $tags{$id}{$i}, 0,$taglen);
my $number_start = $exon_number -$i+1;
for (my $j = $i;$j<=$exon_number;$j++){
my $end =reverse_read(substr $tags{$id}{$j},-$taglen);
my $number_end = $exon_number - $j + 1;
print $output ">$id"."_exon"."$number_start"."_exon"."$number_end\n";
print $output $start.$end."\n";
}
}
}
else {}
}