-
Notifications
You must be signed in to change notification settings - Fork 0
/
RMDL_pipeline_RE2.sh
70 lines (46 loc) · 1.99 KB
/
RMDL_pipeline_RE2.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/bin/bash
## RepeatModeler pipeline to align genomic hits to the consensus sequences of
## the satDNAs produced by RepeatExplorer2 (run on Galaxy).
## Job example for Astrapia rothschildi; the same job was run for every species
## for which an assembly was available.
##
## The shebang has to be the very first line of the file: sbatch refuses a
## script that does not start with "#!". The #SBATCH directives only need to
## come before the first executable command, so these comments may precede them.
#SBATCH -J RMDL_pipeline_RE2_astRot
#SBATCH -o RMDL_pipeline_RE2_astRot.output
#SBATCH -e RMDL_pipeline_RE2_astRot.error
#SBATCH --mail-user valentina.peona90@gmail.com
#SBATCH --mail-type=ALL
#SBATCH -t 02:00:00
#SBATCH -A snic2020-5-680
#SBATCH -p core
#SBATCH -n 2
# Species code: selects the working directory, the genome and the consensus file.
SAMPLE=astRot
DIR=/proj/sllstore2017073/private/Valentina/2021SatCorvides/Intermediate/RE2/RMDL/$SAMPLE

# Everything below writes relative to $DIR, so stop if it cannot be entered.
mkdir -p "$DIR" || exit 1
cd "$DIR" || exit 1

# Genome assembly and RepeatExplorer2 satellite consensus sequences.
REF=/home/vpeona/sllstore2017073/private/Valentina/2021SatCorvides/Data/Genomes/${SAMPLE}.fasta
CONS=/home/vpeona/sllstore2017073/private/Valentina/2021SatCorvides/Intermediate/RE2/Consensi/${SAMPLE}_RE2_satellite.fasta

ml purge
ml bioinfo-tools blast/2.9.0+ MAFFT/7.310

# `unset` takes a variable NAME. The previous `unset $MAFFT_BINARIES` expanded
# the variable's value and tried to unset a variable with that name, leaving
# MAFFT_BINARIES itself still set.
unset MAFFT_BINARIES

# Build a nucleotide BLAST database next to the genome, using the genome path
# itself as the database name.
makeblastdb -in "$REF" -out "$REF" -dbtype nucl -parse_seqids

# Make a local, cleaned copy of the consensus file in $DIR:
#   - keep only the first whitespace-delimited field of every line
#     (for FASTA headers this drops everything after the sequence ID);
#   - replace every "/" with "_".
# From here on CONS is the cleaned file name (<basename>.clean.fasta).
cons_src=$CONS
CONS=$(basename "$CONS")
CONS=${CONS%.*}.clean.fasta
awk '{print $1}' "$cons_src" > "$CONS"
sed -i 's/\//_/g' "$CONS"

# Run the alignment pipeline. The genome is passed twice; the second occurrence
# is presumably the BLAST database name created above -- TODO confirm against
# repeatModelerPipeline4.pl.
perl /proj/sllstore2017073/private/scripts/repeatModelerPipeline4.pl "$REF" "$REF" "$CONS"
# Post-process the alignments found in ./aligned:
#   1. drop duplicated sequence IDs            -> final/<name>.fa
#   2. strip gap-rich columns with T-Coffee    -> final/<name>.gaps95.fa
module purge
module load bioinfo-tools T-Coffee/11.00.8cbe486
export CACHE_4_TCOFFEE=/home/vpeona/bin/

# -f / -p so a rerun (nothing to delete, directory already there) is not an error.
rm -f -- *emp.out
mkdir -p final

# Step 1. <name> is the file name up to its first ".".
# The perl filter removes whitespace right after ">", counts every sequence ID,
# and prints only the lines of records whose ID has been seen exactly once,
# i.e. the first record for each ID is kept and later duplicates are dropped.
cd aligned || exit 1
for aln in *.fa; do
  [ -e "$aln" ] || continue   # glob matched nothing
  name=${aln%%.*}
  perl -ne 'chomp;s/>\s+/>/;if(/>(\S+)/){$id{$1}++;$id2=$1;}if($id{$id2}==1){print "$_\n"}' "$aln" > "../final/$name.fa"
done
cd .. || exit 1

# Step 2. seq_reformat "+rm_gap 95" removes gap-rich alignment columns
# (95% gap threshold; see the T-Coffee seq_reformat documentation).
cd final || exit 1
for aln in *.fa; do
  [ -e "$aln" ] || continue
  # Outputs of a previous run also match *.fa; reprocessing one would redirect
  # onto the very file being read and truncate it, so skip them.
  case $aln in
    *.gaps95.fa) continue ;;
  esac
  name=${aln%%.*}
  t_coffee -other_pg seq_reformat -in "$aln" -action +rm_gap 95 > "$name.gaps95.fa"
done
cd .. || exit 1
# Create one job script per species from the astRot job script, then submit.
cd /home/vpeona/sllstore2017073/private/Valentina/2021SatCorvides/Code/RMDL_pipeline_jobs || exit 1

# Template species code; every occurrence of it in the template is replaced by
# the target species code (job name, log files, paths).
SAMPLE=astRot
for SUB in dreAlb manKer parBre parLaw parRub ptiInt ptiMag; do
  sed "s/${SAMPLE}/${SUB}/g" "RMDL_pipeline_RE2_${SAMPLE}.sh" > "RMDL_pipeline_RE2_${SUB}.sh"
done

# Submit the RMDL job scripts. The previous glob was RMSK*.sh, which does not
# match any of the RMDL_pipeline_RE2_*.sh files generated above, so the new
# jobs were never submitted from here.
# NOTE(review): this glob also matches the ${SAMPLE} template itself; remove
# that script from the directory first if it has already been run.
for JOB in RMDL_pipeline_RE2_*.sh; do
  [ -e "$JOB" ] || continue   # glob matched nothing
  chmod +x "$JOB"
  sbatch "$JOB"
done