-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAlign.sh
149 lines (123 loc) · 3.62 KB
/
Align.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/bin/bash
# 2022
# This script is for the alignment and pre-processing of WES and targetted NGS data.
echo ''
echo ''
echo ' _______________D_E_L_V_E_______________ '
echo ' / |'
echo ' \ Christoper Simon Trethewey |'
echo ' / ><><><><><><><><><><><><>< |'
echo ' \ Alignment & Pre-processing |'
echo ' / ><><><><><><><><><><><><>< |'
echo ' \ |'
echo ' / version 3.00 |'
echo ' \ |'
echo ' \ |'
echo ' / BWA /0.7.17 |'
echo ' \ Picard/2.6.0 |'
echo ' / Samtools/1.8 |'
echo ' \ GATK/4.1.5.0 |'
echo ' /____________________________A_L_I_G_N__|'
echo ''
echo ''
SAMPLELIST=$1
DATADIR=$2
GENOME=$3
if [ $# -eq 0 ]
then
echo ' **undefined sample list and fastq directory**'
echo ''
echo 'exiting'
echo ''
echo ''
echo ''
echo 'usage: Align.sh /path/to/SAMPLELIST.txt /path/to/FQfiles/'
echo ''
echo ''
echo ''
echo ''
echo 'exiting'
exit 1
fi
mkdir ../$DATADIR SAM
mkdir ../$DATADIR TEMP
mkdir ../$DATADIR RPT
SAMDIR=$PWD/SAM
IDs=$(cat $SAMPLELIST)
echo "#####################################################"
echo ""
echo "PATIENTIDS specified in SAMPLELIST:"
echo ""
echo "$IDs"
echo ""
echo "#####################################################"
for ID in $IDs
do
echo""
echo '(1) aligning'
echo $ID
echo 'to UCSC-hg19'
echo ''
bwa mem -M -t 32 /Delve/RESOURCE/$GENOME/*.fa \
$DATADIR/${ID}_1.fq.gz $DATADIR/${ID}_2.fq.gz > $SAMDIR/$ID.sam;
echo "(2) sorting and indexing..."
echo $ID
echo''
samtools view -@ 16 -bS $SAMDIR/$ID.sam | \
samtools sort -@ 16 -o $PWD/TEMP/${ID}_SI.bam
samtools index -@ 16 $PWD/TEMP/${ID}_SI.bam
echo "(3) removing duplicate reads..."
echo $ID
echo''
java -Xms128m -Xmx1024m -jar /Delve/programs/picard/2.6.0/picard.jar \
MarkDuplicates \
I=$PWD/TEMP/${ID}_SI.bam \
O=$PWD/TEMP/${ID}_dD.bam \
M=$PWD/RPT/${ID}_Picard_MarkDuplicates_metrics.txt \
CREATE_INDEX=true \
TMP_DIR=$PWD/TEMP/${ID}_TMP
echo "(4) sorting and indexing..."
echo $ID
echo''
samtools sort -@ 16 $PWD/TEMP/${ID}_dD.bam -o $PWD/TEMP/${ID}_dD_SI.bam
samtools index -@ 16 $PWD/TEMP/${ID}_dD_SI.bam
echo "(5) updating read groups for sample"
echo $ID
echo''
java -Xms128m -Xmx1024m -jar /Delve/programs/picard/2.6.0/picard.jar \
AddOrReplaceReadGroups \
I=$PWD/TEMP/${ID}_dD_SI.bam \
O=$PWD/TEMP/${ID}_dD-RG.bam \
RGID=$ID \
RGLB=$ID \
RGPL=ILLUMINA \
RGPU=Hiseq \
RGSM=$ID
echo "(6) sorting and indexing..."
echo $ID
echo''
samtools sort -@ 16 $PWD/TEMP/${ID}_dD-RG.bam -o $PWD/TEMP/${ID}_dD-RG_SI.bam
samtools index -@ 16 $PWD/TEMP/${ID}_dD-RG_SI.bam
echo "(7) generating BQSR table for sample"
echo $ID
echo''
gatk BaseRecalibrator \
-I $PWD/TEMP/${ID}_dD-RG_SI.bam \
--known-sites /Delve/RESOURCE/$GENOME/*dbsnp_138.sites_sorted.vcf \
-O $PWD/TEMP/${ID}_BQSR.table \
-R /Delve/RESOURCE/$GENOME/*.fa
echo "(8) applying BQSR to sample"
echo $ID
echo''
gatk ApplyBQSR \
-bqsr $PWD/TEMP/${ID}_BQSR.table \
-I $PWD/TEMP/${ID}_dD-RG_SI.bam \
-O $PWD/TEMP/${ID}_dD-RG-BQSR.bam
echo "(9) final sort and index for sample"
echo $ID
echo''
samtools sort -@ 16 $PWD/TEMP/${ID}_dD-RG-BQSR.bam -o $PWD/TEMP/${ID}_dD-RG-BQSR_FINAL.bam
samtools index -@ 16 $PWD/TEMP/${ID}_dD-RG-BQSR_FINAL.bam
mv $PWD/TEMP/${ID}_dD-RG-BQSR_FINAL.bam $PWD/${ID}_dD-RG-BQSR_FINAL.bam
mv $PWD/TEMP/${ID}_dD-RG-BQSR_FINAL.bam.bai $PWD/${ID}_dD-RG-BQSR_FINAL.bam.bai
done