-
Notifications
You must be signed in to change notification settings - Fork 24
/
runDigiNorm_pe.sh
executable file
·120 lines (104 loc) · 3.17 KB
/
runDigiNorm_pe.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/bin/bash
# runs digital normalization for the reads using khmer
# configured to run only on paied-end data
# run it as:
# runDigiNorm_pe.sh <read1.fq> <read2.fq>
if [ $# -lt 1 ] ; then
echo "usage: runDigiNorm_pe.sh <read1.fq> <read2.fq>"
echo ""
echo "runs digital normalization for the paired-end reads"
echo "paired-end reads can either be gzipped or uncompressed"
echo ""
exit 0
fi
################################
##### CHANGE SETTINGS HERE #####
################################
ksize=32 #max kmer is 32
numHashes=4
cutoff=20
memory=100e9 #for using 100G
###############################
##### SETTINGS END HERE #######
###############################
module load trimmomatic
module load python
module load fastx-toolkit/0.0.14
KHMER=$(dirname $(which normalize-by-median.py))
R1="$1"
R2="$2"
name=$(basename ${R1} |cut -f 1-3 -d "_")
#trim
echo "running trimmomatic on ${name}";
java -jar $TRIMMOMATIC_HOME/trimmomatic.jar PE \
${R1} ${R2} \
${name}_s1_pe ${name}_s1_se ${name}_s2_pe ${name}_s2_se \
ILLUMINACLIP:${TRIMMOMATIC_HOME}/adapters/TruSeq3-PE.fa:2:30:10 \
LEADING:3 \
TRAILING:3 \
SLIDINGWINDOW:4:15 \
MINLEN:36 || {
echo >&2 trimming failed for $FILE
exit 1
}
cat ${name}_s1_se ${name}_s2_se | gzip -9c > ${name}.se.fq.gz
#interleave
echo "interleaving reads for ${name}";
python ${KHMER}/interleave-reads.py \
--output ${name}_interleaved.fq \
${name}_s1_pe ${name}_s2_pe || {
echo >&2 interleaving failed for $FILE
exit 1
}
#quality filter
echo "running quality filtering for ${name}";
fastq_quality_filter \
-Q33 \
-q 30 \
-p 50 \
-i ${name}_interleaved.fq \
-o ${name}_interleaved_qc.fq || {
echo >&2 filtering failed for $FILE
exit 1
}
#normalize
echo "normalizing reads for ${name}";
python ${KHMER}/normalize-by-median.py \
--ksize $ksize \
--n_tables $numHashes \
--cutoff $cutoff \
--max-memory-usage $memory \
--report ${name}.report \
--out ${name}_C${cutoff}_normalized.fq \
${name}_interleaved_qc.fq || {
echo >&2 normalizing failed for $FILE
exit 1
}
#extract paired reads
echo "extracting paired reads for ${name}";
python ${KHMER}/extract-paired-reads.py \
--output-paired ${name}_C${cutoff}_normalized.fq.pe \
--output-single ${name}_C${cutoff}_normalized.fq.se \
${name}_C${cutoff}_normalized.fq || {
echo >&2 extaction failed for $FILE
exit 1
}
#split reads
echo "splitting paired reads for ${name}";
python ${KHMER}/split-paired-reads.py \
--output-first ${name}_normalized_C${cutoff}_R1.fq \
--output-second ${name}_normalized_C${cutoff}_R2.fq \
${name}_C${cutoff}_normalized.fq.pe || {
echo >&2 splitting failed for $FILE
exit 1
}
#cleanup
mkdir -p normalized_C${cutoff};
mv ${name}_normalized_C${cutoff}_R[12].fq normalized__C${cutoff}/
mkdir -p khmer_intermediary_files/{report_files,fastq_files,se_files}
mv ${name}.report khmer_intermediary_files/report_files/
mv ${name}_s[12]_[ps]e khmer_intermediary_files/fastq_files/
mv ${name}_C20_normalized.fq* khmer_intermediary_files/fastq_files/
mv ${name}.se.fq.gz khmer_intermediary_files/se_files/
mv ${name}_interleaved_qc.fq ${name}_interleaved.fq khmer_intermediary_files/fastq_files/
echo "all done!";