-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Panagiotis Moulos
committed
Feb 14, 2022
1 parent
2657737
commit f02ea34
Showing
12 changed files
with
993 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
#!/bin/bash

# Download paired-end FASTQ files for six 1000 Genomes samples from the EBI
# SRA FTP mirror and rename them from run accession (SRR...) to sample name
# (HG...). Files are written to $FASTQ_PATH.
#
# Environment:
#   CWD - directory to return to when finished (defaults to the directory
#         the script was started from)
#
# Exit status: 0 if every file was downloaded and renamed, 1 otherwise.

# Setup paths
HOME_PATH=/home/user/analysis
FASTQ_PATH=$HOME_PATH/fastq
EBI_FASTQ_URL=ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR099
# An empty CWD would make `cd` silently switch to $HOME, so fall back to the
# starting directory when the caller did not provide one.
CWD=${CWD:-$(pwd)}
STATUS=0

mkdir -p "$FASTQ_PATH" || exit 1

# Download raw data from 1000 genomes project
cd "$FASTQ_PATH" || exit 1

# Fetch both mates of one sequencing run and rename them after the sample.
# Arguments: $1 - SRA run accession (e.g. SRR099967)
#            $2 - sample name (e.g. HG00119)
# A failed download or rename is reported and recorded in STATUS, but the
# remaining files are still attempted.
fetch_sample() {
    local run=$1 sample=$2 mate
    for mate in 1 2
    do
        if wget "$EBI_FASTQ_URL/$run/${run}_${mate}.fastq.gz"
        then
            mv "${run}_${mate}.fastq.gz" "${sample}_${mate}.fastq.gz" \
                || STATUS=1
        else
            echo "Failed to download ${run}_${mate}.fastq.gz" >&2
            STATUS=1
        fi
    done
}

fetch_sample SRR099967 HG00119
fetch_sample SRR099969 HG00133
fetch_sample SRR099957 HG00145
fetch_sample SRR099958 HG00239
fetch_sample SRR099954 HG00258
fetch_sample SRR099968 HG00265

cd "$CWD" || exit 1
exit $STATUS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#!/bin/bash

# Quality and adapter trimming of paired-end FASTQ files with Trim Galore.
# For every $FASTQ_PATH/<sample>_1.fastq.gz (and its _2 mate) the trimmed
# reads and FastQC reports are written to $TRIMGALORE_OUTPUT and renamed so
# that they keep the original <sample>_1 / <sample>_2 naming.
#
# Environment:
#   TRIMGALORE_PATH - directory containing the trim_galore executable
#   CUTADAPT_PATH   - directory containing the cutadapt executable

HOME_PATH=/home/user/analysis
FASTQ_PATH=$HOME_PATH/fastq
TRIMGALORE_COMMAND=$TRIMGALORE_PATH/trim_galore
CUTADAPT_COMMAND=$CUTADAPT_PATH/cutadapt
TRIMGALORE_OUTPUT=$HOME_PATH/fastq_qual
CORES=4

mkdir -p "$TRIMGALORE_OUTPUT" || exit 1

for FILE in "$FASTQ_PATH"/*_1.fastq.gz
do
    # An unmatched glob stays literal; skip it instead of running on it.
    [ -e "$FILE" ] || continue

    BASE=$(basename "$FILE" _1.fastq.gz)
    echo "Processing $BASE"
    F1=$FASTQ_PATH/${BASE}_1.fastq.gz
    F2=$FASTQ_PATH/${BASE}_2.fastq.gz

    # Write straight into $TRIMGALORE_OUTPUT: the renames below (and the
    # alignment step, when pointed at fastq_qual) expect a flat directory.
    if ! "$TRIMGALORE_COMMAND" \
        --quality 30 \
        --length 50 \
        --output_dir "$TRIMGALORE_OUTPUT" \
        --path_to_cutadapt "$CUTADAPT_COMMAND" \
        --cores "$CORES" \
        --paired \
        --fastqc \
        --trim-n "$F1" "$F2"
    then
        echo "Trim Galore failed for $BASE, skipping rename" >&2
        continue
    fi

    # Drop the "_val_<mate>" infix that Trim Galore adds in paired mode.
    for MATE in 1 2
    do
        PREFIX=$TRIMGALORE_OUTPUT/${BASE}_${MATE}
        mv "${PREFIX}_val_${MATE}.fq.gz" "${PREFIX}.fastq.gz"
        mv "${PREFIX}_val_${MATE}_fastqc.html" "${PREFIX}_fastqc.html"
        mv "${PREFIX}_val_${MATE}_fastqc.zip" "${PREFIX}_fastqc.zip"
    done
done

## For single-end reads
## NOTE(review): Trim Galore normally names single-end output
## <name>_trimmed.fq.gz rather than <name>_val.fq.gz - confirm the file
## names in the mv commands below before enabling this section.
#for FILE in "$FASTQ_PATH"/*.fastq.gz
#do
#    [ -e "$FILE" ] || continue
#    BASE=$(basename "$FILE" .fastq.gz)
#    echo "Processing $BASE"
#    F=$FASTQ_PATH/$BASE.fastq.gz
#    "$TRIMGALORE_COMMAND" \
#        --quality 30 \
#        --length 50 \
#        --output_dir "$TRIMGALORE_OUTPUT" \
#        --path_to_cutadapt "$CUTADAPT_COMMAND" \
#        --cores "$CORES" \
#        --fastqc \
#        --trim-n "$F"
#
#    mv "$TRIMGALORE_OUTPUT/${BASE}_val.fq.gz" \
#        "$TRIMGALORE_OUTPUT/$BASE.fastq.gz"
#    mv "$TRIMGALORE_OUTPUT/${BASE}_val_fastqc.html" \
#        "$TRIMGALORE_OUTPUT/${BASE}_fastqc.html"
#    mv "$TRIMGALORE_OUTPUT/${BASE}_val_fastqc.zip" \
#        "$TRIMGALORE_OUTPUT/${BASE}_fastqc.zip"
#done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/bin/bash

# Build the indexes needed for the hs37d5 reference: the BWA index, the
# samtools FASTA index and the sequence dictionary (hs37d5.dict). All files
# are written next to hs37d5.fa.
#
# Environment:
#   RESOURCES_PATH - directory containing hs37d5/hs37d5.fa (required)
#   BWA_PATH       - directory containing the bwa executable
#   SAMTOOLS_PATH  - directory containing the samtools executable
#   CWD            - directory to return to when finished (defaults to the
#                    directory the script was started from)

REFERENCE_DIR=${RESOURCES_PATH:?RESOURCES_PATH must be set}/hs37d5
CWD=${CWD:-$(pwd)}

# Stop if the reference directory is missing, otherwise the index files
# would be looked for and written in whatever directory we happen to be in.
cd "$REFERENCE_DIR" || exit 1
"$BWA_PATH"/bwa index hs37d5.fa
"$SAMTOOLS_PATH"/samtools faidx hs37d5.fa
"$SAMTOOLS_PATH"/samtools dict hs37d5.fa > hs37d5.dict
cd "$CWD" || exit 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#!/bin/bash

# Align paired-end FASTQ files to hs37d5 with `bwa mem` and store the
# alignments as BAM in $BAM_PATH/<sample>.uns. A read group
# carrying the sample name is attached to every alignment.
#
# Environment:
#   RESOURCES_PATH - directory containing hs37d5/hs37d5.fa and its BWA index
#   BWA_PATH       - directory containing the bwa executable
#   SAMTOOLS_PATH  - directory containing the samtools executable

HOME_PATH=/home/user/analysis
# Change the path below with the quality-controlled data directory
# if trimming performed (see commented line below)
FASTQ_PATH=$HOME_PATH/fastq
#FASTQ_PATH=$HOME_PATH/fastq_qual
BAM_PATH=$HOME_PATH/bam
THREADS=24
BWA_INDEX=$RESOURCES_PATH/hs37d5/hs37d5.fa

# Create the output directory when it is missing (no-op if it exists).
mkdir -p "$BAM_PATH" || exit 1

for FILE in "$FASTQ_PATH"/*_1.fastq.gz
do
    # An unmatched glob stays literal; skip it instead of aligning it.
    [ -e "$FILE" ] || continue

    BASE=$(basename "$FILE" _1.fastq.gz)
    F1=$FASTQ_PATH/${BASE}_1.fastq.gz
    F2=$FASTQ_PATH/${BASE}_2.fastq.gz

    # The \t sequences are passed literally; bwa expands them itself.
    RG="@RG\tID:$BASE\tSM:$BASE\tLB:WES\tPL:ILLUMINA"

    "$BWA_PATH"/bwa mem -t "$THREADS" -R "$RG" "$BWA_INDEX" "$F1" "$F2" | \
        "$SAMTOOLS_PATH"/samtools view -bS -o "$BAM_PATH/$BASE.uns" -
done

## For single-end reads
#for FILE in "$FASTQ_PATH"/*.fastq.gz
#do
#    [ -e "$FILE" ] || continue
#    BASE=$(basename "$FILE" .fastq.gz)
#    F=$FASTQ_PATH/$BASE.fastq.gz
#
#    RG="@RG\tID:$BASE\tSM:$BASE\tLB:WES\tPL:ILLUMINA"
#
#    "$BWA_PATH"/bwa mem -t "$THREADS" -R "$RG" "$BWA_INDEX" "$F" | \
#        "$SAMTOOLS_PATH"/samtools view -bS -o "$BAM_PATH/$BASE.uns" -
#done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#!/bin/bash

# Collect alignment statistics for every sample that has a
# $BAM_PATH/<sample>_fixmate.bam file. The statistics are computed on
# $BAM_PATH/<sample>.bam and written as one tab-separated row per sample to
# $REPORT, below a header row.
#
# Environment:
#   HOME_PATH     - analysis root (defaults to /home/user/analysis)
#   SAMTOOLS_PATH - directory containing the samtools executable
#   BEDTOOLS_PATH - directory containing the bedtools executable

HOME_PATH=${HOME_PATH:-/home/user/analysis}
CAPTURE_KIT=$HOME_PATH/resources/panel/Agilent_SureSelect_All_Exon_V2.bed
BAM_PATH=$HOME_PATH/bam
REPORT=$HOME_PATH/reports/finalbamstats.txt
mkdir -p "$HOME_PATH/reports" || exit 1

# Append one tab-terminated integer column to the report. An empty value
# (e.g. a failed tool) is written as 0.
add_count() {
    printf "%d\t" "${1:-0}" >> "$REPORT"
}

# Sum the lengths of the read sequences (SAM column 10) read from stdin.
# %.0f keeps large totals in plain integer notation in every awk.
sum_bases() {
    cut -f10 | awk 'BEGIN {tr=0} {tr+=length($0)} END {printf "%.0f\n", tr}'
}

# Sum the last column of stdin; bedtools appends its count/depth there.
sum_last_column() {
    awk 'BEGIN {tot=0} {tot+=$NF} END {printf "%.0f\n", tot}'
}

# Header: the '\t%s' format is re-applied to every remaining column name,
# so each column is guaranteed to be preceded by exactly one tab.
{
    printf "%s" "name"
    printf "\t%s" \
        "total reads" "total reads pairs" "aligned reads" \
        "properly paired aligned pairs" "uniquely aligned reads (q>20)" \
        "properly paired uniquely aligned reads" "chimeric reads" \
        "reads overlapping targets" "total bases" "aligned bases" \
        "uniquely aligned bases" "bases overlapping targets"
    printf "\n"
} > "$REPORT"

for FILE in "$BAM_PATH"/*_fixmate.bam
do
    # An unmatched glob stays literal; skip it.
    [ -e "$FILE" ] || continue

    SAMPLE=$(basename "$FILE" _fixmate.bam)
    echo "Processing $SAMPLE"

    BAM=$BAM_PATH/$SAMPLE.bam
    # Check before writing anything so the report never gets a partial row.
    if [ ! -f "$BAM" ]
    then
        echo "Skipping $SAMPLE: $BAM not found" >&2
        continue
    fi

    printf "%s\t" "$SAMPLE" >> "$REPORT"

    echo " total reads"
    TOTAL=$("$SAMTOOLS_PATH"/samtools view -c -F2048 "$BAM")
    TOTAL=${TOTAL:-0}
    add_count "$TOTAL"

    # Integer division keeps the value valid for %d even for an odd count.
    echo " total read pairs"
    add_count "$((TOTAL / 2))"

    echo " aligned reads"
    add_count "$("$SAMTOOLS_PATH"/samtools view -c -F2052 "$BAM")"

    echo " properly paired aligned pairs"
    add_count "$("$SAMTOOLS_PATH"/samtools view -c -f66 -F2048 "$BAM")"

    echo " uniquely aligned reads (q>20)"
    add_count "$("$SAMTOOLS_PATH"/samtools view -c -F2052 -q20 "$BAM")"

    echo " properly paired uniquely aligned reads"
    add_count "$("$SAMTOOLS_PATH"/samtools view -c -f66 -F2048 -q20 "$BAM")"

    # First number on the last line of the flagstat output.
    echo " chimeric reads"
    add_count "$("$SAMTOOLS_PATH"/samtools flagstat "$BAM" | \
        awk '{n=$1} END {print n}')"

    echo " reads overlapping targets"
    add_count "$("$BEDTOOLS_PATH"/bedtools intersect \
        -a "$CAPTURE_KIT" -b "$BAM" -c | sum_last_column)"

    echo " total bases"
    add_count "$("$SAMTOOLS_PATH"/samtools view "$BAM" | sum_bases)"

    echo " aligned bases"
    add_count "$("$SAMTOOLS_PATH"/samtools view -F2052 "$BAM" | sum_bases)"

    echo " uniquely aligned bases"
    add_count "$("$SAMTOOLS_PATH"/samtools view -F2052 -q20 "$BAM" | \
        sum_bases)"

    # Last column of the row: terminated by a newline instead of a tab.
    echo " bases overlapping targets"
    OVERLAP=$("$BEDTOOLS_PATH"/bedtools coverage \
        -a "$CAPTURE_KIT" -b "$BAM" -d | sum_last_column)
    printf "%d\n" "${OVERLAP:-0}" >> "$REPORT"

done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/bin/bash

# Create a bigWig coverage track for every sample that has a
# $BAM_PATH/<sample>_fixmate.bam file. Coverage is computed from
# $BAM_PATH/<sample>.bam with bedtools genomecov, chromosome names are
# prefixed with "chr" (MT becomes chrM), and the resulting bedGraph is
# converted with bedGraphToBigWig. Samples are processed in parallel.
#
# Environment:
#   HOME_PATH      - analysis root (defaults to /home/user/analysis)
#   BEDTOOLS_PATH  - directory containing the bedtools executable
#   UCSCTOOLS_PATH - directory containing bedGraphToBigWig

HOME_PATH=${HOME_PATH:-/home/user/analysis}
BAM_PATH=$HOME_PATH/bam
TRACKS_PATH=$HOME_PATH/tracks
GENOME_SIZE=$BEDTOOLS_PATH/../genomes/human.hg19.genome

# Create the output directory when it is missing (no-op if it exists).
mkdir -p "$TRACKS_PATH" || exit 1

for FILE in "$BAM_PATH"/*_fixmate.bam
do
    # An unmatched glob stays literal; skip it.
    [ -e "$FILE" ] || continue

    SAMPLE=$(basename "$FILE" _fixmate.bam)
    echo "Processing $SAMPLE"
    # The BAM is read from the flat $BAM_PATH layout, matching the location
    # of the *_fixmate.bam files that drive this loop.
    # LC_ALL=C gives the byte-order chromosome sort bedGraphToBigWig expects.
    "$BEDTOOLS_PATH"/bedtools genomecov -bg \
        -ibam "$BAM_PATH/$SAMPLE.bam" | \
        grep -vE 'chrU|rand|hap|loc|cox|GL|NC|hs37d5' | \
        awk '{print "chr"$1"\t"$2"\t"$3"\t"$4}' | \
        sed 's/chrMT/chrM/g' | \
        LC_ALL=C sort -k1,1 -k2,2n > "$TRACKS_PATH/$SAMPLE.bedGraph" &
done

# Barrier: every bedGraph must be complete before conversion starts.
wait

for FILE in "$TRACKS_PATH"/*.bedGraph
do
    [ -e "$FILE" ] || continue

    echo "Processing $FILE"
    SAMPLE=$(basename "$FILE" .bedGraph)
    # Remove the intermediate bedGraph only when its conversion succeeded,
    # so a failed sample keeps its input for inspection or a re-run.
    (
        "$UCSCTOOLS_PATH"/bedGraphToBigWig "$FILE" "$GENOME_SIZE" \
            "$TRACKS_PATH/$SAMPLE.bigWig" \
            && rm -f -- "$FILE"
    ) &
done

wait
Oops, something went wrong.