Skip to content

Commit

Permalink
Renaming according to order
Browse files Browse the repository at this point in the history
  • Loading branch information
Panagiotis Moulos committed Feb 14, 2022
1 parent 2657737 commit f02ea34
Show file tree
Hide file tree
Showing 12 changed files with 993 additions and 0 deletions.
71 changes: 71 additions & 0 deletions 03-data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/bin/bash
#
# Download the raw paired-end FASTQ files of six 1000 Genomes Project samples
# from the ENA FTP server and rename them from their run accession (SRR...)
# to their sample name (HG...).
#
# Output: $FASTQ_PATH/<sample>_1.fastq.gz and $FASTQ_PATH/<sample>_2.fastq.gz
#
# Environment:
#   CWD  (optional) directory to return to when done; defaults to the
#        directory the script was started from.

# Abort on the first failed download/rename instead of carrying on with a
# partial data set.
set -euo pipefail

# Setup paths
HOME_PATH=/home/user/analysis
FASTQ_PATH=$HOME_PATH/fastq
ENA_FTP=ftp://ftp.sra.ebi.ac.uk/vol1/fastq
START_DIR=${CWD:-$PWD}

mkdir -p "$FASTQ_PATH"

# "<sample name> <run accession>" pairs; one entry per sample so each sample
# is downloaded exactly once.
SAMPLES=(
  "HG00119 SRR099967"
  "HG00133 SRR099969"
  "HG00145 SRR099957"
  "HG00239 SRR099958"
  "HG00258 SRR099954"
  "HG00265 SRR099968"
)

# Download raw data from 1000 genomes project
cd "$FASTQ_PATH"

for ENTRY in "${SAMPLES[@]}"; do
  read -r SAMPLE RUN <<< "$ENTRY"
  echo "Downloading $SAMPLE ($RUN)"
  for MATE in 1 2; do
    # The server groups runs in directories named after the first six
    # characters of the accession (e.g. SRR099/SRR099967/).
    wget "$ENA_FTP/${RUN:0:6}/$RUN/${RUN}_${MATE}.fastq.gz"
    mv -- "${RUN}_${MATE}.fastq.gz" "${SAMPLE}_${MATE}.fastq.gz"
  done
done

cd "$START_DIR"
68 changes: 68 additions & 0 deletions 05-trimgalore.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash
#
# Quality/adapter trimming of the paired-end FASTQ files with Trim Galore.
# For every $FASTQ_PATH/<sample>_1.fastq.gz the matching _2 file is trimmed
# along with it, and the results are renamed so that the trimmed directory
# uses the same file names as the raw one:
#   $TRIMGALORE_OUTPUT/<sample>_{1,2}.fastq.gz
#   $TRIMGALORE_OUTPUT/<sample>_{1,2}_fastqc.{html,zip}
#
# Environment (required):
#   TRIMGALORE_PATH  directory containing the trim_galore executable
#   CUTADAPT_PATH    directory containing the cutadapt executable

set -euo pipefail

: "${TRIMGALORE_PATH:?TRIMGALORE_PATH must point to the Trim Galore directory}"
: "${CUTADAPT_PATH:?CUTADAPT_PATH must point to the cutadapt directory}"

HOME_PATH=/home/user/analysis
FASTQ_PATH=$HOME_PATH/fastq
TRIMGALORE_COMMAND=$TRIMGALORE_PATH/trim_galore
CUTADAPT_COMMAND=$CUTADAPT_PATH/cutadapt
TRIMGALORE_OUTPUT=$HOME_PATH/fastq_qual
CORES=4

mkdir -p "$TRIMGALORE_OUTPUT"

for FILE in "$FASTQ_PATH"/*_1.fastq.gz; do
  # An unmatched glob stays literal; skip it instead of "processing" it.
  [[ -e "$FILE" ]] || continue

  BASE=$(basename "$FILE" _1.fastq.gz)
  echo "Processing $BASE"
  F1=$FASTQ_PATH/${BASE}_1.fastq.gz
  F2=$FASTQ_PATH/${BASE}_2.fastq.gz

  # Write straight into $TRIMGALORE_OUTPUT: the renames below look for the
  # trimmed files there, not in a per-sample sub-directory.
  "$TRIMGALORE_COMMAND" \
    --quality 30 \
    --length 50 \
    --output_dir "$TRIMGALORE_OUTPUT" \
    --path_to_cutadapt "$CUTADAPT_COMMAND" \
    --cores "$CORES" \
    --paired \
    --fastqc \
    --trim-n "$F1" "$F2"

  # Strip the "_val_<mate>" tag from the trimmed reads and FastQC reports.
  for MATE in 1 2; do
    STEM=$TRIMGALORE_OUTPUT/${BASE}_${MATE}
    mv -- "${STEM}_val_${MATE}.fq.gz" "${STEM}.fastq.gz"
    mv -- "${STEM}_val_${MATE}_fastqc.html" "${STEM}_fastqc.html"
    mv -- "${STEM}_val_${MATE}_fastqc.zip" "${STEM}_fastqc.zip"
  done
done

## For single-end reads
## NOTE: Trim Galore names single-end output "<name>_trimmed.fq.gz"
#for FILE in "$FASTQ_PATH"/*.fastq.gz; do
#  [[ -e "$FILE" ]] || continue
#  BASE=$(basename "$FILE" .fastq.gz)
#  echo "Processing $BASE"
#  F=$FASTQ_PATH/$BASE.fastq.gz
#  "$TRIMGALORE_COMMAND" \
#    --quality 30 \
#    --length 50 \
#    --output_dir "$TRIMGALORE_OUTPUT" \
#    --path_to_cutadapt "$CUTADAPT_COMMAND" \
#    --cores "$CORES" \
#    --fastqc \
#    --trim-n "$F"
#
#  STEM=$TRIMGALORE_OUTPUT/$BASE
#  mv -- "${STEM}_trimmed.fq.gz" "${STEM}.fastq.gz"
#  mv -- "${STEM}_trimmed_fastqc.html" "${STEM}_fastqc.html"
#  mv -- "${STEM}_trimmed_fastqc.zip" "${STEM}_fastqc.zip"
#done
7 changes: 7 additions & 0 deletions 06-refindex.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
#
# Build the indexes of the hs37d5 reference genome: the BWA index, the
# samtools FASTA index (.fai) and the sequence dictionary (.dict). All files
# are written next to hs37d5.fa in $RESOURCES_PATH/hs37d5.
#
# Environment (required):
#   RESOURCES_PATH  directory containing hs37d5/hs37d5.fa
#   BWA_PATH        directory containing the bwa executable
#   SAMTOOLS_PATH   directory containing the samtools executable
# Environment (optional):
#   CWD             directory to return to when done; defaults to the
#                   directory the script was started from.

set -euo pipefail

: "${RESOURCES_PATH:?RESOURCES_PATH must point to the resources directory}"
: "${BWA_PATH:?BWA_PATH must point to the bwa directory}"
: "${SAMTOOLS_PATH:?SAMTOOLS_PATH must point to the samtools directory}"

START_DIR=${CWD:-$PWD}

# With "set -e" a failed cd stops the script here, so the indexing commands
# never run against a stray hs37d5.fa in some other directory.
cd "$RESOURCES_PATH/hs37d5"
"$BWA_PATH/bwa" index hs37d5.fa
"$SAMTOOLS_PATH/samtools" faidx hs37d5.fa
"$SAMTOOLS_PATH/samtools" dict hs37d5.fa > hs37d5.dict
cd "$START_DIR"
39 changes: 39 additions & 0 deletions 07-alignment.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash
#
# Align the paired-end reads of every sample to hs37d5 with "bwa mem" and
# store the unsorted alignments as BAM in $BAM_PATH/<sample>.uns.
#
# Environment (required):
#   RESOURCES_PATH  directory containing the indexed hs37d5/hs37d5.fa
#   BWA_PATH        directory containing the bwa executable
#   SAMTOOLS_PATH   directory containing the samtools executable

# pipefail: a bwa failure must not be masked by samtools exiting cleanly.
set -euo pipefail

: "${RESOURCES_PATH:?RESOURCES_PATH must point to the resources directory}"
: "${BWA_PATH:?BWA_PATH must point to the bwa directory}"
: "${SAMTOOLS_PATH:?SAMTOOLS_PATH must point to the samtools directory}"

HOME_PATH=/home/user/analysis
# Change the path below with the quality-controlled data directory
# if trimming performed (see commented line below)
FASTQ_PATH=$HOME_PATH/fastq
#FASTQ_PATH=$HOME_PATH/fastq_qual
BAM_PATH=$HOME_PATH/bam
THREADS=24
BWA_INDEX=$RESOURCES_PATH/hs37d5/hs37d5.fa

# Create the output directory if it is missing (-p is a no-op otherwise).
mkdir -p "$BAM_PATH"

for FILE in "$FASTQ_PATH"/*_1.fastq.gz; do
  # An unmatched glob stays literal; skip it instead of aligning nothing.
  [[ -e "$FILE" ]] || continue

  BASE=$(basename "$FILE" _1.fastq.gz)
  F1=$FASTQ_PATH/${BASE}_1.fastq.gz
  F2=$FASTQ_PATH/${BASE}_2.fastq.gz

  # Read group header line; the "\t" sequences are passed literally to bwa.
  RG="@RG\tID:${BASE}\tSM:${BASE}\tLB:WES\tPL:ILLUMINA"

  "$BWA_PATH/bwa" mem -t "$THREADS" -R "$RG" "$BWA_INDEX" "$F1" "$F2" \
    | "$SAMTOOLS_PATH/samtools" view -bS -o "$BAM_PATH/$BASE.uns" -
done

## For single-end reads
#for FILE in "$FASTQ_PATH"/*.fastq.gz; do
#  [[ -e "$FILE" ]] || continue
#  BASE=$(basename "$FILE" .fastq.gz)
#  F=$FASTQ_PATH/$BASE.fastq.gz
#
#  RG="@RG\tID:${BASE}\tSM:${BASE}\tLB:WES\tPL:ILLUMINA"
#
#  "$BWA_PATH/bwa" mem -t "$THREADS" -R "$RG" "$BWA_INDEX" "$F" \
#    | "$SAMTOOLS_PATH/samtools" view -bS -o "$BAM_PATH/$BASE.uns" -
#done
86 changes: 86 additions & 0 deletions 09-bamstats.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/bin/bash
#
# Collect per-sample alignment statistics into a tab-delimited report,
# $HOME_PATH/reports/finalbamstats.txt: one header line followed by one line
# per sample, 13 columns each. A sample is every <name> for which
# $BAM_PATH/<name>_fixmate.bam exists; the statistics themselves are computed
# on $BAM_PATH/<name>.bam.
#
# Environment (required):
#   SAMTOOLS_PATH  directory containing the samtools executable
#   BEDTOOLS_PATH  directory containing the bedtools executable
# Environment (optional):
#   HOME_PATH      analysis root; defaults to /home/user/analysis, the value
#                  hard-coded in the other pipeline scripts.

set -euo pipefail

: "${SAMTOOLS_PATH:?SAMTOOLS_PATH must point to the samtools directory}"
: "${BEDTOOLS_PATH:?BEDTOOLS_PATH must point to the bedtools directory}"
HOME_PATH=${HOME_PATH:-/home/user/analysis}

SAMTOOLS=$SAMTOOLS_PATH/samtools
BEDTOOLS=$BEDTOOLS_PATH/bedtools
CAPTURE_KIT=$HOME_PATH/resources/panel/Agilent_SureSelect_All_Exon_V2.bed
BAM_PATH=$HOME_PATH/bam
REPORT=$HOME_PATH/reports/finalbamstats.txt
mkdir -p "$HOME_PATH/reports"

# Print the arguments as one tab-separated line.
tsv_line() {
  local IFS=$'\t'
  printf '%s\n' "$*"
}

# Count the alignments selected by the given "samtools view" arguments.
count_reads() {
  "$SAMTOOLS" view -c "$@"
}

# Sum the length of the sequence column (SAM field 10) over the alignments
# selected by the given "samtools view" arguments.
sum_bases() {
  # %.0f keeps large sums in plain digits; a bare "print" may switch to
  # exponent notation in some awk implementations.
  "$SAMTOOLS" view "$@" | cut -f10 \
    | awk 'BEGIN {tr=0} {tr+=length($0)} END {printf "%.0f\n", tr}'
}

# Sum the last column of the input (the count/depth appended by bedtools).
sum_last_column() {
  awk 'BEGIN {tot=0} {tot+=$NF} END {printf "%.0f\n", tot}'
}

tsv_line "name" \
  "total reads" "total reads pairs" "aligned reads" \
  "properly paired aligned pairs" "uniquely aligned reads (q>20)" \
  "properly paired uniquely aligned reads" "chimeric reads" \
  "reads overlapping targets" "total bases" "aligned bases" \
  "uniquely aligned bases" "bases overlapping targets" > "$REPORT"

for FILE in "$BAM_PATH"/*_fixmate.bam; do
  # An unmatched glob stays literal; skip it instead of reporting on it.
  [[ -e "$FILE" ]] || continue

  SAMPLE=$(basename "$FILE" _fixmate.bam)
  echo "Processing $SAMPLE"

  BAM=$BAM_PATH/$SAMPLE.bam

  echo " total reads"
  TOTAL_READS=$(count_reads -F2048 "$BAM")

  echo " total read pairs"
  # Derived from the count above instead of scanning the BAM a second time.
  TOTAL_PAIRS=$(( TOTAL_READS / 2 ))

  echo " aligned reads"
  ALIGNED_READS=$(count_reads -F2052 "$BAM")

  echo " properly paired aligned pairs"
  PROPER_PAIRS=$(count_reads -f66 -F2048 "$BAM")

  echo " uniquely aligned reads (q>20)"
  UNIQUE_READS=$(count_reads -F2052 -q20 "$BAM")

  echo " properly paired uniquely aligned reads"
  UNIQUE_PROPER=$(count_reads -f66 -F2048 -q20 "$BAM")

  echo " chimeric reads"
  # First number on the last line of the flagstat output.
  CHIMERIC=$("$SAMTOOLS" flagstat "$BAM" | tail -n 1 | awk '{print $1}')

  echo " reads overlapping targets"
  TARGET_READS=$(
    "$BEDTOOLS" intersect -a "$CAPTURE_KIT" -b "$BAM" -c | sum_last_column
  )

  echo " total bases"
  TOTAL_BASES=$(sum_bases "$BAM")

  echo " aligned bases"
  ALIGNED_BASES=$(sum_bases -F2052 "$BAM")

  echo " uniquely aligned bases"
  UNIQUE_BASES=$(sum_bases -F2052 -q20 "$BAM")

  echo " bases overlapping targets"
  TARGET_BASES=$(
    "$BEDTOOLS" coverage -a "$CAPTURE_KIT" -b "$BAM" -d | sum_last_column
  )

  # Append the row only once every statistic was computed, so a failure
  # never leaves a half-written line in the report.
  tsv_line "$SAMPLE" \
    "$TOTAL_READS" "$TOTAL_PAIRS" "$ALIGNED_READS" \
    "$PROPER_PAIRS" "$UNIQUE_READS" \
    "$UNIQUE_PROPER" "$CHIMERIC" \
    "$TARGET_READS" "$TOTAL_BASES" "$ALIGNED_BASES" \
    "$UNIQUE_BASES" "$TARGET_BASES" >> "$REPORT"
done
35 changes: 35 additions & 0 deletions 10-signal.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
#
# Create one bigWig coverage track per sample in $TRACKS_PATH.
# Step 1 writes a bedGraph per sample with "bedtools genomecov" (all samples
# in parallel), step 2 converts every bedGraph to bigWig (again in parallel),
# and the intermediate bedGraph files are removed at the end.
# A sample is every <name> for which $BAM_PATH/<name>_fixmate.bam exists; the
# coverage is computed on $BAM_PATH/<name>.bam.
#
# Environment (required):
#   BEDTOOLS_PATH   directory containing the bedtools executable
#   UCSCTOOLS_PATH  directory containing bedGraphToBigWig
# Environment (optional):
#   HOME_PATH       analysis root; defaults to /home/user/analysis, the value
#                   hard-coded in the other pipeline scripts.

set -euo pipefail

: "${BEDTOOLS_PATH:?BEDTOOLS_PATH must point to the bedtools directory}"
: "${UCSCTOOLS_PATH:?UCSCTOOLS_PATH must point to the UCSC tools directory}"
HOME_PATH=${HOME_PATH:-/home/user/analysis}

BAM_PATH=$HOME_PATH/bam
TRACKS_PATH=$HOME_PATH/tracks
GENOME_SIZE=$BEDTOOLS_PATH/../genomes/human.hg19.genome

# Create the output directory if it is missing (-p is a no-op otherwise).
mkdir -p "$TRACKS_PATH"

for FILE in "$BAM_PATH"/*_fixmate.bam; do
  # An unmatched glob stays literal; skip it instead of launching a job.
  [[ -e "$FILE" ]] || continue

  SAMPLE=$(basename "$FILE" _fixmate.bam)
  echo "Processing $SAMPLE"
  # The BAM files sit directly in $BAM_PATH, next to the *_fixmate.bam files
  # this loop iterates over.
  # Pipeline: drop the contigs matched by the grep pattern, prefix the
  # chromosome names with "chr", rename chrMT to chrM, then sort by
  # chromosome and start. LC_ALL=C gives the byte-order sort that
  # bedGraphToBigWig requires, whatever the user's locale is.
  "$BEDTOOLS_PATH/bedtools" genomecov -bg \
    -ibam "$BAM_PATH/$SAMPLE.bam" \
    | grep -vP 'chrU|rand|hap|loc|cox|GL|NC|hs37d5' \
    | awk '{print "chr"$1"\t"$2"\t"$3"\t"$4}' \
    | sed 's/chrMT/chrM/g' \
    | LC_ALL=C sort -k1,1 -k2g,2 > "$TRACKS_PATH/$SAMPLE.bedGraph" &
done

# Barrier: every bedGraph must be complete before the conversion starts.
wait

for FILE in "$TRACKS_PATH"/*.bedGraph; do
  [[ -e "$FILE" ]] || continue

  echo "Processing $FILE"
  SAMPLE=$(basename "$FILE" .bedGraph)
  "$UCSCTOOLS_PATH/bedGraphToBigWig" "$FILE" "$GENOME_SIZE" \
    "$TRACKS_PATH/$SAMPLE.bigWig" &
done

wait

# -f: nothing to remove is not an error (e.g. no samples were found).
rm -f -- "$TRACKS_PATH"/*.bedGraph
Loading

0 comments on commit f02ea34

Please sign in to comment.