-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
aa71e72
commit 07b8a57
Showing
90 changed files
with
2,713 additions
and
307 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash

# Prerequisite: the nextflow environment must be active, i.e.
#   mamba activate nextflow

# directory that receives the pipeline output
outdir="results/fetchngs"
mkdir -p "$outdir"

# FIX!!
# launch the nf-core/fetchngs pipeline
nextflow run nf-core/fetchngs \
  -profile singularity \
  --max_memory '16.GB' \
  --max_cpus 8 \
  --input SAMPLES \
  --outdir "$outdir" \
  --nf_core_pipeline viralrecon
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/bin/bash

# Prerequisite: the nextflow environment must be active, i.e.
#   mamba activate nextflow

# directory that receives the pipeline output
outdir="results/bacqc"
mkdir -p "$outdir"

# FIX!!
# launch the bacQC pipeline (main branch); -resume reuses cached work
nextflow run avantonder/bacQC \
  -r main \
  -resume \
  -profile singularity \
  --max_memory '16.GB' \
  --max_cpus 8 \
  --input FIX_SAMPLESHEET \
  --outdir "$outdir" \
  --kraken2db databases/minikraken2_v1_8GB \
  --brackendb databases/minikraken2_v1_8GB \
  --genome_size FIX_GENOME_SIZE
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!/bin/bash

# Prerequisite: the nextflow environment must be active, i.e.
#   mamba activate nextflow

# directory that receives the pipeline output
outdir="results/bactmap"
mkdir -p "$outdir"

# FIX!!
# launch the nf-core/bactmap pipeline; -resume reuses cached work
nextflow run nf-core/bactmap \
  -resume \
  -profile singularity \
  --max_memory '16.GB' \
  --max_cpus 8 \
  --input FIX_SAMPLESHEET \
  --outdir "$outdir" \
  --reference FIX_REFERENCE_FASTA \
  --genome_size 4.3M
52 changes: 52 additions & 0 deletions
52
course_files/scripts/M_tuberculosis/04-pseudogenome_check.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#!/bin/bash

# Runs `seqtk comp` on every per-sample pseudogenome (*.fas) in fasta_dir,
# writing one TSV per sample to <outdir>/seqtk, then runs seqtk_parser.py on
# that directory and moves the mapping_summary.tsv found in the current
# directory into outdir.
#
# before running this script make sure to
# mamba activate seqtk

#### Settings #####

# directory with pseudogenome FASTA
fasta_dir="results/bactmap/pseudogenomes"

# output directory for results
outdir="results/bactmap/pseudogenomes_check"

# path to seqtk_parser.py
parser="scripts/seqtk_parser.py"

#### End of settings ####

#### Analysis ####
# WARNING: be careful changing the code below

# exit upon any error
set -e

# combined alignment, and the temporary name it gets while the loop runs
alignment="$fasta_dir/aligned_pseudogenomes.fas"
alignment_hidden="$fasta_dir/aligned_pseudogenomes.fasta"

# Give the combined alignment its original name back.
restore_alignment() {
  if [[ -e "$alignment_hidden" ]]; then
    mv -- "$alignment_hidden" "$alignment"
  fi
}

# create output directory
mkdir -p "$outdir/seqtk"

# rename aligned_pseudogenomes.fas so the *.fas glob below skips it
mv -- "$alignment" "$alignment_hidden"

# Restore the name on ANY exit: with `set -e` a failing step would otherwise
# leave the alignment renamed and break scripts that expect the .fas name.
trap restore_alignment EXIT

# collect the per-sample pseudogenomes; nullglob makes "no match" an empty list
# instead of the literal pattern
shopt -s nullglob
pseudogenomes=("$fasta_dir"/*.fas)
shopt -u nullglob

if (( ${#pseudogenomes[@]} == 0 )); then
  echo "No .fas pseudogenomes found in $fasta_dir" >&2
  exit 1
fi

# loop through each pseudogenome
for filepath in "${pseudogenomes[@]}"; do
  # file name of the pseudogenome (the .fas extension is kept, so the output
  # is named <sample>.fas.tsv exactly as before)
  sample=$(basename "$filepath")

  # print a message
  echo "Processing $sample"

  # run seqtk command
  seqtk comp "$filepath" > "${outdir}/seqtk/${sample}.tsv"
done

# run seqtk_parser.py
python "$parser" --input_dir "$outdir/seqtk"

# move mapping_summary.tsv to results/bactmap/pseudogenomes_check
mv mapping_summary.tsv "$outdir"

# aligned_pseudogenomes.fasta is renamed back to aligned_pseudogenomes.fas by
# the EXIT trap installed above
33 changes: 33 additions & 0 deletions
33
course_files/scripts/M_tuberculosis/05-mask_pseudogenome.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#!/bin/bash

# Prerequisite: the remove_blocks environment must be active, i.e.
#   mamba activate remove_blocks

#### Settings #####

# directory with pseudogenome FASTA
fasta_dir="results/bactmap/pseudogenomes"

# output directory for results
outdir="results/bactmap/masked_alignment"

# path to bed file with masking co-ordinates
bed="resources/masking/MTBC0_Goigetal_regions_toDiscard.bed"

#### End of settings ####

#### Analysis ####
# WARNING: be careful changing the code below

# stop as soon as any command fails
set -e

# working copy of the alignment and the masked result, both inside outdir
alignment_copy="$outdir/aligned_pseudogenomes.fas"
masked_alignment="$outdir/aligned_pseudogenomes_masked.fas"

# make sure the output directory exists
mkdir -p "$outdir"

# work on a copy so the bactmap output itself is left untouched
cp "$fasta_dir/aligned_pseudogenomes.fas" "$alignment_copy"

# mask the regions listed in the bed file
remove_blocks_from_aln.py \
  -a "$alignment_copy" \
  -t "$bed" \
  -o "$masked_alignment"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/bin/bash

# Prerequisite: the iqtree environment must be active, i.e.
#   mamba activate iqtree

# file holding the output of `snp-sites -C`, passed to iqtree via -fconst
constant_sites="results/snp-sites/constant_sites.txt"

# make sure both output directories exist
mkdir -p results/snp-sites/ results/iqtree/

# FIX!!
# write the variable-sites alignment
snp-sites FIX_INPUT_PSEUDOGENOMES_FASTA > results/snp-sites/aligned_pseudogenomes_masked_snps.fas

# FIX!!
# write the output of snp-sites -C
snp-sites -C FIX_INPUT_PSEUDOGENOMES_FASTA > "$constant_sites"

# FIX!!
# build the tree; the substitution is deliberately left unquoted, as before
iqtree \
  -fconst $(< "$constant_sites") \
  -s FIX_INPUT_SNP_ALIGNMENT \
  --prefix results/iqtree/Nam_TB \
  -nt AUTO \
  -ntmax 8 \
  -mem 8G \
  -m GTR+F+I \
  -bb 1000
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#!/bin/bash

# Runs `tb-profiler profile` on every read pair found in fastq_dir, keeping a
# per-sample log of stderr, then collates the per-sample results and moves the
# collated files into outdir.
#
# before running this script make sure to
# mamba activate tb-profiler

#### Settings #####

# directory with the FASTQ reads, named <sample>_1.fastq.gz / <sample>_2.fastq.gz
fastq_dir="data/reads"

# output directory for results
outdir="results/tb-profiler"

# set prefix for collated results
prefix="Nam_TB"

#### End of settings ####

#### Analysis ####
# WARNING: be careful changing the code below

# create output directory
mkdir -p "$outdir"

# loop through each set of fastq files
for filepath in "$fastq_dir"/*_1.fastq.gz; do
  # an unmatched glob is left as the literal pattern; do not hand it to tb-profiler
  if [[ ! -e "$filepath" ]]; then
    echo "Skipping $filepath: file not found" >&2
    continue
  fi

  # path without the _1.fastq.gz suffix, shared by both mates
  read_prefix=${filepath%_1.fastq.gz}

  # get the sample name
  sample=$(basename "$read_prefix")

  # stderr of tb-profiler for this sample
  logfile="$outdir/$sample.log"

  # print a message
  echo "Processing $sample"

  # run tb-profiler and report on its exit status; a failure does not stop
  # the loop, so the remaining samples are still processed
  if tb-profiler profile -1 "$filepath" -2 "${read_prefix}_2.fastq.gz" -p "$sample" -t 8 --csv -d "$outdir" 2> "$logfile"; then
    echo "tb-profiler completed successfully for $sample."
  else
    echo "tb-profiler failed for $sample. See $logfile for details."
  fi
done

# run tb-profiler collate
tb-profiler collate -d "$outdir/results" --prefix "$prefix"

# move collated result to tb-profiler results directory
mv "${prefix}".* "$outdir"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash

# Prerequisite: the pairsnp environment must be active, i.e.
#   mamba activate pairsnp

# input: masked variants file to extract pairwise SNP distances from
snp_file="preprocessed/snp-sites/aligned_pseudogenomes_masked_snps.fas"

# output: pairsnp result, written from its stdout
outfile="results/transmission/aligned_pseudogenomes_masked_snps.csv"

# make sure the output directory exists
mkdir -p results/transmission/

# run pairsnp with -c and capture stdout in the output file
pairsnp "$snp_file" -c > "$outfile"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/bin/bash

# Removes the MTBC0 outgroup from the masked alignment and from the rooted
# tree, then runs TreeTime on the outgroup-free alignment and tree.
#
# before running this script make sure to
# mamba activate treetime

# stop at the first failing step so TreeTime never runs on missing inputs
set -e

# directory for the intermediate files and the TreeTime output
outdir="results/treetime"

# outgroup-free inputs handed to TreeTime
alignment_no_outgroup="$outdir/aligned_pseudogenomes_masked_no_outgroups.fas"
tree_no_outgroup="$outdir/Nam_TB_rooted_no_outgroup.treefile"

# create output directory
mkdir -p "$outdir"

# Remove outgroup from alignment
seqkit grep -v -p MTBC0 results/bactmap/masked_alignment/aligned_pseudogenomes_masked.fas > "$alignment_no_outgroup"

# Remove outgroup from rooted tree.
# The result is written into $outdir because that is where TreeTime reads it
# from below; previously it was written to the current directory.
# NOTE(review): remove_outgroup.py and Nam_TB_rooted.treefile are still looked
# up in the current directory - confirm that is where they live.
python remove_outgroup.py -i Nam_TB_rooted.treefile -g MTBC0 -o "$tree_no_outgroup"

# Run TreeTime
treetime --tree "$tree_no_outgroup" \
  --dates TB_metadata.tsv \
  --name-column sample \
  --date-column Date.sample.collection \
  --aln "$alignment_no_outgroup" \
  --outdir "$outdir" \
  --report-ambiguous \
  --time-marginal only-final \
  --clock-std-dev 0.00003 \
  --relax 1.0 0
Oops, something went wrong.