Skip to content

Commit

Permalink
feat: multiorganism support (#172)
Browse files Browse the repository at this point in the history
  • Loading branch information
mkatsanto authored Feb 21, 2024
1 parent abe1de2 commit ac95347
Show file tree
Hide file tree
Showing 18 changed files with 1,340 additions and 994 deletions.
260 changes: 130 additions & 130 deletions tests/test_integration_workflow/expected_output.files

Large diffs are not rendered by default.

224 changes: 112 additions & 112 deletions tests/test_integration_workflow/expected_output.md5

Large diffs are not rendered by default.

228 changes: 114 additions & 114 deletions tests/test_integration_workflow/expected_output_temp_flag.files

Large diffs are not rendered by default.

180 changes: 90 additions & 90 deletions tests/test_integration_workflow/expected_output_temp_flag.md5

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions tests/test_integration_workflow/test.local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ snakemake \
--report="snakemake_report.html"

# Check md5 sum of some output files
find results/ -type f -name \*\.gz -exec gunzip '{}' \;
find results/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \;
find results/homo_sapiens/ -type f -name \*\.gz -exec gunzip '{}' \;
find results/homo_sapiens/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \;
md5sum --check "expected_output.md5"

# Checksum file generated with
#find results/ \
#find results/homo_sapiens/ \
# -type f \
# -name \*\.gz \
# -exec gunzip '{}' \;
#find results/ \
#find results/homo_sapiens/ \
# -type f \
# -name \*\.zip \
# -exec sh -c 'unzip -o {} -d $(dirname {})' \;
Expand All @@ -60,15 +60,15 @@ md5sum --check "expected_output.md5"
echo "Verifying STAR output"
result=$(bedtools intersect -F 1 -v -bed \
-a ../input_files/synthetic.mate_1.bed \
-b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.Aligned.sortedByCoord.out.bam \
-b results/homo_sapiens/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.Aligned.sortedByCoord.out.bam \
| wc -l)
# Abort the test when the preceding bedtools/wc pipeline counted any
# ground-truth interval without a matching alignment. Whitespace is stripped
# from the count first because some `wc` implementations pad their output,
# and the comparison is quoted so that an empty count fails the check instead
# of raising an error inside `[` that the `if` would treat as success.
if [[ "${result//[[:space:]]/}" != "0" ]]; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    echo "Alignments for mate 1 reads are not consistent with ground truth" >&2
    exit 1
fi
result=$(bedtools intersect -F 1 -v -bed \
-a <(cat ../input_files/synthetic.mate_1.bed ../input_files/synthetic.mate_2.bed) \
-b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.Aligned.sortedByCoord.out.bam \
-b results/homo_sapiens/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.Aligned.sortedByCoord.out.bam \
| wc -l)
if [ $result != "0" ]; then
echo "Alignments for mate 1 reads are not consistent with ground truth"
Expand All @@ -78,8 +78,8 @@ fi
# Check whether Salmon assigns reads to expected genes
echo "Verifying Salmon output"
diff \
<(cat results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat results/homo_sapiens/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}')
diff \
<(cat results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}')
<(cat results/homo_sapiens/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}')
16 changes: 8 additions & 8 deletions tests/test_integration_workflow/test.slurm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ snakemake \
--report="snakemake_report.html"

# Check md5 sum of some output files
find results/ -type f -name \*\.gz -exec gunzip '{}' \;
find results/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \;
find results/homo_sapiens/ -type f -name \*\.gz -exec gunzip '{}' \;
find results/homo_sapiens/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \;
md5sum --check "expected_output.md5"

# Checksum file generated with
# find results/ \
# find results/homo_sapiens/ \
# -type f \
# -name \*\.gz \
# -exec gunzip '{}' \;
# find results/ \
# find results/homo_sapiens/ \
# -type f \
# -name \*\.zip \
# -exec sh -c 'unzip -o {} -d $(dirname {})' \;
Expand All @@ -60,15 +60,15 @@ md5sum --check "expected_output.md5"
echo "Verifying STAR output"
result=$(bedtools intersect -F 1 -v -bed \
-a ../input_files/synthetic.mate_1.bed \
-b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.Aligned.sortedByCoord.out.bam \
-b results/homo_sapiens/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.Aligned.sortedByCoord.out.bam \
| wc -l)
# Abort the test when the preceding bedtools/wc pipeline counted any
# ground-truth interval without a matching alignment. Whitespace is stripped
# from the count first because some `wc` implementations pad their output,
# and the comparison is quoted so that an empty count fails the check instead
# of raising an error inside `[` that the `if` would treat as success.
if [[ "${result//[[:space:]]/}" != "0" ]]; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    echo "Alignments for mate 1 reads are not consistent with ground truth" >&2
    exit 1
fi
result=$(bedtools intersect -F 1 -v -bed \
-a <(cat ../input_files/synthetic.mate_1.bed ../input_files/synthetic.mate_2.bed) \
-b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.Aligned.sortedByCoord.out.bam \
-b results/homo_sapiens/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.Aligned.sortedByCoord.out.bam \
| wc -l)
if [ $result != "0" ]; then
echo "Alignments for mate 1 reads are not consistent with ground truth"
Expand All @@ -78,8 +78,8 @@ fi
# Check whether Salmon assigns reads to expected genes
echo "Verifying Salmon output"
diff \
<(cat results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat results/homo_sapiens/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}')
diff \
<(cat results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat results/homo_sapiens/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}')
12 changes: 6 additions & 6 deletions tests/test_integration_workflow/test.temp.flag.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ snakemake \
--report="snakemake_report.html"

# Check md5 sum of some output files
find results/ -type f -name \*\.gz -exec gunzip '{}' \;
find results/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \;
find results/homo_sapiens/ -type f -name \*\.gz -exec gunzip '{}' \;
find results/homo_sapiens/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \;
md5sum --check "expected_output_temp_flag.md5"

# Check whether STAR produces expected alignments
Expand All @@ -48,15 +48,15 @@ md5sum --check "expected_output_temp_flag.md5"
echo "Verifying STAR output"
result=$(bedtools intersect -F 1 -v -bed \
-a ../input_files/synthetic.mate_1.bed \
-b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.Aligned.sortedByCoord.out.bam \
-b results/homo_sapiens/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.Aligned.sortedByCoord.out.bam \
| wc -l)
# Abort the test when the preceding bedtools/wc pipeline counted any
# ground-truth interval without a matching alignment. Whitespace is stripped
# from the count first because some `wc` implementations pad their output,
# and the comparison is quoted so that an empty count fails the check instead
# of raising an error inside `[` that the `if` would treat as success.
if [[ "${result//[[:space:]]/}" != "0" ]]; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    echo "Alignments for mate 1 reads are not consistent with ground truth" >&2
    exit 1
fi
result=$(bedtools intersect -F 1 -v -bed \
-a <(cat ../input_files/synthetic.mate_1.bed ../input_files/synthetic.mate_2.bed) \
-b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.Aligned.sortedByCoord.out.bam \
-b results/homo_sapiens/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.Aligned.sortedByCoord.out.bam \
| wc -l)
if [ $result != "0" ]; then
echo "Alignments for mate 1 reads are not consistent with ground truth"
Expand All @@ -66,9 +66,9 @@ fi
# Check whether Salmon assigns reads to expected genes
echo "Verifying Salmon output"
diff \
<(cat results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat results/homo_sapiens/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}')
diff \
<(cat results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat results/homo_sapiens/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}')

Loading

0 comments on commit ac95347

Please sign in to comment.