Skip to content

Commit

Permalink
Merge pull request #10 from fsantilli/main
Browse files Browse the repository at this point in the history
  • Loading branch information
ftabaro authored May 14, 2024
2 parents 8a18bb6 + 5b454ae commit 929199a
Show file tree
Hide file tree
Showing 118 changed files with 2,746 additions and 687 deletions.
11 changes: 11 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
*.fastq filter=lfs diff=lfs merge=lfs -text
*.fq filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.bam filter=lfs diff=lfs merge=lfs -text
*.bai filter=lfs diff=lfs merge=lfs -text
*.bed filter=lfs diff=lfs merge=lfs -text
*.rds filter=lfs diff=lfs merge=lfs -text
*.gff3 filter=lfs diff=lfs merge=lfs -text
*.gff filter=lfs diff=lfs merge=lfs -text
*.gtf filter=lfs diff=lfs merge=lfs -text
**/salmonTE_quant/* filter=lfs diff=lfs merge=lfs -text
**/salmonTE_test/* filter=lfs diff=lfs merge=lfs -text
**/STAR/* filter=lfs diff=lfs merge=lfs -text
*.html filter=lfs diff=lfs merge=lfs -text
48 changes: 32 additions & 16 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
directory: tests
snakefile: workflow/Snakefile
args: "--configfile tests/config.yaml --lint"
run-workflow-complete:
run-unit-tests:
runs-on: ubuntu-latest
needs:
- linting
Expand All @@ -56,26 +56,42 @@ jobs:
- name: Setup snakemake
run: |
mamba install -c conda-forge -c bioconda snakemake=7.32.4 apptainer=1.2.5
- name: List tests
- name: Pytest
run: |
ls -R tests
- name: Run workflow
pytest .tests/unit
run-integration-tests:
runs-on: ubuntu-latest
needs:
- linting
- formatting
steps:
- name: Checkout repository with submodules
uses: actions/checkout@v3
with:
submodules: recursive
lfs: true
- name: Setup Python 3.11
uses: actions/setup-python@v2
with:
python-version: 3.11
- name: Add Conda to system path
run: |
ROOT=$(realpath .)
snakemake \
--directory tests \
--configfile tests/config.yaml \
--profile tests/profile \
--snakefile workflow/Snakefile \
--singularity-args="--bind $ROOT --bind $HOME"
- name: Test reporting
echo $CONDA/bin >> $GITHUB_PATH
- name: Install dependencies
run: |
conda install -c conda-forge mamba
- name: Setup snakemake
run: |
mamba install -c conda-forge -c bioconda snakemake=7.32.4 apptainer=1.2.5
- name: Run workflow
run: |
ROOT=$(realpath .)
snakemake \
--directory tests \
--configfile tests/config.yaml \
--profile tests/profile \
--directory .tests/integration \
--configfile .tests/integration/config.yaml \
--profile .tests/integration/profile \
--snakefile workflow/Snakefile \
--report report.zip \
--all-temp \
--singularity-args="--bind $ROOT --bind $HOME"
17 changes: 13 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,17 @@
**/workdir
profile/slurm
tests4/
tests/.wget-hsts
tests/*.log
tests/*.out
**/.wget-hsts
**/*.log
**/*.out
run-test.sh
test.sh
test.sh
**/__pycache__
.pytest_cache/
.tests/unit-temp
deseq2_emergency_dump.rda
mydata/
profile/slurm-francesco/
report.zip
snakemake_run
snakemake_test
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
13 changes: 9 additions & 4 deletions tests/config.yaml → .tests/integration/config.yaml
Original file line number Diff line number Diff line change
@@ -1,27 +1,29 @@
sequencing_libraries:
- name: GSE130735-subset
protocol: pe
# "sample_sheet" is a csv mapping FILENAMES without extensions to GENOTYPE
# E.g.
# Name,Filename,Genotype
# s01,some_complicate_filename_s01,WT
# s02,some_complicate_filename_s02,WT
# s03,some_complicate_filename_s03,KO
# s04,some_complicate_filename_s04,KO
sample_sheet: sample-sheet.csv
# s04,some_complicate_filename_s04,KO
sample_sheet: resources/sample-sheet.csv
trimmomatic: "ILLUMINACLIP:$CONDA_PREFIX/share/trimmomatic/adapters/TruSeq3-PE.fa:1:15:10 SLIDINGWINDOW:8:22 MAXINFO:20:0.6 LEADING:22 TRAILING:20 MINLEN:50"
star: "--seedSearchStartLmax 30 --outFilterMismatchNoverReadLmax 0.04 --winAnchorMultimapNmax 40"
bamCoverage: "--binSize 10 --normalizeUsing None"
deseq2:
test: Wald
variable: genotype
reference_level: WT


disable_TE_analysis: false
disable_tRNA_analysis: false

globals:
# <reads_folder>/<library-name> is the expected pattern
reads_folder: .
reads_folder: .tests/integration

results_folder: results/
qc_folder: results/qc
Expand All @@ -42,7 +44,10 @@ genome:

# URL to GtRNAdb files
gtrnadb_url: http://gtrnadb.ucsc.edu/genomes/eukaryota/Mmusc10/mm10-tRNAs.tar.gz
#gtrnadb_bed: ../../../data/references/tRNA/mm10-tRNAs.bed

# for testing purposes, restrict the analysis to chr19 only
selected_chromosomes:
- chr19

deseq2:
working_directory: .
Expand Down
9 changes: 9 additions & 0 deletions .tests/integration/profile/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Snakemake profile used by the integration-test run.
#
# The CI workflow installs snakemake=7.32.4. The `software-deployment-method`
# option only exists from Snakemake 8 onwards, and an unrecognised profile key
# is forwarded to the command line, where Snakemake 7 rejects it. The
# `use-conda` / `use-singularity` switches are accepted by Snakemake 7 and are
# expected to remain available as backward-compatible flags in Snakemake 8
# (NOTE(review): confirm against the Snakemake version you run locally).
use-conda: true
use-singularity: true
# Print the log of a failed job directly in the Snakemake output.
show-failed-logs: true
cores: 2
conda-cleanup-pkgs: cache
# all-temp: true
7 changes: 7 additions & 0 deletions .tests/integration/sample-sheet-original.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name,filename_1,filename_2,genotype
SRX5795112_SRR9016958,SRX5795112_SRR9016958_1,SRX5795112_SRR9016958_2,WT
SRX5795113_SRR9016959,SRX5795113_SRR9016959_1,SRX5795113_SRR9016959_2,WT
SRX5795114_SRR9016960,SRX5795114_SRR9016960_1,SRX5795114_SRR9016960_2,WT
SRX5795117_SRR9016963,SRX5795117_SRR9016963_1,SRX5795117_SRR9016963_2,KO
SRX5795118_SRR9016964,SRX5795118_SRR9016964_1,SRX5795118_SRR9016964_2,KO
SRX5795119_SRR9016965,SRX5795119_SRR9016965_1,SRX5795119_SRR9016965_2,KO
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name,filename_1,filename_2,genotype
SRX5795112_SRR9016958,SRX5795112_SRR9016958_1,SRX5795112_SRR9016958_2,WT
SRX5795113_SRR9016959,SRX5795113_SRR9016959_1,SRX5795113_SRR9016959_2,WT
SRX5795117_SRR9016963,SRX5795117_SRR9016963_1,SRX5795117_SRR9016963_2,KO
SRX5795118_SRR9016964,SRX5795118_SRR9016964_1,SRX5795118_SRR9016964_2,KO
48 changes: 48 additions & 0 deletions .tests/unit/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
Common code for unit testing of rules generated with Snakemake 8.9.0.
"""

from pathlib import Path
import subprocess as sp
import os


class OutputChecker:
    """Verify the files found in a test working directory against expected outputs.

    Parameters
    ----------
    data_path : path-like
        Directory holding the input files of the test; files with the same
        relative path in ``workdir`` are ignored by :meth:`check`.
    expected_path : path-like
        Directory holding the expected output files.
    workdir : pathlib.Path
        Directory in which the job under test was executed.
    """

    def __init__(self, data_path, expected_path, workdir):
        self.data_path = data_path
        self.expected_path = expected_path
        self.workdir = workdir

    @staticmethod
    def _relative_files(root):
        """Return the set of all files below ``root`` as paths relative to ``root``."""
        return set(
            (Path(path) / f).relative_to(root)
            for path, subdirs, files in os.walk(root)
            for f in files
        )

    def check(self):
        """Compare every file in the working directory with its expected counterpart.

        Files whose relative path starts with ``.snakemake`` are skipped, input
        files are ignored, and every file that also exists below
        ``expected_path`` is handed to :meth:`compare_files`.

        Raises
        ------
        ValueError
            If the working directory contains files that are neither inputs
            nor expected outputs, or if an expected output file was not
            found in the working directory at all.
        subprocess.CalledProcessError
            Propagated from :meth:`compare_files` when two files differ.
        """
        input_files = self._relative_files(self.data_path)
        expected_files = self._relative_files(self.expected_path)
        seen_expected = set()
        unexpected_files = set()
        for path, subdirs, files in os.walk(self.workdir):
            for f in files:
                f = (Path(path) / f).relative_to(self.workdir)
                if str(f).startswith(".snakemake"):
                    continue
                if f in expected_files:
                    seen_expected.add(f)
                    self.compare_files(self.workdir / f, self.expected_path / f)
                elif f in input_files:
                    # ignore input files
                    pass
                else:
                    unexpected_files.add(f)
        if unexpected_files:
            raise ValueError(
                "Unexpected files:\n{}".format(
                    "\n".join(sorted(map(str, unexpected_files)))
                )
            )
        # An expected output that never showed up in the working directory
        # must fail the check; otherwise a job that silently produces nothing
        # would pass. Paths under ".snakemake" are excluded for symmetry with
        # the scan above, which never looks at them.
        missing_files = set(
            f
            for f in expected_files - seen_expected
            if not str(f).startswith(".snakemake")
        )
        if missing_files:
            raise ValueError(
                "Missing expected files:\n{}".format(
                    "\n".join(sorted(map(str, missing_files)))
                )
            )

    def compare_files(self, generated_file, expected_file):
        """Compare two files byte by byte with the external ``cmp`` utility.

        Raises ``subprocess.CalledProcessError`` when ``cmp`` exits non-zero.
        Subclasses may override this method to relax the comparison.
        """
        sp.check_output(["cmp", generated_file, expected_file])
5 changes: 5 additions & 0 deletions .tests/unit/deseq2/data/resources/sample-sheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name,filename_1,filename_2,genotype
SRX5795112_SRR9016958,SRX5795112_SRR9016958_1,SRX5795112_SRR9016958_2,WT
SRX5795113_SRR9016959,SRX5795113_SRR9016959_1,SRX5795113_SRR9016959_2,WT
SRX5795117_SRR9016963,SRX5795117_SRR9016963_1,SRX5795117_SRR9016963_2,KO
SRX5795118_SRR9016964,SRX5795118_SRR9016964_1,SRX5795118_SRR9016964_2,KO
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name,filename_1,filename_2,genotype
SRX5795112_SRR9016958,SRX5795112_SRR9016958_1,SRX5795112_SRR9016958_2,WT
SRX5795113_SRR9016959,SRX5795113_SRR9016959_1,SRX5795113_SRR9016959_2,WT
SRX5795117_SRR9016963,SRX5795117_SRR9016963_1,SRX5795117_SRR9016963_2,KO
SRX5795118_SRR9016964,SRX5795118_SRR9016964_1,SRX5795118_SRR9016964_2,KO
5 changes: 5 additions & 0 deletions .tests/unit/deseq2_tRNA/data/resources/sample-sheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name,filename_1,filename_2,genotype
SRX5795112_SRR9016958,SRX5795112_SRR9016958_1,SRX5795112_SRR9016958_2,WT
SRX5795113_SRR9016959,SRX5795113_SRR9016959_1,SRX5795113_SRR9016959_2,WT
SRX5795117_SRR9016963,SRX5795117_SRR9016963_1,SRX5795117_SRR9016963_2,KO
SRX5795118_SRR9016964,SRX5795118_SRR9016964_1,SRX5795118_SRR9016964_2,KO
Empty file.
Empty file.
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name,filename_1,filename_2,genotype
SRX5795112_SRR9016958,SRX5795112_SRR9016958_1,SRX5795112_SRR9016958_2,WT
SRX5795113_SRR9016959,SRX5795113_SRR9016959_1,SRX5795113_SRR9016959_2,WT
SRX5795117_SRR9016963,SRX5795117_SRR9016963_1,SRX5795117_SRR9016963_2,KO
SRX5795118_SRR9016964,SRX5795118_SRR9016964_1,SRX5795118_SRR9016964_2,KO
5 changes: 5 additions & 0 deletions .tests/unit/fastqc_markdup/data/resources/sample-sheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name,filename_1,filename_2,genotype
SRX5795112_SRR9016958,SRX5795112_SRR9016958_1,SRX5795112_SRR9016958_2,WT
SRX5795113_SRR9016959,SRX5795113_SRR9016959_1,SRX5795113_SRR9016959_2,WT
SRX5795117_SRR9016963,SRX5795117_SRR9016963_1,SRX5795117_SRR9016963_2,KO
SRX5795118_SRR9016964,SRX5795118_SRR9016964_1,SRX5795118_SRR9016964_2,KO
Git LFS file not shown
Git LFS file not shown
5 changes: 5 additions & 0 deletions .tests/unit/multiqc_markdup/data/resources/sample-sheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name,filename_1,filename_2,genotype
SRX5795112_SRR9016958,SRX5795112_SRR9016958_1,SRX5795112_SRR9016958_2,WT
SRX5795113_SRR9016959,SRX5795113_SRR9016959_1,SRX5795113_SRR9016959_2,WT
SRX5795117_SRR9016963,SRX5795117_SRR9016963_1,SRX5795117_SRR9016963_2,KO
SRX5795118_SRR9016964,SRX5795118_SRR9016964_1,SRX5795118_SRR9016964_2,KO
5 changes: 5 additions & 0 deletions .tests/unit/multiqc_raw/data/resources/sample-sheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name,filename_1,filename_2,genotype
SRX5795112_SRR9016958,SRX5795112_SRR9016958_1,SRX5795112_SRR9016958_2,WT
SRX5795113_SRR9016959,SRX5795113_SRR9016959_1,SRX5795113_SRR9016959_2,WT
SRX5795117_SRR9016963,SRX5795117_SRR9016963_1,SRX5795117_SRR9016963_2,KO
SRX5795118_SRR9016964,SRX5795118_SRR9016964_1,SRX5795118_SRR9016964_2,KO
5 changes: 5 additions & 0 deletions .tests/unit/multiqc_star/data/resources/sample-sheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name,filename_1,filename_2,genotype
SRX5795112_SRR9016958,SRX5795112_SRR9016958_1,SRX5795112_SRR9016958_2,WT
SRX5795113_SRR9016959,SRX5795113_SRR9016959_1,SRX5795113_SRR9016959_2,WT
SRX5795117_SRR9016963,SRX5795117_SRR9016963_1,SRX5795117_SRR9016963_2,KO
SRX5795118_SRR9016964,SRX5795118_SRR9016964_1,SRX5795118_SRR9016964_2,KO
5 changes: 5 additions & 0 deletions .tests/unit/multiqc_trim/data/resources/sample-sheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name,filename_1,filename_2,genotype
SRX5795112_SRR9016958,SRX5795112_SRR9016958_1,SRX5795112_SRR9016958_2,WT
SRX5795113_SRR9016959,SRX5795113_SRR9016959_1,SRX5795113_SRR9016959_2,WT
SRX5795117_SRR9016963,SRX5795117_SRR9016963_1,SRX5795117_SRR9016963_2,KO
SRX5795118_SRR9016964,SRX5795118_SRR9016964_1,SRX5795118_SRR9016964_2,KO
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
40 changes: 40 additions & 0 deletions .tests/unit/test_build_trna_coverage_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import os
import sys

import subprocess as sp
from tempfile import TemporaryDirectory
import shutil
from pathlib import Path, PurePosixPath

sys.path.insert(0, os.path.dirname(__file__))

import utils


def test_build_trna_coverage_matrix():
    """Build the tRNA coverage matrix in a scratch directory and check the result.

    The fixture data is copied into a temporary working directory, the command
    returned by ``utils.get_cmd`` is executed for the matrix target, and the
    resulting files are verified with ``utils.OutputChecker``.
    """
    target = "results/tRNA_coverage/GSE130735-subset/tRNA_matrix.txt"
    fixture_root = PurePosixPath(".tests/unit/build_trna_coverage_matrix")

    # Remember where the test was started from; it is passed on to get_cmd.
    cwd = Path().resolve()

    with TemporaryDirectory() as scratch:
        data_path = fixture_root / "data"
        expected_path = fixture_root / "expected"
        workdir = Path(scratch) / "workdir"

        # Populate the working directory with the fixture inputs.
        shutil.copytree(data_path, workdir)
        utils.prepenv(workdir)

        # Debug aid: show which target is being requested.
        print(target, file=sys.stderr)

        # Execute the job that produces the target.
        sp.check_output(utils.get_cmd(workdir, target, cwd))

        # Outputs are compared with the expected files by the checker
        # (byte by byte via `cmp` in the OutputChecker of common.py).
        # To relax the comparison, subclass the checker and override
        # `compare_files(generated_file, expected_file)`.
        checker = utils.OutputChecker(data_path, expected_path, workdir)
        checker.check()
Loading

0 comments on commit 929199a

Please sign in to comment.