diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 4bcb559a..ed49905d 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -82,6 +82,12 @@ If you wish to contribute a new step, please use the following coding standards: 9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://https://multiqc.info/) module. 10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. +### Things to consider regarding displaying results for a new tool + +- If a MultiQC module exists for the tool, use the standard settings for it to start with. +- If no MultiQC module exists, the results of the tool should be made available in the results directory. +- If a tool doesn’t produce output files, the stdout should be channeled into an output file that is accessible from the outdir of the pipeline. + ### Default values Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. 
diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml new file mode 100644 index 00000000..233897ed --- /dev/null +++ b/.github/workflows/nf-test.yml @@ -0,0 +1,24 @@ +name: nf-test + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Nextflow latest-edge + uses: nf-core/setup-nextflow@v2 + with: + version: "latest-edge" + + - name: Install nf-test + run: | + wget -qO- https://get.nf-test.com | bash + sudo mv nf-test /usr/local/bin/ + + - name: Run Tests + run: nf-test test --ci tests diff --git a/.gitignore b/.gitignore index a42ce016..9e307203 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ testing/ testing* *.pyc null/ +.nf-test +.nf-test.log diff --git a/.nf-core.yml b/.nf-core.yml index fb856008..3765b89d 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,5 +1,7 @@ bump_version: null -lint: null +lint: + files_unchanged: + - .github/CONTRIBUTING.md nf_core_version: 3.0.2 org_path: null repository_type: pipeline diff --git a/CHANGELOG.md b/CHANGELOG.md index 58a53ef4..a4b5e286 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,15 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c ### `Added` +- [#20](https://github.com/nf-core/seqinspector/pull/20) Use tags to generate group reports +- [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane. +- [#49](https://github.com/nf-core/seqinspector/pull/49) Merge with template 3.0.2. +- [#56](https://github.com/nf-core/seqinspector/pull/56) Added SeqFu stats module. +- [#50](https://github.com/nf-core/seqinspector/pull/50) Add an optional subsampling step. +- [#51](https://github.com/nf-core/seqinspector/pull/51) Add nf-test to CI. 
+- [#63](https://github.com/nf-core/seqinspector/pull/63) Contribution guidelines added about displaying results for new tools +- [#53](https://github.com/nf-core/seqinspector/pull/53) Add FastQ-Screen database multiplexing and limit scope of nf-test in CI. + ### `Fixed` ### `Dependencies` diff --git a/CITATIONS.md b/CITATIONS.md index ecbfb16e..208cfa1e 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -14,10 +14,20 @@ > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. +- [SeqFu](https://telatin.github.io/seqfu2/) + +> Telatin A, Fariselli P, Birolo G. SeqFu: A Suite of Utilities for the Robust and Reproducible Manipulation of Sequence Files. Bioengineering 2021, 8, 59. doi.org/10.3390/bioengineering8050059 + +- [FastQ Screen](https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/) + +> Wingett SW and Andrews S. FastQ Screen: A tool for multi-genome mapping and quality control [version 2; referees: 4 approved]. F1000Research 2018, 7:1338 (https://doi.org/10.12688/f1000research.15931.2) + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [Seqtk](https://github.com/lh3/seqtk) + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/README.md b/README.md index 2b5e2c85..6cf36dcc 100644 --- a/README.md +++ b/README.md @@ -31,34 +31,28 @@ workflows use the "tube map" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples. --> -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +1. Subsample reads ([`Seqtk`](https://github.com/lh3/seqtk)) +2. 
Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +3. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) ## Usage > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - - Now, you can run the pipeline using: - - ```bash nextflow run nf-core/seqinspector \ -profile \ @@ -79,11 +73,11 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/seqinspector was originally written by Adrien Coulier. +nf-core/seqinspector was originally written by the Swedish [@NationalGenomicsInfrastructure](https://github.com/NationalGenomicsInfrastructure/). We thank the following people for their extensive assistance in the development of this pipeline: - +- [@mahesh-panchal](https://github.com/mahesh-panchal) ## Contributions and Support diff --git a/assets/example_fastq_screen_references.csv b/assets/example_fastq_screen_references.csv new file mode 100644 index 00000000..59f0cdf6 --- /dev/null +++ b/assets/example_fastq_screen_references.csv @@ -0,0 +1,4 @@ +name,dir,basename,aligner +Ecoli,s3://ngi-igenomes/igenomes/Escherichia_coli_K_12_MG1655/NCBI/2001-10-15/Sequence/Bowtie2Index/,genome,bowtie2 +PhiX,s3://ngi-igenomes/igenomes/PhiX/Illumina/RTA/Sequence/Bowtie2Index/,genome,bowtie2 +Scerevisiae,s3://ngi-igenomes/igenomes/Saccharomyces_cerevisiae/NCBI/build3.1/Sequence/Bowtie2Index/,genome,bowtie2 diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab7..ba2542dd 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,7 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz 
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +sample,fastq_1,fastq_2,rundir,tags +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane2 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group1 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group2 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group3 diff --git a/assets/schema_fastq_screen_references.json b/assets/schema_fastq_screen_references.json new file mode 100644 index 00000000..9a938d99 --- /dev/null +++ b/assets/schema_fastq_screen_references.json @@ -0,0 +1,35 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/seqinspector/master/assets/schema_fastq_screen_references.json", + "title": "nf-core/seqinspector pipeline - params.fastq_screen_references schema", + "description": "Schema for the file provided with params.fastq_screen_references", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "The reference name as referred to by FastQ Screen." + }, + "dir": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+$", + "errorMessage": "Path to the dir containing the aligner reference and index. Can be remote." 
+ }, + "basename": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "The shared basename of the reference and index files contained in the dir." + }, + "aligner": { + "type": "string", + "enum": ["bowtie", "bowtie2", "bwa", "minimap2"], + "errorMessage": "Specify the aligner to use for the mapping. Valid arguments are 'bowtie', 'bowtie2' (default), 'bwa' or 'minimap2'." + } + }, + "required": ["name", "dir", "basename", "aligner"] + } +} diff --git a/assets/schema_input.json b/assets/schema_input.json index d7d48374..97ec6177 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -11,7 +11,7 @@ "type": "string", "pattern": "^\\S+$", "errorMessage": "Sample name must be provided and cannot contain spaces", - "meta": ["id"] + "meta": ["sample"] }, "fastq_1": { "type": "string", @@ -26,8 +26,24 @@ "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "rundir": { + "type": "string", + "format": "path", + "exists": true, + "errorMessage": "Run directory must be a path", + "meta": ["rundir"] + }, + "tags": { + "type": "string", + "pattern": "^([A-Za-z0-9_-]+:)*([A-Za-z0-9_-]+)$", + "errorMessage": "Tags must be separated by colons and only consist of letters, numbers, underscores and hyphens.", + "meta": ["tags"] } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "fastq_1"], + "dependentRequired": { + "fastq_2": ["fastq_1"] + } } } diff --git a/conf/modules.config b/conf/modules.config index d266a387..4a653ed0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,16 +18,54 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] + withName: SEQTK_SAMPLE { + ext.args = '-s100' + } + withName: FASTQC { ext.args = '--quiet' } - withName: 'MULTIQC' { + + withName: 'SEQFU_STATS' { + ext.args = '' + publishDir = [ + path: { "${params.outdir}/seqfu_stats" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MULTIQC_GLOBAL' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/multiqc" }, + path: { "${params.outdir}/multiqc/global_report" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: 'MULTIQC_PER_TAG' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/group_reports" }, + mode: params.publish_dir_mode, + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null + break + case ~/\[TAG:.+\]_multiqc_(report\.html|plots|data)/: + def tag = (filename =~ /\[TAG:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?.*)\\[TAG:${tag}\\]_(?multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "${tag}/${new_filename}" + break + default: + filename + } + } + ] + } } diff --git a/conf/test.config b/conf/test.config index 1838ae52..0ebc405a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,7 +25,8 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv' + fastq_screen_references = "${projectDir}/assets/example_fastq_screen_references.csv" // Genome 
references genome = 'R64-1-1' diff --git a/docs/output.md b/docs/output.md index e8665e59..dc26159f 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,16 +6,29 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: +- [Seqtk](#seqtk) - Subsample a specific number of reads per sample - [FastQC](#fastqc) - Raw read QC +- [SeqFu Stats](#seqfu-stats) - Statistics for FASTA or FASTQ files +- [Fastqscreen](#fastqscreen) - Mapping against a set of references for basic contamination QC - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +### Seqtk + +
+Output files + +- `seqtk/` + - `*_fastq`: FastQ file after being subsampled to the sample_size value. + +
+ +[Seqtk](https://github.com/lh3/seqtk) samples sequences by number. + ### FastQC
@@ -29,15 +42,69 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +### FASTQSCREEN + +
+Output files + +- `fastqscreen/` + - `*_screen.html`: Interactive graphical fastqscreen report which summarises the mapping of your sequences against each of your libraries. + - `*_screen.png`: Static graphical fastqscreen report which summarises the mapping of your sequences against each of your libraries. + - `*_screen.txt` : text based fastqscreen report which summarises the mapping of your sequences against each of your libraries. + +
+ +[Fastqscreen](https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/) allows you to set up a standard set of libraries against which all of your sequences can be searched. Your search libraries might contain the genomes of all of the organisms you work on, along with PhiX, Vectors or other contaminants commonly seen in sequencing experiments. + +It requires a `.csv` detailing: + +- the working name of the reference +- the name of the aligner used to generate its index (which is also the aligner and index used by the tool) +- the file basename of the reference and its index (e.g. the reference `genome.fa` and its index `genome.bt2` have the basename `genome`) +- the path to a dir where the reference and index files both reside. + +See `assets/example_fastq_screen_references.csv` for an example. + +The `.csv` is provided as a pipeline parameter `fastq_screen_references`. The `.csv` is used to construct a `FastQ Screen` configuration file within the context of the process work directory in order to properly mount the references. + +### SeqFu Stats + +
+Output files + +- `seqfu_stats/` + - `*.tsv`: Tab-separated file containing quality metrics. + - `*_mqc.txt`: File containing the same quality metrics as the TSV file, ready to be read by MultiQC. + +
+ +[SeqFu](https://telatin.github.io/seqfu2/) is a general-purpose program to manipulate and parse information from FASTA/FASTQ files, supporting gzipped input files. It includes functions to interleave and de-interleave FASTQ files, to rename sequences and to count and print statistics on sequence lengths. In this pipeline, the `seqfu stats` module is used to produce general quality metrics statistics. + ### MultiQC +nf-core/seqinspector will generate the following MultiQC reports: + +- one global report including all the samples listed in the samplesheet +- one group report per unique tag. These reports compile samples that share the same tag. + 
Output files - `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. + - `global_report` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `group_reports` + - `tag1/` + - `multiqc_report.html` + - `multiqc_data/` + - `multiqc_plots/` + - `tag2/` + - `multiqc_report.html` + - `multiqc_data/` + - `multiqc_plots/` + - ...
diff --git a/docs/usage.md b/docs/usage.md index bbc141ef..31ab91ef 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,47 +10,44 @@ ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. ```bash --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample +### Full samplesheet -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: +The following simple run dir structure... -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +``` +run_dir +├── sample1_lane1_group1_r1.fq.gz +├── sample2_lane1_group1_r1.fq.gz +├── sample3_lane2_group2_r1.fq.gz +└── sample4_lane2_group3_r1.fq.gz ``` -### Full samplesheet - -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. - -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. 
+...would be represented in the following samplesheet (shown as .tsv for readability) ```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +sample fastq_1 fastq_2 rundir tags +sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir project1:group1 +sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir project1:group1 +sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir project1:group2 +sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir control + ``` | Column | Description | | --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | +| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional). | +| `tags` | Colon-separated list of tags to group samples in special reports. 
| -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +Another [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. ## Running the pipeline @@ -96,6 +93,12 @@ genome: 'GRCh37' You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). +Optionally, the `sample_size` parameter allows you to subset a random number of reads to be analysed. Note that it refers to an absolute number. + +```bash +nextflow run nf-core/seqinspector --input ./samplesheet.csv --outdir ./results --sample_size 1000000 -profile docker +``` + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: diff --git a/main.nf b/main.nf index 58e593b1..85027a4b 100644 --- a/main.nf +++ b/main.nf @@ -15,7 +15,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { SEQINSPECTOR } from './workflows/seqinspector' +include { SEQINSPECTOR } from './workflows/seqinspector' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_seqinspector_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_seqinspector_pipeline' include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_seqinspector_pipeline' @@ -29,7 +29,7 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_seqi // TODO nf-core: Remove this line if you don't need a FASTA file // This is an example of how to use getGenomeAttribute() to fetch parameters // from igenomes.config using `--genome` -params.fasta = getGenomeAttribute('fasta') +// params.fasta = 
getGenomeAttribute('fasta') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -54,7 +54,9 @@ workflow NFCORE_SEQINSPECTOR { samplesheet ) emit: - multiqc_report = SEQINSPECTOR.out.multiqc_report // channel: /path/to/multiqc_report.html + global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html + grouped_reports = SEQINSPECTOR.out.grouped_reports // channel: /path/to/multiqc_report.html + } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -93,7 +95,7 @@ workflow { params.outdir, params.monochrome_logs, params.hook_url, - NFCORE_SEQINSPECTOR.out.multiqc_report + NFCORE_SEQINSPECTOR.out.global_report, ) } diff --git a/modules.json b/modules.json index 8e632d50..b4530ee1 100644 --- a/modules.json +++ b/modules.json @@ -10,10 +10,31 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "fastqe": { + "branch": "master", + "git_sha": "cab0b8fdc4f785810f49423d8141a0773c21eb3b", + "installed_by": ["modules"] + }, + "fastqscreen/fastqscreen": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"], + "patch": "modules/nf-core/fastqscreen/fastqscreen/fastqscreen-fastqscreen.diff" + }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] + }, + "seqfu/stats": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "seqtk/sample": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] } } }, @@ -21,7 +42,7 @@ "nf-core": { "utils_nextflow_pipeline": { "branch": "master", - "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", + "git_sha": "56372688d8979092cafbe0c5c3895b491166ca1c", "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { @@ -31,7 +52,7 @@ }, "utils_nfschema_plugin": { 
"branch": "master", - "git_sha": "bbd5a41f4535a8defafe6080e00ea74c45f4f96c", + "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", "installed_by": ["subworkflows"] } } diff --git a/modules/nf-core/fastqe/environment.yml b/modules/nf-core/fastqe/environment.yml new file mode 100644 index 00000000..0b98f6fc --- /dev/null +++ b/modules/nf-core/fastqe/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::fastqe=0.3.3" diff --git a/modules/nf-core/fastqe/main.nf b/modules/nf-core/fastqe/main.nf new file mode 100644 index 00000000..05f40ccf --- /dev/null +++ b/modules/nf-core/fastqe/main.nf @@ -0,0 +1,48 @@ +process FASTQE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastqe:0.3.3--pyhdfd78af_0': + 'biocontainers/fastqe:0.3.3--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.3.3' + """ + fastqe \\ + $args \\ + $fastq \\ + --output ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqe: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.3.3' + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqe: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastqe/meta.yml b/modules/nf-core/fastqe/meta.yml new file mode 100644 index 00000000..e34c109e --- /dev/null +++ 
b/modules/nf-core/fastqe/meta.yml @@ -0,0 +1,50 @@ +name: "fastqe" +description: fastqe is a bioinformatics command line tool that uses emojis to represent and analyze genomic data. +keywords: + - quality control + - fastq + - emoji + - visualization +tools: + - fastqe: + description: "A tool for visualizing FASTQ file quality using emoji" + homepage: "https://github.com/fastqe/fastqe" + documentation: "https://github.com/fastqe/fastqe#readme" + tool_dev_url: "https://github.com/fastqe/fastqe" + doi: "10.21105/joss.02400" + licence: ["MIT"] + identifier: "biotools:fastqe" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - fastq: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + +output: + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.tsv": + type: file + description: Text file containing emoji + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" diff --git a/modules/nf-core/fastqe/tests/main.nf.test b/modules/nf-core/fastqe/tests/main.nf.test new file mode 100644 index 00000000..1fee9ed2 --- /dev/null +++ b/modules/nf-core/fastqe/tests/main.nf.test @@ -0,0 +1,100 @@ +// nf-core modules test fastqe +nextflow_process { + + name "Test Process FASTQE" + script "../main.nf" + process "FASTQE" + + tag "modules" + tag "modules_nfcore" + tag "fastqe" + + test("sarscov2 single-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success 
}, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/fastqe/tests/main.nf.test.snap b/modules/nf-core/fastqe/tests/main.nf.test.snap new file mode 100644 index 00000000..195bafb0 --- /dev/null +++ b/modules/nf-core/fastqe/tests/main.nf.test.snap @@ -0,0 +1,142 @@ +{ + "sarscov2 single-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,dbbf45cfa1fab97d81e8de840958233b" 
+ ], + "tsv": [ + [ + { + "id": "test", + "single_end": true + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,dbbf45cfa1fab97d81e8de840958233b" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:18:03.83777" + }, + "sarscov2 paired-end [fastq]": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,aceceddba6e4b5a968d54a6db46238da" + ] + ], + "1": [ + "versions.yml:md5,dbbf45cfa1fab97d81e8de840958233b" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,aceceddba6e4b5a968d54a6db46238da" + ] + ], + "versions": [ + "versions.yml:md5,dbbf45cfa1fab97d81e8de840958233b" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:16:30.591658" + }, + "sarscov2 paired-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,dbbf45cfa1fab97d81e8de840958233b" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,dbbf45cfa1fab97d81e8de840958233b" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:18:09.501789" + }, + "sarscov2 single-end [fastq]": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.tsv:md5,16d66ef19b5f2694aaeae9c3727c3a61" + ] + ], + "1": [ + "versions.yml:md5,dbbf45cfa1fab97d81e8de840958233b" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": true + }, + "test.tsv:md5,16d66ef19b5f2694aaeae9c3727c3a61" + ] + ], + "versions": [ + "versions.yml:md5,dbbf45cfa1fab97d81e8de840958233b" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:16:23.822551" + } +} \ 
No newline at end of file diff --git a/modules/nf-core/fastqscreen/fastqscreen/environment.yml b/modules/nf-core/fastqscreen/fastqscreen/environment.yml new file mode 100644 index 00000000..7d4c8922 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/environment.yml @@ -0,0 +1,14 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fastq-screen=0.16.0 + - bioconda::perl-gdgraph=1.54 + # - gzip + # - SAMtools + # - bowtie + # - bowtie2 + # - bwa + # - bismark diff --git a/modules/nf-core/fastqscreen/fastqscreen/fastqscreen-fastqscreen.diff b/modules/nf-core/fastqscreen/fastqscreen/fastqscreen-fastqscreen.diff new file mode 100644 index 00000000..2fbd94b3 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/fastqscreen-fastqscreen.diff @@ -0,0 +1,97 @@ +Changes in module 'nf-core/fastqscreen/fastqscreen' +Changes in 'fastqscreen/fastqscreen/environment.yml': +--- modules/nf-core/fastqscreen/fastqscreen/environment.yml ++++ modules/nf-core/fastqscreen/fastqscreen/environment.yml +@@ -4,5 +4,11 @@ + - conda-forge + - bioconda + dependencies: +- - "bioconda::fastq-screen=0.15.3" ++ - bioconda::fastq-screen=0.16.0 + - bioconda::perl-gdgraph=1.54 ++ # - gzip ++ # - SAMtools ++ # - bowtie ++ # - bowtie2 ++ # - bwa ++ # - bismark + +Changes in 'fastqscreen/fastqscreen/main.nf': +--- modules/nf-core/fastqscreen/fastqscreen/main.nf ++++ modules/nf-core/fastqscreen/fastqscreen/main.nf +@@ -4,12 +4,12 @@ + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+- 'https://depot.galaxyproject.org/singularity/fastq-screen:0.15.3--pl5321hdfd78af_0': +- 'biocontainers/fastq-screen:0.15.3--pl5321hdfd78af_0'}" ++ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fc/fc53eee7ca23c32220a9662fbb63c67769756544b6d74a1ee85cf439ea79a7ee/data' : ++ 'community.wave.seqera.io/library/fastq-screen_perl-gdgraph:5c1786a5d5bc1309'}" + + input: +- tuple val(meta), path(reads) // .fastq files +- path database ++ tuple val(meta), path(reads, arity: '1..2') ++ tuple val(ref_names), path(ref_dirs, name:"ref*"), val(ref_basenames), val(ref_aligners) + + output: + tuple val(meta), path("*.txt") , emit: txt +@@ -24,31 +24,35 @@ + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" ++ def config_content = ref_names.withIndex().collect { name, i -> "DATABASE ${name} ./${ref_dirs[i]}/${ref_basenames[i]} ${ref_aligners[i]}" }.join('\n') ++ """ ++ echo '${config_content}' > fastq_screen.conf + +- """ +- fastq_screen --threads ${task.cpus} \\ +- --aligner bowtie2 \\ +- --conf ${database}/fastq_screen.conf \\ ++ fastq_screen \\ ++ --conf fastq_screen.conf \\ ++ --threads ${task.cpus} \\ + $reads \\ +- $args \\ ++ $args + +- cat <<-END_VERSIONS > versions.yml +- "${task.process}": +- fastqscreen: \$(echo \$(fastq_screen --version 2>&1) | sed 's/^.*FastQ Screen v//; s/ .*\$//') +- END_VERSIONS ++ mv *_screen.txt ${prefix}_screen.txt ++ mv *_screen.html ${prefix}_screen.html ++ mv *_screen.png ${prefix}_screen.png ++ ++ fastq_screen_version=\$(fastq_screen --version 2>&1 | sed 's/^.*FastQ Screen v//; s/ .*\$//') ++ echo "\\\"${task.process}\\\":" > versions.yml ++ echo " fastqscreen: \$fastq_screen_version" >> versions.yml + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ +- touch test_1_screen.html +- touch test_1_screen.png +- touch test_1_screen.txt ++ touch ${prefix}_screen.html ++ touch ${prefix}_screen.png ++ touch ${prefix}_screen.txt + +- cat <<-END_VERSIONS > versions.yml +- 
"${task.process}": +- fastqscreen: \$(echo \$(fastq_screen --version 2>&1) | sed 's/^.*FastQ Screen v//; s/ .*\$//') +- END_VERSIONS ++ fastq_screen_version=\$(fastq_screen --version 2>&1 | sed 's/^.*FastQ Screen v//; s/ .*\$//') ++ echo "\\\"${task.process}\\\":" > versions.yml ++ echo " fastqscreen: \$fastq_screen_version" >> versions.yml + """ + + } + +'modules/nf-core/fastqscreen/fastqscreen/meta.yml' is unchanged +'modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test' is unchanged +'modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap' is unchanged +'modules/nf-core/fastqscreen/fastqscreen/tests/nextflow.config' is unchanged +'modules/nf-core/fastqscreen/fastqscreen/tests/tags.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/fastqscreen/fastqscreen/main.nf b/modules/nf-core/fastqscreen/fastqscreen/main.nf new file mode 100644 index 00000000..2eba8390 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/main.nf @@ -0,0 +1,58 @@ +process FASTQSCREEN_FASTQSCREEN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fc/fc53eee7ca23c32220a9662fbb63c67769756544b6d74a1ee85cf439ea79a7ee/data' : + 'community.wave.seqera.io/library/fastq-screen_perl-gdgraph:5c1786a5d5bc1309'}" + + input: + tuple val(meta), path(reads, arity: '1..2') + tuple val(ref_names), path(ref_dirs, name:"ref*"), val(ref_basenames), val(ref_aligners) + + output: + tuple val(meta), path("*.txt") , emit: txt + tuple val(meta), path("*.png") , emit: png , optional: true + tuple val(meta), path("*.html") , emit: html + tuple val(meta), path("*.fastq.gz"), emit: fastq, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + def config_content = ref_names.withIndex().collect { name, i -> "DATABASE ${name} ./${ref_dirs[i]}/${ref_basenames[i]} ${ref_aligners[i]}" }.join('\n') + """ + echo '${config_content}' > fastq_screen.conf + + fastq_screen \\ + --conf fastq_screen.conf \\ + --threads ${task.cpus} \\ + $reads \\ + $args + + mv *_screen.txt ${prefix}_screen.txt + mv *_screen.html ${prefix}_screen.html + mv *_screen.png ${prefix}_screen.png + + fastq_screen_version=\$(fastq_screen --version 2>&1 | sed 's/^.*FastQ Screen v//; s/ .*\$//') + echo "\\\"${task.process}\\\":" > versions.yml + echo " fastqscreen: \$fastq_screen_version" >> versions.yml + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_screen.html + touch ${prefix}_screen.png + touch ${prefix}_screen.txt + + fastq_screen_version=\$(fastq_screen --version 2>&1 | sed 's/^.*FastQ Screen v//; s/ .*\$//') + echo "\\\"${task.process}\\\":" > versions.yml + echo " fastqscreen: \$fastq_screen_version" >> versions.yml + """ + +} diff --git a/modules/nf-core/fastqscreen/fastqscreen/meta.yml b/modules/nf-core/fastqscreen/fastqscreen/meta.yml new file mode 100644 index 00000000..39c86b4f --- /dev/null +++ 
b/modules/nf-core/fastqscreen/fastqscreen/meta.yml @@ -0,0 +1,78 @@ +name: fastqscreen_fastqscreen +description: Align reads to multiple reference genomes using fastq-screen +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - "fastqscreen": + description: "FastQ Screen allows you to screen a library of sequences in FastQ + format against a set of sequence databases so you can see if the composition + of the library matches with what you expect." + homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/" + documentation: "https://stevenwingett.github.io/FastQ-Screen/" + tool_dev_url: "https://github.com/StevenWingett/FastQ-Screen/archive/refs/tags/v0.15.3.zip" + doi: "10.5281/zenodo.5838377" + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - database: + type: directory + description: fastq screen database folder containing config file and index folders + pattern: "FastQ_Screen_Genomes" +output: + - txt: + - meta: + type: map + description: Groovy Map containing sample information + - "*.txt": + type: file + description: TXT file containing alignment statistics + pattern: "*.txt" + - png: + - meta: + type: map + description: Groovy Map containing sample information + - "*.png": + type: file + description: PNG file with graphical representation of alignments + pattern: "*.png" + - html: + - meta: + type: map + description: Groovy Map containing sample information + - "*.html": + type: file + description: HTML file containing mapping results as a table and graphical representation + pattern: "*.html" + - fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: FastQ file containing reads that did not align to any database (optional) + pattern: "*.fastq.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@snesic" + - "@JPejovicApis" diff --git a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test new file mode 100644 index 00000000..71230a22 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test @@ -0,0 +1,117 @@ +nextflow_process { + + name "Test Process FASTQSCREEN_FASTQSCREEN" + script "../main.nf" + process "FASTQSCREEN_FASTQSCREEN" + + tag "modules" + tag "modules_nfcore" + tag "bowtie2/build" + tag "fastqscreen" + tag "fastqscreen/buildfromindex" + tag "fastqscreen/fastqscreen" + tag "buildfromindex" + tag "modules_fastqscreen" + + setup { + + run("BOWTIE2_BUILD") { + script "../../../bowtie2/build/main.nf" + process { + """ + input[0] = Channel.from([ + [[id: "sarscov2"], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)], + [[id: "human"] , file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)] + ]) + """ + } + } + + run("FASTQSCREEN_BUILDFROMINDEX") { + script "../../../fastqscreen/buildfromindex/main.nf" + process { + """ + input[0] = BOWTIE2_BUILD.out.index.map{meta, index -> meta.id}.collect() + input[1] = BOWTIE2_BUILD.out.index.map{meta, index -> index}.collect() + """ + } + } + } + + test("sarscov2 - human") { + + when { + process { + """ + input[0] = [[ id:'test', single_end:true ], + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = FASTQSCREEN_BUILDFROMINDEX.out.database + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match() }, + { assert 
file(process.out.txt.get(0).get(1)).exists() }, + { assert file(process.out.png.get(0).get(1)).exists() }, + { assert file(process.out.html.get(0).get(1)).exists() } + ) + } + + } + + test("sarscov2 - human - tags") { + config './nextflow.config' + when { + process { + """ + input[0] = [[ id:'test', single_end:false ], + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] + input[1] = FASTQSCREEN_BUILDFROMINDEX.out.database + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.version, + process.out.txt, + process.out.fastq, + path(process.out.html.get(0).get(1)).readLines()[0..10], + path(process.out.png.get(0).get(1)).exists() + ).match() } + ) + } + + } + + test("sarscov2 - human - stub") { + + options "-stub" + when { + process { + """ + input[0] = [[ id:'test', single_end:true ], + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = FASTQSCREEN_BUILDFROMINDEX.out.database + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap new file mode 100644 index 00000000..2afffdea --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap @@ -0,0 +1,132 @@ +{ + "sarscov2 - human": { + "content": null, + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-31T05:42:29.972454812" + }, + "sarscov2 - human - tags": { + "content": [ + null, + [ + [ + { + "id": "test", + "single_end": false + }, + "test_1_screen.txt:md5,b0b0ea58bc26ebaa4d573a85e7898f25" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.tagged.fastq.gz:md5,f742b162c43ce28f80b89608d5c47f3d", + "test_1.tagged_filter.fastq.gz:md5,28527a76bb0bb3fce0ee76afe01e90aa" + ] + ] + 
], + [ + "", + "", + "", + "", + "", + "", + "\t", + "\tFastQ Screen Processing Report - test_1.fastq.gz", + "\t