diff --git a/.gitignore b/.gitignore index a42ce016..9e307203 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ testing/ testing* *.pyc null/ +.nf-test +.nf-test.log diff --git a/CHANGELOG.md b/CHANGELOG.md index 58a53ef4..b0b12de1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c ### `Added` +- [#20](https://github.com/nf-core/seqinspector/pull/20) Use tags to generate group reports +- [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane. +- [#49](https://github.com/nf-core/seqinspector/pull/49) Merge with template 3.0.2. + ### `Fixed` ### `Dependencies` diff --git a/README.md b/README.md index 2b5e2c85..f647af08 100644 --- a/README.md +++ b/README.md @@ -39,26 +39,19 @@ > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - - Now, you can run the pipeline using: - - ```bash nextflow run nf-core/seqinspector \ -profile \ @@ -79,11 +72,11 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/seqinspector was originally written by Adrien Coulier. +nf-core/seqinspector was originally written by the Swedish [@NationalGenomicsInfrastructure](https://github.com/NationalGenomicsInfrastructure/). 
We thank the following people for their extensive assistance in the development of this pipeline: - +- [@mahesh-panchal](https://github.com/mahesh-panchal) ## Contributions and Support diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab7..ba2542dd 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,7 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +sample,fastq_1,fastq_2,rundir,tags +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane2 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group1 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group2 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group3 diff --git a/assets/schema_input.json b/assets/schema_input.json index d7d48374..97ec6177 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -11,7 +11,7 @@ "type": "string", "pattern": "^\\S+$", "errorMessage": "Sample name must be provided and cannot contain spaces", - "meta": ["id"] + "meta": ["sample"] }, "fastq_1": { "type": "string", @@ -26,8 +26,24 @@ "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "rundir": { + "type": "string", + "format": "path", +
"exists": true, + "errorMessage": "Run directory must be a path", + "meta": ["rundir"] + }, + "tags": { + "type": "string", + "pattern": "^([A-Za-z0-9_-]+:)*([A-Za-z0-9_-]+)$", + "errorMessage": "Tags must be separated by colons and only consist of letters, numbers, underscores and hyphens.", + "meta": ["tags"] } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "fastq_1"], + "dependentRequired": { + "fastq_2": ["fastq_1"] + } } } diff --git a/conf/modules.config b/conf/modules.config index d266a387..c8838224 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -21,13 +21,38 @@ process { withName: FASTQC { ext.args = '--quiet' } - withName: 'MULTIQC' { + + withName: 'MULTIQC_GLOBAL' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/multiqc" }, + path: { "${params.outdir}/multiqc/global_report" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: 'MULTIQC_PER_TAG' { + ext.args = { params.multiqc_title ?
"--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/group_reports" }, + mode: params.publish_dir_mode, + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null + break + case ~/\[TAG:.+\]_multiqc_(report\.html|plots|data)/: + def tag = (filename =~ /\[TAG:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?<prefix>.*)\\[TAG:${tag}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "${tag}/${new_filename}" + break + default: + filename + } + } + ] + } } diff --git a/conf/test.config b/conf/test.config index 1838ae52..76a7ad03 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,7 +25,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv' // Genome references genome = 'R64-1-1' diff --git a/docs/output.md b/docs/output.md index e8665e59..e14c3ad6 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,8 +6,6 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: @@ -31,13 +29,29 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ### MultiQC +nf-core/seqinspector will generate the following MultiQC reports: + +- one global report including all the samples listed in the samplesheet +- one group report per unique tag.
These reports compile samples that share the same tag. +
Output files - `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. + - `global_report/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `group_reports/` + - `tag1/` + - `multiqc_report.html` + - `multiqc_data/` + - `multiqc_plots/` + - `tag2/` + - `multiqc_report.html` + - `multiqc_data/` + - `multiqc_plots/` + - ...
diff --git a/docs/usage.md b/docs/usage.md index bbc141ef..d75e0fbd 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,47 +10,44 @@ ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. ```bash --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample +### Full samplesheet -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: +The following simple run dir structure... -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +``` +run_dir +├── sample1_lane1_group1_r1.fq.gz +├── sample2_lane1_group1_r1.fq.gz +├── sample3_lane2_group2_r1.fq.gz +└── sample4_lane2_group3_r1.fq.gz ``` -### Full samplesheet - -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. - -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. 
+...would be represented in the following samplesheet (shown as .tsv for readability) ```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +sample fastq_1 fastq_2 rundir tags +sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir project1:group1 +sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir project1:group1 +sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir project1:group2 +sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir control + ``` | Column | Description | | --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | +| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional). | +| `tags` | Colon-separated list of tags to group samples in special reports. 
| -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +Another [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. ## Running the pipeline diff --git a/main.nf b/main.nf index 58e593b1..85027a4b 100644 --- a/main.nf +++ b/main.nf @@ -15,7 +15,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { SEQINSPECTOR } from './workflows/seqinspector' +include { SEQINSPECTOR } from './workflows/seqinspector' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_seqinspector_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_seqinspector_pipeline' include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_seqinspector_pipeline' @@ -29,7 +29,7 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_seqi // TODO nf-core: Remove this line if you don't need a FASTA file // This is an example of how to use getGenomeAttribute() to fetch parameters // from igenomes.config using `--genome` -params.fasta = getGenomeAttribute('fasta') +// params.fasta = getGenomeAttribute('fasta') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -54,7 +54,9 @@ workflow NFCORE_SEQINSPECTOR { samplesheet ) emit: - multiqc_report = SEQINSPECTOR.out.multiqc_report // channel: /path/to/multiqc_report.html + global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html + grouped_reports = SEQINSPECTOR.out.grouped_reports // channel: /path/to/multiqc_report.html + } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -93,7 +95,7 @@ workflow { params.outdir, params.monochrome_logs, params.hook_url, - NFCORE_SEQINSPECTOR.out.multiqc_report + NFCORE_SEQINSPECTOR.out.global_report, ) } diff --git a/modules.json b/modules.json index 8e632d50..bf8f5f02 100644 --- a/modules.json +++ b/modules.json 
@@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bbmap/clumpify": { + "branch": "master", + "git_sha": "a1abf90966a2a4016d3c3e41e228bfcbd4811ccc", + "installed_by": ["modules"] + }, "fastqc": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/nf-core/bbmap/clumpify/environment.yml b/modules/nf-core/bbmap/clumpify/environment.yml new file mode 100644 index 00000000..a2f65506 --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bbmap=39.10 diff --git a/modules/nf-core/bbmap/clumpify/main.nf b/modules/nf-core/bbmap/clumpify/main.nf new file mode 100644 index 00000000..fc6a85ad --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/main.nf @@ -0,0 +1,38 @@ +process BBMAP_CLUMPIFY { + tag "$meta.id" + label 'process_single' + label 'process_high_memory' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bbmap:39.10--h92535d8_0': + 'biocontainers/bbmap:39.10--h92535d8_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path('*.fastq.gz'), emit: reads + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def raw = meta.single_end ? "in=$reads" : "in1=${reads[0]} in2=${reads[1]}" + def clumped = meta.single_end ? 
"out=${prefix}.clumped.fastq.gz" : "out1=${prefix}_1.clumped.fastq.gz out2=${prefix}_2.clumped.fastq.gz" + """ + clumpify.sh \\ + $raw \\ + $clumped \\ + $args \\ + &> ${prefix}.clumpify.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bbmap/clumpify/meta.yml b/modules/nf-core/bbmap/clumpify/meta.yml new file mode 100644 index 00000000..7db435cd --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/meta.yml @@ -0,0 +1,56 @@ +name: bbmap_clumpify +description: Create 30% Smaller, Faster Gzipped Fastq Files. And remove duplicates +keywords: + - clumping fastqs + - smaller fastqs + - deduping + - fastq +tools: + - bbmap: + description: BBMap is a short read aligner, as well as various other bioinformatic + tools. + homepage: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/clumpify-guide/ + documentation: https://www.biostars.org/p/225338/ + licence: ["UC-LBL license (see package)"] + identifier: biotools:bbmap +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: The reordered/clumped (and if necessary deduped) fastq reads + pattern: "*.clumped.fastq.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.log": + type: file + description: Clumpify log file + pattern: "*clumpify.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@tamuanand" +maintainers: + - "@tamuanand" diff --git a/modules/nf-core/bbmap/clumpify/tests/main.nf.test b/modules/nf-core/bbmap/clumpify/tests/main.nf.test new file mode 100644 index 00000000..f43b8767 --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/tests/main.nf.test @@ -0,0 +1,72 @@ + +nextflow_process { + + name "Test Process BBMAP_CLUMPIFY" + script "../main.nf" + process "BBMAP_CLUMPIFY" + + tag "modules" + tag "modules_nfcore" + tag "bbmap" + tag "bbmap/clumpify" + + test("test-bbmap-clumpify-single-end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.reads, + file(process.out.log[0][1]).name, + process.out.versions + ).match() + } + ) + } + } + + test("test-bbmap-clumpify-paired-end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.reads, + file(process.out.log[0][1]).name, + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/bbmap/clumpify/tests/main.nf.test.snap b/modules/nf-core/bbmap/clumpify/tests/main.nf.test.snap new file mode 100644 index 00000000..e84c345f --- /dev/null +++ 
b/modules/nf-core/bbmap/clumpify/tests/main.nf.test.snap @@ -0,0 +1,49 @@ +{ + "test-bbmap-clumpify-paired-end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.clumped.fastq.gz:md5,27e51643262c1ef3905c4be184c3814c", + "test_2.clumped.fastq.gz:md5,c70ab7bbd44d6b6fadd6a1a79ef1648f" + ] + ] + ], + "test.clumpify.log", + [ + "versions.yml:md5,fdf0404f694fca43bcf9be6458d927cd" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T16:16:59.10822554" + }, + "test-bbmap-clumpify-single-end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.clumped.fastq.gz:md5,27e51643262c1ef3905c4be184c3814c" + ] + ], + "test.clumpify.log", + [ + "versions.yml:md5,fdf0404f694fca43bcf9be6458d927cd" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T16:16:36.9005326" + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 1c1ca6b1..50c1ecbb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,6 +15,7 @@ params { // References genome = null + fasta = null igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..6969c085 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,8 @@ +config { + + testsDir "tests" + workDir ".nf-test" + configFile "tests/nextflow.config" + profile "test,docker" + +} diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index 8384b833..a84e064f 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -66,7 +66,7 @@ workflow PIPELINE_INITIALISATION { // // Custom validation for pipeline parameters // - validateInputParameters() + validateInputParameters() // Runs additional validation that is not done by 
$projectDir/nextflow_schema.json // // Create channel from input file provided through params.input @@ -74,24 +74,51 @@ workflow PIPELINE_INITIALISATION { Channel .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + .toList() + .flatMap { it.withIndex().collect { entry, idx -> entry + "${idx+1}" } } .map { - meta, fastq_1, fastq_2 -> + meta, fastq_1, fastq_2, idx -> + def tags = meta.tags ? meta.tags.tokenize(":") : [] + def updated_meta = meta + [ id:"${meta.sample}_${idx}", tags:tags ] if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + return [ + updated_meta.id, + updated_meta + [ single_end:true ], + [ fastq_1 ] + ] } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + return [ + updated_meta.id, + updated_meta + [ single_end:false ], + [ fastq_1, fastq_2 ] + ] } } .groupTuple() - .map { samplesheet -> - validateInputSamplesheet(samplesheet) - } .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] + validateInputSamplesheet(it) // Applies additional group validation checks that schema_input.json cannot do. 
} + .transpose() // Replace the map below + // .map { + // meta, fastqs -> + // return [ meta, fastqs.flatten() ] + // } .set { ch_samplesheet } + ch_samplesheet + .map { + meta, fastqs -> meta.tags + } + .flatten() + .unique() + .map { tag_name -> [tag_name.toLowerCase(), tag_name] } + .groupTuple() + .map { + tag_lowercase, tags -> + assert tags.size() == 1 : + "Tag name collision: " + tags.join(", ") + } + emit: samplesheet = ch_samplesheet versions = ch_versions @@ -153,7 +180,9 @@ workflow PIPELINE_COMPLETION { // Check and validate pipeline parameters // def validateInputParameters() { - genomeExistsError() + // genomeExistsError() + + // TODO: Add code to further validate pipeline parameters here } // diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test new file mode 100644 index 00000000..8fbff4a3 --- /dev/null +++ b/tests/MiSeq.main.nf.test @@ -0,0 +1,30 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on MiSeq data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("MiSeq data test") { + + when { + config "./MiSeq.main.nf.test.config" + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_software_versions.txt"), + ).match() + } + ) + } + } +} diff --git a/tests/MiSeq.main.nf.test.config b/tests/MiSeq.main.nf.test.config new file mode 100644 index 00000000..073a9774 --- /dev/null +++ b/tests/MiSeq.main.nf.test.config @@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/MiSeq/samplesheet.csv' +} diff --git 
a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap new file mode 100644 index 00000000..4613d525 --- /dev/null +++ b/tests/MiSeq.main.nf.test.snap @@ -0,0 +1,15 @@ +{ + "MiSeq data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,7b1b7fd457b60404768045b148d4c0a8", + "multiqc_general_stats.txt:md5,5b28a83b14cb2fe88d084d08900ebdbf", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-28T13:18:10.3675973" + } +} \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test new file mode 100644 index 00000000..d050399e --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test @@ -0,0 +1,50 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on NovaSeq6000 data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("NovaSeq6000 data test") { + + when { + config "./NovaSeq6000.main.nf.test.config" + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_citations.txt"), + 
path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_software_versions.txt"), + ).match() + }, + ) + } + } +} diff --git a/tests/NovaSeq6000.main.nf.test.config b/tests/NovaSeq6000.main.nf.test.config new file mode 100644 index 00000000..cad5edd9 --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test.config @@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv' +} diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap new file mode 100644 index 00000000..ee3c22b7 --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "NovaSeq6000 data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,3730f9046b20ac5c17a86db0a33f8d5d", + "multiqc_general_stats.txt:md5,25abe0f6a35eb4a3b056fc3cf5c13732", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3", + 
"multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,8284e25ccc21041cf3b5a32eb6a51e78", + "multiqc_general_stats.txt:md5,90ee35137492b80aab36ef67f72d8921", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,f38ffdc112c73af3a41ed15848a3761f", + "multiqc_general_stats.txt:md5,d62a2fc39e674d98783d408791803148", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,7ff71ceb8ecdf086331047f8860c3347", + "multiqc_general_stats.txt:md5,2f09b8f199ac40cf67ba50843cebd29c", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,519ff344a896ac369bba4d5c5b8be7b5", + "multiqc_general_stats.txt:md5,6a1c16f068d7ba3a9225a17eb570ed9a", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-28T13:19:13.226135825" + } +} \ No newline at end of file diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test new file mode 100644 index 00000000..8fec8b33 --- /dev/null +++ b/tests/PromethION.main.nf.test @@ -0,0 +1,30 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on PromethION data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("PromethION data test") { + + when { + config "./PromethION.main.nf.test.config" + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt"), + 
path("$outputDir/multiqc/global_report/multiqc_data/multiqc_software_versions.txt"), + ).match() + }, + ) + } + } +} diff --git a/tests/PromethION.main.nf.test.config b/tests/PromethION.main.nf.test.config new file mode 100644 index 00000000..e1498a49 --- /dev/null +++ b/tests/PromethION.main.nf.test.config @@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/PromethION/samplesheet.csv' +} diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap new file mode 100644 index 00000000..026a8cd2 --- /dev/null +++ b/tests/PromethION.main.nf.test.snap @@ -0,0 +1,15 @@ +{ + "PromethION data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,1a4b472e13cadc770832b0e20d1de7b0", + "multiqc_general_stats.txt:md5,409cefc7f17f95d176ced6032bf8fb32", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-28T13:19:57.261730412" + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 00000000..8d9ef461 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,21 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ + +params { + config_profile_name = 'nf-test profile' + config_profile_description = 'Configuration profile to use for nf-test.' 
+ + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '3.GB' + max_time = '2.h' + + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + + validationSchemaIgnoreParams = 'genomes,igenomes_base,pipelines_testdata_base_path' + + +} diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 4a9df1ca..59f19e5e 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -3,12 +3,17 @@ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { BBMAP_CLUMPIFY } from '../modules/nf-core/bbmap/clumpify/main' + + +include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' + +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -22,17 +27,29 @@ workflow SEQINSPECTOR { ch_samplesheet // channel: samplesheet read in from --input main: - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + ch_versions = Channel.empty() + 
ch_multiqc_files = Channel.empty() + ch_multiqc_extra_files = Channel.empty() + ch_multiqc_reports = Channel.empty() + // // MODULE: Run FastQC // FASTQC ( ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + // + // MODULE: Run BBMAP_CLUMPIFY + // + BBMAP_CLUMPIFY ( + ch_samplesheet + ) + ch_versions = ch_versions.mix(BBMAP_CLUMPIFY.out.versions) + + // // Collate and save software versions // @@ -48,46 +65,93 @@ workflow SEQINSPECTOR { // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? + ch_multiqc_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? + Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap( + summary_params = paramsSummaryMap( workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_files = ch_multiqc_files.mix( - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_workflow_summary = Channel.value( + paramsSummaryMultiqc(summary_params)) ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) ch_methods_description = Channel.value( methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix( + ch_multiqc_extra_files = ch_multiqc_extra_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_collated_versions) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix( ch_methods_description.collectFile( name: 'methods_description_mqc.yaml', sort: true ) ) - MULTIQC ( - ch_multiqc_files.collect(), + MULTIQC_GLOBAL ( + ch_multiqc_files + .map { meta, file -> file } + .mix(ch_multiqc_extra_files) + .collect(), + ch_multiqc_config.toList(), + Channel.empty().toList(), + ch_multiqc_logo.toList(), + Channel.empty().toList(), + Channel.empty().toList() + ) + + ch_tags = ch_multiqc_files + .map { meta, sample -> meta.tags } + .flatten() + .unique() + + multiqc_extra_files_per_tag = ch_tags + .combine(ch_multiqc_extra_files) + + // Group samples by tag + tagged_mqc_files = ch_tags + .combine(ch_multiqc_files) + .filter { sample_tag, meta, sample -> sample_tag in meta.tags } + .map { sample_tag, meta, sample -> [sample_tag, sample] } + .mix(multiqc_extra_files_per_tag) + .groupTuple() + .tap { mqc_by_tag } + .collectFile { + sample_tag, samples -> + def prefix_tag = "[TAG:${sample_tag}]" + [ + "${prefix_tag}_multiqc_extra_config.yml", + """ + |output_fn_name: \"${prefix_tag}_multiqc_report.html\" + |data_dir_name: \"${prefix_tag}_multiqc_data\" + |plots_dir_name: \"${prefix_tag}_multiqc_plots\" + """.stripMargin() + ] + } + .map { file -> [ (file =~ /\[TAG:(.+)\]/)[0][1], file ] } + .join(mqc_by_tag) + .multiMap { sample_tag, config, samples -> + samples_per_tag: samples + config: config + } + + MULTIQC_PER_TAG( + 
tagged_mqc_files.samples_per_tag, ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), + tagged_mqc_files.config, ch_multiqc_logo.toList(), [], [] ) - emit:multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] - + emit: + global_report = MULTIQC_GLOBAL.out.report.toList() // channel: /path/to/multiqc_report.html + grouped_reports = MULTIQC_PER_TAG.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + versions = ch_versions // channel: [ path(versions.yml) ] } /*