From 56e01a0357334b461662398db8c89fd6340a5b73 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Tue, 19 Mar 2024 10:09:55 +0100 Subject: [PATCH 01/60] Update assets/schema_input.json --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index fc4fdb96..332031c2 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -41,7 +41,7 @@ }, "rundir": { "type": "string", - "format": "file-path", + "format": "directory-path", "exists": true, "errorMessage": "Run directory must be a path", "meta": ["rundir"] From 6c94332bf1ba3738e39c80031f6d83436a4832b4 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Wed, 20 Mar 2024 09:16:32 +0100 Subject: [PATCH 02/60] grop instead of project in a single place Co-authored-by: Adrien Coulier --- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index e1f8e4de..cdbb6400 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. 
Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def id_string = "${meta.sample}_${meta.group ?: "ungrouped"}_${meta.lane}" + def id_string = "${meta.sample}_${meta.project ?: "ungrouped"}_${meta.lane}" def updated_meta = meta + [ id: id_string ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] From 4779844462856d017d7ecdcba13cf018198caf3b Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Wed, 20 Mar 2024 09:51:59 +0100 Subject: [PATCH 03/60] Updated test profile input --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index fbbffdd6..38e9ee32 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,7 +22,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = 'https://raw.githubusercontent.com/KarNair/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv' // Genome references genome = 'R64-1-1' From 28e0137e65c001682ce5e3257ea6fe6248ccf85e Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Wed, 20 Mar 2024 14:51:35 +0100 Subject: [PATCH 04/60] Update assets/schema_input.json Co-authored-by: Karthik Nair <35717861+KarNair@users.noreply.github.com> --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 332031c2..9fb321b5 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -41,7 +41,7 @@ }, "rundir": { "type": "string", - "format": "directory-path", + "format": "path", "exists": true, "errorMessage": "Run directory must be a path", "meta": ["rundir"] From 
a31040e055e38f8a530a99fcc865b2bf9f2e14b9 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 18 Mar 2024 15:16:00 +0100 Subject: [PATCH 05/60] Generate reports per lane, group and rundir --- conf/modules.config | 20 ++++++- main.nf | 5 +- workflows/seqinspector.nf | 120 ++++++++++++++++++++++++++++++++++---- 3 files changed, 133 insertions(+), 12 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index e3ea8fa6..296ca786 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -35,7 +35,25 @@ process { publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null + break + case ~/L\d+_multiqc_(report\.html|plots|data)/: + "lanes/L${(filename =~ /L(\d+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + break + case ~/G-.+_multiqc_(report\.html|plots|data)/: + "groups/G-${(filename =~ /G-(.+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + break + case ~/D-.+_multiqc_(report\.html|plots|data)/: + "rundirs/D-${(filename =~ /D-(.+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + break + default: + filename + } + } ] } diff --git a/main.nf b/main.nf index 1e9c2ede..58afd1fe 100644 --- a/main.nf +++ b/main.nf @@ -58,7 +58,10 @@ workflow NFCORE_SEQINSPECTOR { ) emit: - multiqc_report = SEQINSPECTOR.out.multiqc_report // channel: /path/to/multiqc_report.html + global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html + lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html + group_report = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html + rundir_report = SEQINSPECTOR.out.rundir_reports // channel: /path/to/multiqc_report.html } /* diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 9ae3384b..b6fd3bc4 100644 --- a/workflows/seqinspector.nf +++ 
b/workflows/seqinspector.nf @@ -5,7 +5,10 @@ */ include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_RUNDIR } from '../modules/nf-core/multiqc/main' include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -26,6 +29,8 @@ workflow SEQINSPECTOR { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() + ch_multiqc_extra_files = Channel.empty() + ch_multiqc_reports = Channel.empty() // // MODULE: Run FastQC @@ -33,7 +38,7 @@ workflow SEQINSPECTOR { FASTQC ( ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // @@ -46,26 +51,121 @@ workflow SEQINSPECTOR { // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_config = params.multiqc_config ? + Channel.fromPath(params.multiqc_config, checkIfExists: true) : + Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) ch_multiqc_logo = params.multiqc_logo ? 
Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_collated_versions) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) MULTIQC ( - ch_multiqc_files.collect(), + ch_multiqc_files + .map { meta, file -> file } + .mix(ch_multiqc_extra_files) + .collect(), ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), + Channel.empty().toList(), + ch_multiqc_logo.toList() + ) + + multiqc_extra_files = ch_multiqc_extra_files.toList() + + // Generate reports by lane + lane_mqc_files = ch_multiqc_files + .map { meta, sample -> [ "L${meta.lane}", meta, sample ] } + .groupTuple() + .tap { mqc_by_lane } + .collectFile{ + lane, meta, samples -> [ + "${lane}_multiqc_extra_config.yml", + "output_fn_name: \"${lane}_multiqc_report.html\"\ndata_dir_name: \"${lane}_multiqc_data\"\nplots_dir_name: \"${lane}_multiqc_plots\"" + ] + } + .map { file -> def fileparts = file.name.split("_") + [ fileparts[0], file ] + } + 
.join(mqc_by_lane) + .multiMap { lane, config, meta , samples_per_lane -> + samples_per_lane: samples_per_lane + config: config + } + + MULTIQC_PER_LANE( + lane_mqc_files.samples_per_lane + .map { samples -> samples + multiqc_extra_files.value }, + ch_multiqc_config.toList(), + lane_mqc_files.config, + ch_multiqc_logo.toList() + ) + + // Generate reports by group + group_mqc_files = ch_multiqc_files + .filter { meta, sample -> meta.group } + .map { meta, sample -> [ "G-${meta.group}", meta, sample ] } + .groupTuple() + .tap { mqc_by_group } + .collectFile{ + group, meta, samples -> [ + "${group}_multiqc_extra_config.yml", + "output_fn_name: \"${group}_multiqc_report.html\"\ndata_dir_name: \"${group}_multiqc_data\"\nplots_dir_name: \"${group}_multiqc_plots\"" + ] + } + .map { file -> def fileparts = file.name.split("_") + [ fileparts[0], file ] + } + .join(mqc_by_group) + .multiMap { group, config, meta , samples_per_group -> + samples_per_group: samples_per_group + config: config + } + + MULTIQC_PER_GROUP( + group_mqc_files.samples_per_group + .map { samples -> samples + multiqc_extra_files.value }, + ch_multiqc_config.toList(), + group_mqc_files.config, + ch_multiqc_logo.toList() + ) + + // Generate reports by rundir + rundir_mqc_files = ch_multiqc_files + .filter { meta, sample -> meta.rundir } + .map { meta, sample -> [ "D-${meta.rundir.name}", meta, sample ] } + .groupTuple() + .tap { mqc_by_rundir } + .collectFile{ + rundir, meta, samples -> [ + "${rundir}_multiqc_extra_config.yml", + "output_fn_name: \"${rundir}_multiqc_report.html\"\ndata_dir_name: \"${rundir}_multiqc_data\"\nplots_dir_name: \"${rundir}_multiqc_plots\"" + ] + } + .map { file -> def fileparts = file.name.split("_") + [ fileparts[0], file ] + } + .join(mqc_by_rundir) + .multiMap { rundir, config, meta , samples_per_rundir -> + samples_per_rundir: samples_per_rundir + config: config + } + + MULTIQC_PER_RUNDIR( + rundir_mqc_files.samples_per_rundir + .map { samples -> samples + 
multiqc_extra_files.value }, + ch_multiqc_config.toList(), + rundir_mqc_files.config, ch_multiqc_logo.toList() ) emit: - multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + global_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + rundir_reports = MULTIQC_PER_RUNDIR.out.report.toList() // channel: [ /path/to/multiqc_report.html ] versions = ch_versions // channel: [ path(versions.yml) ] } From 0da58701e706f23001df33bf997bf671ea37a787 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Thu, 28 Mar 2024 16:21:45 +0100 Subject: [PATCH 06/60] Improve formatting --- conf/modules.config | 2 +- main.nf | 6 ++-- workflows/seqinspector.nf | 71 +++++++++++++++++++++++++-------------- 3 files changed, 50 insertions(+), 29 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 296ca786..8b7a9c69 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -52,7 +52,7 @@ process { break default: filename - } + } } ] } diff --git a/main.nf b/main.nf index 58afd1fe..1ee043e9 100644 --- a/main.nf +++ b/main.nf @@ -58,9 +58,9 @@ workflow NFCORE_SEQINSPECTOR { ) emit: - global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html - lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html - group_report = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html + global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html + lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html + group_report = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html rundir_report = SEQINSPECTOR.out.rundir_reports // channel: /path/to/multiqc_report.html } diff --git 
a/workflows/seqinspector.nf b/workflows/seqinspector.nf index b6fd3bc4..f0a269a0 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -4,15 +4,17 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' + +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_RUNDIR } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-validation' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' + +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -27,10 +29,10 @@ workflow SEQINSPECTOR { main: - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() ch_multiqc_extra_files = Channel.empty() - ch_multiqc_reports = Channel.empty() + ch_multiqc_reports = Channel.empty() // // MODULE: Run FastQC @@ 
-45,23 +47,42 @@ workflow SEQINSPECTOR { // Collate and save software versions // softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_collated_versions } + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_pipeline_software_mqc_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } // // MODULE: MultiQC // - ch_multiqc_config = params.multiqc_config ? + ch_multiqc_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_collated_versions) - ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_logo = params.multiqc_logo ? 
+ Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.empty() + + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value( + paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? + file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_extra_files = ch_multiqc_extra_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_collated_versions) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: false + ) + ) MULTIQC ( ch_multiqc_files @@ -163,10 +184,10 @@ workflow SEQINSPECTOR { emit: global_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] - group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] rundir_reports = MULTIQC_PER_RUNDIR.out.report.toList() // channel: [ /path/to/multiqc_report.html ] - versions = ch_versions // channel: [ path(versions.yml) ] + versions = ch_versions // channel: [ path(versions.yml) ] } /* From d233d8f4052208b4e572ddb1e8d561387df1cce5 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Thu, 28 Mar 2024 16:56:51 +0100 Subject: [PATCH 07/60] Improve output sorting --- conf/modules.config | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 
deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 8b7a9c69..3b2fc025 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -42,13 +42,25 @@ process { null break case ~/L\d+_multiqc_(report\.html|plots|data)/: - "lanes/L${(filename =~ /L(\d+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + def lane = (filename =~ /L(\d+)_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?<prefix>.*)L${lane}_(?<suffix>multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "lanes/L${lane}/${new_filename}" break case ~/G-.+_multiqc_(report\.html|plots|data)/: - "groups/G-${(filename =~ /G-(.+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + def group = (filename =~ /G-(.+)_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?<prefix>.*)G-${group}_(?<suffix>multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "groups/${group}/${new_filename}" break case ~/D-.+_multiqc_(report\.html|plots|data)/: - "rundirs/D-${(filename =~ /D-(.+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + def rundir = (filename =~ /D-(.+)_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?<prefix>.*)D-${rundir}_(?<suffix>multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "rundirs/${rundir}/${new_filename}" break default: filename From 307e43c6a21c77ad30c8f02997c3220568d35fb2 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 8 Apr 2024 14:11:47 +0200 Subject: [PATCH 08/60] Use `group` instead of `project` --- assets/schema_input.json | 4 ++-- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 9fb321b5..1648944f 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -19,11 +19,11 @@ "errorMessage": "Lane ID must be a number", "meta": ["lane"] }, - "project": { + "group": { "type":
"string", "pattern": "^\\S+$", "errorMessage": "Project ID cannot contain spaces", - "meta": ["project"] + "meta": ["group"] }, "fastq_1": { "type": "string", diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index cdbb6400..e1f8e4de 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def id_string = "${meta.sample}_${meta.project ?: "ungrouped"}_${meta.lane}" + def id_string = "${meta.sample}_${meta.group ?: "ungrouped"}_${meta.lane}" def updated_meta = meta + [ id: id_string ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] From 627cf940a2a6b5f7cca7cbb692af1cf74293a289 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 8 Apr 2024 14:21:59 +0200 Subject: [PATCH 09/60] Fix output channel --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 1ee043e9..9de4f94e 100644 --- a/main.nf +++ b/main.nf @@ -104,7 +104,7 @@ workflow { params.outdir, params.monochrome_logs, params.hook_url, - NFCORE_SEQINSPECTOR.out.multiqc_report + NFCORE_SEQINSPECTOR.out.global_report, ) } From c9ba02866dc35a1b345476645ece7f9a6e48cc8b Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 8 Apr 2024 14:22:17 +0200 Subject: [PATCH 10/60] Fix linting --- conf/modules.config | 2 +- .../local/utils_nfcore_seqinspector_pipeline/main.nf | 1 - workflows/seqinspector.nf | 6 +++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 3b2fc025..44a35137 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ 
-36,7 +36,7 @@ process { path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, saveAs: { - filename -> + filename -> switch (filename) { case 'versions.yml': null diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index e1f8e4de..9001400e 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -101,7 +101,6 @@ workflow PIPELINE_INITIALISATION { // meta, fastqs -> // return [ meta, fastqs.flatten() ] // } - .view() .set { ch_samplesheet } emit: diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index f0a269a0..815bc40f 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -108,7 +108,7 @@ workflow SEQINSPECTOR { ] } .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] + [ fileparts[0], file ] } .join(mqc_by_lane) .multiMap { lane, config, meta , samples_per_lane -> @@ -137,7 +137,7 @@ workflow SEQINSPECTOR { ] } .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] + [ fileparts[0], file ] } .join(mqc_by_group) .multiMap { group, config, meta , samples_per_group -> @@ -166,7 +166,7 @@ workflow SEQINSPECTOR { ] } .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] + [ fileparts[0], file ] } .join(mqc_by_rundir) .multiMap { rundir, config, meta , samples_per_rundir -> From 2cfc91d449bac7c1a6d22d125fd57e3d8592a51b Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 8 Apr 2024 15:48:41 +0200 Subject: [PATCH 11/60] Give credits back to NGI --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5e2bc4ca..abe0d4ef 100644 --- a/README.md +++ b/README.md @@ -80,11 +80,11 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/seqinspector was originally written by Adrien Coulier. 
+nf-core/seqinspector was originally written by the Swedish [@NationalGenomicsInfrastructure](https://github.com/NationalGenomicsInfrastructure/). We thank the following people for their extensive assistance in the development of this pipeline: - +- [@mahesh-panchal](https://github.com/mahesh-panchal) ## Contributions and Support From fbfb02dbba5945fcfcf8d431cccd7a230eea5ff5 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 9 Apr 2024 09:26:32 +0200 Subject: [PATCH 12/60] Fix file names --- conf/modules.config | 18 +++++++++--------- workflows/seqinspector.nf | 24 ++++++++++++++++++------ 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 44a35137..b2e48f3d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -41,24 +41,24 @@ process { case 'versions.yml': null break - case ~/L\d+_multiqc_(report\.html|plots|data)/: - def lane = (filename =~ /L(\d+)_multiqc_(report\.html|plots|data)/)[0][1] + case ~/\[LANE:\d+\]_multiqc_(report\.html|plots|data)/: + def lane = (filename =~ /\[LANE:(\d+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( - "(?<prefix>.*)L${lane}_(?<suffix>multiqc_(report\\.html|plots|data).*)", + "(?<prefix>.*)\\[LANE:${lane}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') "lanes/L${lane}/${new_filename}" break - case ~/G-.+_multiqc_(report\.html|plots|data)/: - def group = (filename =~ /G-(.+)_multiqc_(report\.html|plots|data)/)[0][1] + case ~/\[GROUP:.+\]_multiqc_(report\.html|plots|data)/: + def group = (filename =~ /\[GROUP:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( - "(?<prefix>.*)G-${group}_(?<suffix>multiqc_(report\\.html|plots|data).*)", + "(?<prefix>.*)\\[GROUP:${group}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') "groups/${group}/${new_filename}" break - case ~/D-.+_multiqc_(report\.html|plots|data)/: - def rundir = (filename =~ /D-(.+)_multiqc_(report\.html|plots|data)/)[0][1] + case
~/\[RUNDIR:.+\]_multiqc_(report\.html|plots|data)/: + def rundir = (filename =~ /\[RUNDIR:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( - "(?<prefix>.*)D-${rundir}_(?<suffix>multiqc_(report\\.html|plots|data).*)", + "(?<prefix>.*)\\[RUNDIR:${rundir}\\]_(?<suffix>multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') "rundirs/${rundir}/${new_filename}" break diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 815bc40f..2acafc3f 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -98,13 +98,17 @@ workflow SEQINSPECTOR { // Generate reports by lane lane_mqc_files = ch_multiqc_files - .map { meta, sample -> [ "L${meta.lane}", meta, sample ] } + .map { meta, sample -> [ "[LANE:${meta.lane}]", meta, sample ] } .groupTuple() .tap { mqc_by_lane } .collectFile{ lane, meta, samples -> [ "${lane}_multiqc_extra_config.yml", - "output_fn_name: \"${lane}_multiqc_report.html\"\ndata_dir_name: \"${lane}_multiqc_data\"\nplots_dir_name: \"${lane}_multiqc_plots\"" + """ + |output_fn_name: \"${lane}_multiqc_report.html\" + |data_dir_name: \"${lane}_multiqc_data\" + |plots_dir_name: \"${lane}_multiqc_plots\" + """.stripMargin() ] } .map { file -> def fileparts = file.name.split("_") @@ -127,13 +131,17 @@ workflow SEQINSPECTOR { // Generate reports by group group_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.group } - .map { meta, sample -> [ "G-${meta.group}", meta, sample ] } + .map { meta, sample -> [ "[GROUP:${meta.group}]", meta, sample ] } .groupTuple() .tap { mqc_by_group } .collectFile{ group, meta, samples -> [ "${group}_multiqc_extra_config.yml", - "output_fn_name: \"${group}_multiqc_report.html\"\ndata_dir_name: \"${group}_multiqc_data\"\nplots_dir_name: \"${group}_multiqc_plots\"" + """ + |output_fn_name: \"${group}_multiqc_report.html\" + |data_dir_name: \"${group}_multiqc_data\" + |plots_dir_name: \"${group}_multiqc_plots\" + """.stripMargin() ] } .map { file -> def fileparts =
file.name.split("_") @@ -156,13 +164,17 @@ workflow SEQINSPECTOR { // Generate reports by rundir rundir_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.rundir } - .map { meta, sample -> [ "D-${meta.rundir.name}", meta, sample ] } + .map { meta, sample -> [ "[RUNDIR:${meta.rundir.name}]", meta, sample ] } .groupTuple() .tap { mqc_by_rundir } .collectFile{ rundir, meta, samples -> [ "${rundir}_multiqc_extra_config.yml", - "output_fn_name: \"${rundir}_multiqc_report.html\"\ndata_dir_name: \"${rundir}_multiqc_data\"\nplots_dir_name: \"${rundir}_multiqc_plots\"" + """ + |output_fn_name: \"${rundir}_multiqc_report.html\" + |data_dir_name: \"${rundir}_multiqc_data\" + |plots_dir_name: \"${rundir}_multiqc_plots\" + """.stripMargin() ] } .map { file -> def fileparts = file.name.split("_") From 8a19929d03832ecaf95a50962898882de04e6884 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Fri, 3 May 2024 09:50:42 +0200 Subject: [PATCH 13/60] Set up tests --- .gitignore | 1 + nf-test.config | 8 ++++++++ tests/main.nf.test | 20 +++++++++++++++++++ tests/nextflow.config | 5 +++++ tests/workflows/seqinspector.nf.test | 30 ++++++++++++++++++++++++++++ 5 files changed, 64 insertions(+) create mode 100644 nf-test.config create mode 100644 tests/main.nf.test create mode 100644 tests/nextflow.config create mode 100644 tests/workflows/seqinspector.nf.test diff --git a/.gitignore b/.gitignore index 5124c9ac..089a4079 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ results/ testing/ testing* *.pyc +.nf-test diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..6969c085 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,8 @@ +config { + + testsDir "tests" + workDir ".nf-test" + configFile "tests/nextflow.config" + profile "test,docker" + +} diff --git a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 00000000..72498b5b --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,20 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + 
script "main.nf" + + test("Should run without failures") { + + when { + params { + outdir = "tests/results" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 00000000..c19b1ad0 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,5 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ diff --git a/tests/workflows/seqinspector.nf.test b/tests/workflows/seqinspector.nf.test new file mode 100644 index 00000000..bbb529dd --- /dev/null +++ b/tests/workflows/seqinspector.nf.test @@ -0,0 +1,30 @@ +nextflow_workflow { + + name "Test Workflow SEQINSPECTOR" + script "workflows/seqinspector.nf" + workflow "SEQINSPECTOR" + + test("Should run without failures pipeline") { + + when { + params { + // define parameters here. Example: + // outdir = "tests/results" + } + workflow { + """ + // define inputs of the workflow here. 
Example: + // input[0] = file("https://raw.githubusercontent.com/nf-core/test-datasets/e47966b63444ec0fcdef23bfc410eeca22535ac7/testdata/MiSeq/samplesheet.csv") + input[0] = file("assets/samplesheet.csv") + """ + } + } + + then { + assert workflow.success + assert snapshot(workflow.out).match() + } + + } + +} From eba628ef9389a13214e6c9663965d6ea56d7e994 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 12:18:42 +0200 Subject: [PATCH 14/60] point test conf upstream --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 38e9ee32..2c26a9c1 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,7 +22,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/KarNair/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv' // Genome references genome = 'R64-1-1' From 23f69d1302457fc32295c3c77bd41d443bb96d9e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 13:05:47 +0200 Subject: [PATCH 15/60] project -> group --- assets/samplesheet.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index fbe5de2d..fef3b4e6 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sample,lane,project,fastq_1,fastq_2,rundir +sample,lane,group,fastq_1,fastq_2,rundir SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir From 1ebf3f1adc86f3c50020a56f95430b06f3a5000a Mon Sep 17 00:00:00 2001 From: kedhammar Date: 
Fri, 3 May 2024 13:06:48 +0200 Subject: [PATCH 16/60] project -> group --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 1648944f..7115bfab 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -22,7 +22,7 @@ "group": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Project ID cannot contain spaces", + "errorMessage": "Group ID cannot contain spaces", "meta": ["group"] }, "fastq_1": { From b0bf471667c0547654afd14d668e8f70aab55185 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 13:53:31 +0200 Subject: [PATCH 17/60] make lane non-compulsory --- assets/schema_input.json | 2 +- workflows/seqinspector.nf | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 7115bfab..c9800d5d 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -47,7 +47,7 @@ "meta": ["rundir"] } }, - "required": ["sample", "lane", "fastq_1"], + "required": ["sample", "fastq_1"], "dependentRequired": { "fastq_2": ["fastq_1"] } diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 2acafc3f..018e079d 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -98,6 +98,7 @@ workflow SEQINSPECTOR { // Generate reports by lane lane_mqc_files = ch_multiqc_files + .filter { meta, sample -> meta.lane } .map { meta, sample -> [ "[LANE:${meta.lane}]", meta, sample ] } .groupTuple() .tap { mqc_by_lane } From 9e0eca39baf5b58760ec4117b4faefde4a512f09 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 15:59:10 +0200 Subject: [PATCH 18/60] remove unused file --- assets/samplesheet.csv | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 assets/samplesheet.csv diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv deleted file mode 100644 index fef3b4e6..00000000 --- a/assets/samplesheet.csv +++ /dev/null @@ -1,3 +0,0 @@ 
-sample,lane,group,fastq_1,fastq_2,rundir -SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir -SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir From 98e60bf160cb6a4476d55b38a44095e67f79eeea Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 16:38:52 +0200 Subject: [PATCH 19/60] revamp nf-test, run once for each sequencing platform --- tests/MiSeq.main.nf.test | 23 +++++++++++++++++++++ tests/NovaSeq6000.main.nf.test | 23 +++++++++++++++++++++ tests/PromethION.main.nf.test | 23 +++++++++++++++++++++ tests/main.nf.test | 20 ------------------- tests/nextflow.config | 11 ++++++++++ tests/workflows/seqinspector.nf.test | 30 ---------------------------- 6 files changed, 80 insertions(+), 50 deletions(-) create mode 100644 tests/MiSeq.main.nf.test create mode 100644 tests/NovaSeq6000.main.nf.test create mode 100644 tests/PromethION.main.nf.test delete mode 100644 tests/main.nf.test delete mode 100644 tests/workflows/seqinspector.nf.test diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test new file mode 100644 index 00000000..0246a75b --- /dev/null +++ b/tests/MiSeq.main.nf.test @@ -0,0 +1,23 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on MiSeq data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("MiSeq data test") { + + when { + params { + outdir = "tests/results/MiSeq" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test new file mode 100644 index 00000000..7c410e57 --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test @@ -0,0 +1,23 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on NovaSeq6000 data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + 
test("NovaSeq6000 data test") { + + when { + params { + outdir = "tests/results/NovaSeq6000" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/NovaSeq6000/samplesheet.csv" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test new file mode 100644 index 00000000..9987c953 --- /dev/null +++ b/tests/PromethION.main.nf.test @@ -0,0 +1,23 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on PromethION data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("PromethION data test") { + + when { + params { + outdir = "tests/results/PromethION" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/PromethION/samplesheet.csv" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/main.nf.test b/tests/main.nf.test deleted file mode 100644 index 72498b5b..00000000 --- a/tests/main.nf.test +++ /dev/null @@ -1,20 +0,0 @@ -nextflow_pipeline { - - name "Test Workflow main.nf" - script "main.nf" - - test("Should run without failures") { - - when { - params { - outdir = "tests/results" - } - } - - then { - assert workflow.success - } - - } - -} diff --git a/tests/nextflow.config b/tests/nextflow.config index c19b1ad0..422545be 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -3,3 +3,14 @@ Nextflow config file for running tests ======================================================================================== */ + +params { + config_profile_name = 'nf-test profile' + config_profile_description = 'Configuration profile to use for nf-test.' 
+ + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '3.GB' + max_time = '2.h' + +} diff --git a/tests/workflows/seqinspector.nf.test b/tests/workflows/seqinspector.nf.test deleted file mode 100644 index bbb529dd..00000000 --- a/tests/workflows/seqinspector.nf.test +++ /dev/null @@ -1,30 +0,0 @@ -nextflow_workflow { - - name "Test Workflow SEQINSPECTOR" - script "workflows/seqinspector.nf" - workflow "SEQINSPECTOR" - - test("Should run without failures pipeline") { - - when { - params { - // define parameters here. Example: - // outdir = "tests/results" - } - workflow { - """ - // define inputs of the workflow here. Example: - // input[0] = file("https://raw.githubusercontent.com/nf-core/test-datasets/e47966b63444ec0fcdef23bfc410eeca22535ac7/testdata/MiSeq/samplesheet.csv") - input[0] = file("assets/samplesheet.csv") - """ - } - } - - then { - assert workflow.success - assert snapshot(workflow.out).match() - } - - } - -} From d95c660d8fbdba413920d45a7f7025e9261989c0 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 6 May 2024 15:54:05 +0200 Subject: [PATCH 20/60] Update modules and subworkflows --- modules.json | 4 ++-- modules/nf-core/fastqc/main.nf | 6 ++++++ subworkflows/nf-core/utils_nfcore_pipeline/main.nf | 8 +++++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/modules.json b/modules.json index ebbc5dc2..87fe816c 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "fastqc": { "branch": "master", - "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", + "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", "installed_by": ["modules"] }, "multiqc": { @@ -26,7 +26,7 @@ }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf 
index 9e19a74c..d79f1c86 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -25,6 +25,11 @@ process FASTQC { def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) + """ printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name @@ -33,6 +38,7 @@ process FASTQC { fastqc \\ $args \\ --threads $task.cpus \\ + --memory $fastqc_memory \\ $renamed_files cat <<-END_VERSIONS > versions.yml diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a8b55d6f..14558c39 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) { // Citation string for pipeline // def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + - " ${workflow.manifest.doi}\n\n" + + temp_doi_ref + "\n" + "* The nf-core framework\n" + " 
https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + From e0527ccb6c235fcb01f5fa26e1b192e705b3f873 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 7 May 2024 16:45:09 +0200 Subject: [PATCH 21/60] Fix multiqc extra files Issue with the previous implementation was that sometimes MULTIQC_PER_LANE would execute before the extra files were collected into `ch_multiqc_extra_files`, causing `null` to be added to the list of files passed to multiqc. --- workflows/seqinspector.nf | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 018e079d..65343a8b 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -94,11 +94,17 @@ workflow SEQINSPECTOR { ch_multiqc_logo.toList() ) - multiqc_extra_files = ch_multiqc_extra_files.toList() - // Generate reports by lane + multiqc_extra_files_per_lane = ch_multiqc_files + .filter { meta, sample -> meta.lane } + .map { meta, sample -> meta.lane } + .unique() + .map { lane -> [lane:lane] } + .cross(ch_multiqc_extra_files) + lane_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.lane } + .mix(multiqc_extra_files_per_lane) .map { meta, sample -> [ "[LANE:${meta.lane}]", meta, sample ] } .groupTuple() .tap { mqc_by_lane } @@ -122,16 +128,23 @@ workflow SEQINSPECTOR { } MULTIQC_PER_LANE( - lane_mqc_files.samples_per_lane - .map { samples -> samples + multiqc_extra_files.value }, + lane_mqc_files.samples_per_lane, ch_multiqc_config.toList(), lane_mqc_files.config, ch_multiqc_logo.toList() ) // Generate reports by group + multiqc_extra_files_per_group = ch_multiqc_files + .filter { meta, sample -> meta.group } + .map { meta, sample -> meta.group } + .unique() + .map { group -> [group:group] } + .cross(ch_multiqc_extra_files) + group_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.group } + .mix(multiqc_extra_files_per_group) .map { meta, sample -> [ 
"[GROUP:${meta.group}]", meta, sample ] } .groupTuple() .tap { mqc_by_group } @@ -155,16 +168,23 @@ workflow SEQINSPECTOR { } MULTIQC_PER_GROUP( - group_mqc_files.samples_per_group - .map { samples -> samples + multiqc_extra_files.value }, + group_mqc_files.samples_per_group, ch_multiqc_config.toList(), group_mqc_files.config, ch_multiqc_logo.toList() ) // Generate reports by rundir + multiqc_extra_files_per_rundir = ch_multiqc_files + .filter { meta, sample -> meta.rundir } + .map { meta, sample -> meta.rundir } + .unique() + .map { rundir -> [rundir:rundir] } + .cross(ch_multiqc_extra_files) + rundir_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.rundir } + .mix(multiqc_extra_files_per_rundir) .map { meta, sample -> [ "[RUNDIR:${meta.rundir.name}]", meta, sample ] } .groupTuple() .tap { mqc_by_rundir } @@ -188,8 +208,7 @@ workflow SEQINSPECTOR { } MULTIQC_PER_RUNDIR( - rundir_mqc_files.samples_per_rundir - .map { samples -> samples + multiqc_extra_files.value }, + rundir_mqc_files.samples_per_rundir, ch_multiqc_config.toList(), rundir_mqc_files.config, ch_multiqc_logo.toList() From 42159a132e7f73f2594fcb0e66ca26702311cefb Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 10:29:17 +0200 Subject: [PATCH 22/60] Add test snapshots --- tests/MiSeq.main.nf.test | 25 +++++++++++++++--- tests/MiSeq.main.nf.test.snap | 19 ++++++++++++++ tests/NovaSeq6000.main.nf.test | 40 ++++++++++++++++++++++++++--- tests/NovaSeq6000.main.nf.test.snap | 31 ++++++++++++++++++++++ tests/PromethION.main.nf.test | 20 ++++++++++++--- tests/PromethION.main.nf.test.snap | 15 +++++++++++ workflows/seqinspector.nf | 18 +++++-------- 7 files changed, 144 insertions(+), 24 deletions(-) create mode 100644 tests/MiSeq.main.nf.test.snap create mode 100644 tests/NovaSeq6000.main.nf.test.snap create mode 100644 tests/PromethION.main.nf.test.snap diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test index 0246a75b..1e72de95 100644 --- a/tests/MiSeq.main.nf.test 
+++ b/tests/MiSeq.main.nf.test @@ -9,15 +9,32 @@ nextflow_pipeline { when { params { - outdir = "tests/results/MiSeq" + outdir = "$outputDir" input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv" } } then { - assert workflow.success - } + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_software_versions.txt"), - } + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + ).match() + } + ) + } + } } diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap new file mode 100644 index 00000000..e222b515 --- /dev/null +++ b/tests/MiSeq.main.nf.test.snap @@ -0,0 +1,19 @@ +{ + "MiSeq data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", + "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", + "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + 
"multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", + "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + ], + "timestamp": "2024-05-08T16:29:33.284003" + } +} \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index 7c410e57..f5de9aa3 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -9,15 +9,47 @@ nextflow_pipeline { when { params { - outdir = "tests/results/NovaSeq6000" + outdir = "$outputDir" input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/NovaSeq6000/samplesheet.csv" } } then { - assert workflow.success - } + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_software_versions.txt"), - } + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_citations.txt"), + 
path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + ).match() + }, + ) + } + } } diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap new file mode 100644 index 00000000..a406b891 --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "NovaSeq6000 data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", + "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,9b4fd8a6d6e8a9acabecd592f633472e", + "multiqc_general_stats.txt:md5,8237b88ceb018d3cb1edcea62d10f4a2", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,9246a5b6b7b0410c79049fc3dbd08e92", + "multiqc_general_stats.txt:md5,44328403f423c6f5ac9ee0a8a01e6725", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + 
"multiqc_fastqc.txt:md5,84820276fae52d4d492831280ae6207c", + "multiqc_general_stats.txt:md5,dd07799e5e4b9d389f9de49a852c3363", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,59ae05d89453da6f57010ffb6466f902", + "multiqc_general_stats.txt:md5,e4629691992bfe639c01a84b90563334", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", + "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + ], + "timestamp": "2024-05-14T10:17:35.440827" + } +} \ No newline at end of file diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test index 9987c953..de2beb35 100644 --- a/tests/PromethION.main.nf.test +++ b/tests/PromethION.main.nf.test @@ -9,15 +9,27 @@ nextflow_pipeline { when { params { - outdir = "tests/results/PromethION" + outdir = "$outputDir" input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/PromethION/samplesheet.csv" } } then { - assert workflow.success - } + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + 
path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + ).match() + }, + ) + } } - } diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap new file mode 100644 index 00000000..24e52bd1 --- /dev/null +++ b/tests/PromethION.main.nf.test.snap @@ -0,0 +1,15 @@ +{ + "PromethION data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", + "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", + "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + ], + "timestamp": "2024-05-08T17:17:23.151259" + } +} \ No newline at end of file diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 65343a8b..3e2a08dc 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -100,7 +100,7 @@ workflow SEQINSPECTOR { .map { meta, sample -> meta.lane } .unique() .map { lane -> [lane:lane] } - .cross(ch_multiqc_extra_files) + .combine(ch_multiqc_extra_files) lane_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.lane } @@ -118,9 +118,7 @@ workflow SEQINSPECTOR { """.stripMargin() ] } - .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] - } + .map { file -> [ (file =~ /(\[LANE:.+\])/)[0][1], file ] } .join(mqc_by_lane) .multiMap { lane, config, meta , samples_per_lane -> samples_per_lane: samples_per_lane @@ -140,7 +138,7 @@ workflow SEQINSPECTOR { .map { meta, sample -> meta.group } .unique() .map { group -> [group:group] } - .cross(ch_multiqc_extra_files) + .combine(ch_multiqc_extra_files) group_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.group } @@ -158,9 +156,7 @@ workflow 
SEQINSPECTOR { """.stripMargin() ] } - .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] - } + .map { file -> [ (file =~ /(\[GROUP:.+\])/)[0][1], file ] } .join(mqc_by_group) .multiMap { group, config, meta , samples_per_group -> samples_per_group: samples_per_group @@ -180,7 +176,7 @@ workflow SEQINSPECTOR { .map { meta, sample -> meta.rundir } .unique() .map { rundir -> [rundir:rundir] } - .cross(ch_multiqc_extra_files) + .combine(ch_multiqc_extra_files) rundir_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.rundir } @@ -198,9 +194,7 @@ workflow SEQINSPECTOR { """.stripMargin() ] } - .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] - } + .map { file -> [ (file =~ /(\[RUNDIR:.+\])/)[0][1], file ] } .join(mqc_by_rundir) .multiMap { rundir, config, meta , samples_per_rundir -> samples_per_rundir: samples_per_rundir From ef61f9f7e32f63235f0bf574a2fd89c1c119ffcf Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 11:08:13 +0200 Subject: [PATCH 23/60] Remove unused module configuration --- conf/base.config | 3 --- conf/modules.config | 8 -------- 2 files changed, 11 deletions(-) diff --git a/conf/base.config b/conf/base.config index 8d7dffc6..aab50f93 100644 --- a/conf/base.config +++ b/conf/base.config @@ -59,7 +59,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/modules.config b/conf/modules.config index b2e48f3d..da3d2ca2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,14 +22,6 @@ process { ext.args = '--quiet' } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } - withName: 'MULTIQC' { ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } publishDir = [ From 51b01e98ca9560154634b08d541475abeffbc35c Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 11:16:41 +0200 Subject: [PATCH 24/60] Update usage docs and restore example samplesheet --- assets/samplesheet.csv | 3 +++ docs/usage.md | 36 ++++++++++++------------------------ 2 files changed, 15 insertions(+), 24 deletions(-) create mode 100644 assets/samplesheet.csv diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100644 index 00000000..fef3b4e6 --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,3 @@ +sample,lane,group,fastq_1,fastq_2,rundir +SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir +SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir diff --git a/docs/usage.md b/docs/usage.md index f926de73..df2be203 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -16,39 +16,27 @@ You will need to create a samplesheet with information about the samples you wou --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample - -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: - -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz -``` - ### Full samplesheet -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. 
The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. - -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. - ```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +sample,lane,group,fastq_1,fastq_2,rundir +CONTROL_REP1,1,,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX +CONTROL_REP2,1,,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX +CONTROL_REP3,1,,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX +TREATMENT_REP1,2,GROUP1,AEG588A4_S4_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX +TREATMENT_REP2,2,GROUP1,AEG588A5_S5_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX +TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX +TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L004_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX ``` | Column | Description | | --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. 
Spaces in sample names are automatically converted to underscores (`_`). | +| `lane` | Lane where the sample was processed on an Illumina instrument (optional). | +| `group` | Group the sample belongs to, useful when several groups are pooled together (optional). | +| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional). | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. From 7c7f31f715a40c1f5b061099c9cd945214875ab0 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 12:09:51 +0200 Subject: [PATCH 25/60] Update output docs --- docs/output.md | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index 77e4cc1c..f83e1fa0 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,8 +6,6 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: @@ -48,6 +46,31 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. 
- `multiqc_plots/`: directory containing static images from the report in various formats. + - `lanes/` [1] + - `L1/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `L2/` + - ... + - `groups/` [1] + - `GROUPNAME1/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `GROUPNAME2/` + - ... + - `rundir/` [1] + - `RUNDIR1/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `RUNDIR2/` + - ... + + +[1] These files will only be generated if `lane`, `group` or `rundir` were specified for some samples. + From 1c4f6e07019fad57bce6d5d3473155b2f3b364b3 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 12:16:38 +0200 Subject: [PATCH 26/60] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58a53ef4..6430addb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c ### `Added` +- [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane. 
+ ### `Fixed` ### `Dependencies` From 211bfafb21f6f8c6b208d33875ddc9e45552973a Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 12:20:34 +0200 Subject: [PATCH 27/60] Update samplesheet in readme file --- README.md | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index abe0d4ef..edc54ae5 100644 --- a/README.md +++ b/README.md @@ -39,26 +39,19 @@ > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - - Now, you can run the pipeline using: - - ```bash nextflow run nf-core/seqinspector \ -profile \ From df4e9cbcbd1f62990f44dd90d9eb0a2990d0c657 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 12:43:27 +0200 Subject: [PATCH 28/60] Run prettier --- docs/output.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index f83e1fa0..7af7806c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -68,10 +68,8 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m - `RUNDIR2/` - ... - [1] These files will only be generated if `lane`, `group` or `rundir` were specified for some samples. - [MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. 
From 2303db9a4c28fe0b5a907ab73bc3946f3e3daa3e Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Thu, 16 May 2024 16:00:32 +0200 Subject: [PATCH 29/60] Use testdata base path param in tests/MiSeq.main.nf.test Co-authored-by: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> --- tests/MiSeq.main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test index 1e72de95..a3dc0a5a 100644 --- a/tests/MiSeq.main.nf.test +++ b/tests/MiSeq.main.nf.test @@ -10,7 +10,7 @@ nextflow_pipeline { when { params { outdir = "$outputDir" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv" + input = params.pipelines_testdata_base_path + "seqinspector/testdata/MiSeq/samplesheet.csv" } } From 1ea8ac0dac6c0cb11e098beb3c516ba0bb6cdc49 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Thu, 16 May 2024 16:01:00 +0200 Subject: [PATCH 30/60] Use testdata base path param in tests/NovaSeq6000.main.nf.test Co-authored-by: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> --- tests/NovaSeq6000.main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index f5de9aa3..bc687abe 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -10,7 +10,7 @@ nextflow_pipeline { when { params { outdir = "$outputDir" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/NovaSeq6000/samplesheet.csv" + input = params.pipelines_testdata_base_path + "seqinspector/testdata/NovaSeq6000/samplesheet.csv" } } From 048765fd18409769442915dcd403da682dfd852f Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Thu, 16 May 2024 16:01:20 +0200 Subject: [PATCH 31/60] Use testdata 
base path param in tests/PromethION.main.nf.test Co-authored-by: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> --- tests/PromethION.main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test index de2beb35..d1969f3e 100644 --- a/tests/PromethION.main.nf.test +++ b/tests/PromethION.main.nf.test @@ -10,7 +10,7 @@ nextflow_pipeline { when { params { outdir = "$outputDir" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/PromethION/samplesheet.csv" + input = params.pipelines_testdata_base_path + "seqinspector/testdata/PromethION/samplesheet.csv" } } From 4329bb97c229bcb4d869e3c2d88af08cab730b37 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 17 May 2024 11:38:29 +0200 Subject: [PATCH 32/60] Make the tests work with 'pipelines_testdata_base_path' parameter. --- tests/MiSeq.main.nf.test | 2 +- tests/MiSeq.main.nf.test.config | 7 +++++++ tests/NovaSeq6000.main.nf.test | 2 +- tests/NovaSeq6000.main.nf.test.config | 7 +++++++ tests/PromethION.main.nf.test | 2 +- tests/PromethION.main.nf.test.config | 7 +++++++ tests/nextflow.config | 5 +++++ 7 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 tests/MiSeq.main.nf.test.config create mode 100644 tests/NovaSeq6000.main.nf.test.config create mode 100644 tests/PromethION.main.nf.test.config diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test index a3dc0a5a..bfabd954 100644 --- a/tests/MiSeq.main.nf.test +++ b/tests/MiSeq.main.nf.test @@ -8,9 +8,9 @@ nextflow_pipeline { test("MiSeq data test") { when { + config "./MiSeq.main.nf.test.config" params { outdir = "$outputDir" - input = params.pipelines_testdata_base_path + "seqinspector/testdata/MiSeq/samplesheet.csv" } } diff --git a/tests/MiSeq.main.nf.test.config b/tests/MiSeq.main.nf.test.config new file mode 100644 index 00000000..073a9774 --- /dev/null +++ b/tests/MiSeq.main.nf.test.config @@ -0,0 +1,7 
@@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/MiSeq/samplesheet.csv' +} diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index bc687abe..174e215d 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -8,9 +8,9 @@ nextflow_pipeline { test("NovaSeq6000 data test") { when { + config "./NovaSeq6000.main.nf.test.config" params { outdir = "$outputDir" - input = params.pipelines_testdata_base_path + "seqinspector/testdata/NovaSeq6000/samplesheet.csv" } } diff --git a/tests/NovaSeq6000.main.nf.test.config b/tests/NovaSeq6000.main.nf.test.config new file mode 100644 index 00000000..cad5edd9 --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test.config @@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv' +} diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test index d1969f3e..39284786 100644 --- a/tests/PromethION.main.nf.test +++ b/tests/PromethION.main.nf.test @@ -8,9 +8,9 @@ nextflow_pipeline { test("PromethION data test") { when { + config "./PromethION.main.nf.test.config" params { outdir = "$outputDir" - input = params.pipelines_testdata_base_path + "seqinspector/testdata/PromethION/samplesheet.csv" } } diff --git a/tests/PromethION.main.nf.test.config b/tests/PromethION.main.nf.test.config new file mode 100644 index 00000000..e1498a49 --- /dev/null +++ b/tests/PromethION.main.nf.test.config @@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/PromethION/samplesheet.csv' +} diff --git a/tests/nextflow.config 
b/tests/nextflow.config index 422545be..8d9ef461 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -13,4 +13,9 @@ params { max_memory = '3.GB' max_time = '2.h' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + + validationSchemaIgnoreParams = 'genomes,igenomes_base,pipelines_testdata_base_path' + + } From 3ff850395b52b9ee509901f203c301fd40aa9844 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 17 May 2024 13:28:56 +0200 Subject: [PATCH 33/60] update snapshots, add nf-test.log to gitignore --- .gitignore | 1 + tests/MiSeq.main.nf.test.snap | 12 ++++++++---- tests/NovaSeq6000.main.nf.test.snap | 18 +++++++++++------- tests/PromethION.main.nf.test.snap | 10 +++++++--- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 089a4079..72277655 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ testing/ testing* *.pyc .nf-test +.nf-test.log diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index e222b515..96896382 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -4,16 +4,20 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", 
- "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" ], - "timestamp": "2024-05-08T16:29:33.284003" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T12:59:21.531493" } } \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index a406b891..cbb75383 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -4,28 +4,32 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,9b4fd8a6d6e8a9acabecd592f633472e", "multiqc_general_stats.txt:md5,8237b88ceb018d3cb1edcea62d10f4a2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,9246a5b6b7b0410c79049fc3dbd08e92", "multiqc_general_stats.txt:md5,44328403f423c6f5ac9ee0a8a01e6725", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,84820276fae52d4d492831280ae6207c", "multiqc_general_stats.txt:md5,dd07799e5e4b9d389f9de49a852c3363", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,59ae05d89453da6f57010ffb6466f902", 
"multiqc_general_stats.txt:md5,e4629691992bfe639c01a84b90563334", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" ], - "timestamp": "2024-05-14T10:17:35.440827" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T13:02:20.874181" } } \ No newline at end of file diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index 24e52bd1..951c5550 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -4,12 +4,16 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" ], - "timestamp": "2024-05-08T17:17:23.151259" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T12:58:02.572837" } } \ No newline at end of file From 8ac4d76770515159a23f746d4d3fa8c4d3e06ce1 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 17 May 2024 15:49:01 +0200 Subject: [PATCH 34/60] visualize example run dir corresponsing to samplesheet --- 
docs/usage.md | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 57314973..7a4ec735 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,7 +10,7 @@ ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. ```bash --input '[path to samplesheet file]' @@ -18,15 +18,25 @@ You will need to create a samplesheet with information about the samples you wou ### Full samplesheet +The following simple run dir structure... + +``` +run_dir +├── sample1_lane1_group1_r1.fq.gz +├── sample2_lane1_group1_r1.fq.gz +├── sample3_lane2_group2_r1.fq.gz +└── sample4_lane2_group3_r1.fq.gz +``` + +...would be represented in the following samplesheet (shown as .tsv for readability) + ```csv title="samplesheet.csv" -sample,lane,group,fastq_1,fastq_2,rundir -CONTROL_REP1,1,,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX -CONTROL_REP2,1,,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX -CONTROL_REP3,1,,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX -TREATMENT_REP1,2,GROUP1,AEG588A4_S4_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX -TREATMENT_REP2,2,GROUP1,AEG588A5_S5_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX -TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX -TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L004_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX +sample lane group fastq_1 fastq_2 rundir +sample1 1 group1 
path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir +sample2 1 group1 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir +sample3 2 group2 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir +sample4 2 group3 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir + ``` | Column | Description | @@ -34,11 +44,11 @@ TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L004_R1_001.fastq.gz,,200624_A00834_0183_BHM | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | | `lane` | Lane where the sample was processed on an Illumina instrument (optional). | | `group` | Group the sample belongs too, useful when several groups are pooled together (optional). | -| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional) . | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | +| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional) . | -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +Another [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. 
## Running the pipeline From 84c9b3d95c693b722a3e61752ba498b05fc08e57 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 20 May 2024 12:41:50 +0200 Subject: [PATCH 35/60] naming fixes --- main.nf | 4 ++-- workflows/seqinspector.nf | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/main.nf b/main.nf index 9de4f94e..fb00bd5d 100644 --- a/main.nf +++ b/main.nf @@ -59,8 +59,8 @@ workflow NFCORE_SEQINSPECTOR { emit: global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html - lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html - group_report = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html + lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html + group_reports = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html rundir_report = SEQINSPECTOR.out.rundir_reports // channel: /path/to/multiqc_report.html } diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 0eb375dd..50fa481f 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -6,7 +6,7 @@ include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_RUNDIR } from '../modules/nf-core/multiqc/main' @@ -84,7 +84,7 @@ workflow SEQINSPECTOR { ) ) - MULTIQC ( + MULTIQC_GLOBAL ( ch_multiqc_files .map { meta, file -> file } .mix(ch_multiqc_extra_files) @@ -97,9 +97,8 @@ workflow SEQINSPECTOR { // Generate reports by lane multiqc_extra_files_per_lane = ch_multiqc_files .filter { meta, sample -> meta.lane } - .map { meta, sample -> meta.lane } + .map { meta, sample -> [lane: meta.lane] } .unique() - .map { 
lane -> [lane:lane] } .combine(ch_multiqc_extra_files) lane_mqc_files = ch_multiqc_files @@ -209,7 +208,7 @@ workflow SEQINSPECTOR { ) emit: - global_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + global_report = MULTIQC_GLOBAL.out.report.toList() // channel: /path/to/multiqc_report.html lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] rundir_reports = MULTIQC_PER_RUNDIR.out.report.toList() // channel: [ /path/to/multiqc_report.html ] From aaf17b68f90dcaa123a9bc0c7807c7bba0dc2fbd Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 20 May 2024 12:44:01 +0200 Subject: [PATCH 36/60] nf-core sync --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/linting.yml | 19 ++++++++++--------- .github/workflows/linting_comment.yml | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 6de151f7..5d27c53a 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/seqi - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/seqinspector/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/seqinspector _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. 
- [ ] Output Documentation in `docs/output.md` is updated. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 1fcafe88..073e1876 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,12 +14,13 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - name: Set up Python 3.12 - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + - name: Set up Python 3.11 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: - python-version: "3.12" + python-version: 3.11 + cache: "pip" - name: Install pre-commit run: pip install pre-commit @@ -31,14 +32,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: - python-version: "3.12" + python-version: "3.11" architecture: "x64" - name: Install dependencies @@ -59,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 + uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 40acc23f..b706875f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: 
dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 + uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 with: workflow: linting.yml workflow_conclusion: completed From e6dfea9ca1fbbb0019e438c6394d62daf1f2cda8 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 20 May 2024 12:44:54 +0200 Subject: [PATCH 37/60] nf-core fixes --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/linting.yml | 19 +++++++++---------- .github/workflows/linting_comment.yml | 2 +- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 5d27c53a..6de151f7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/seqi - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/seqinspector/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/seqinspector _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. 
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 073e1876..1fcafe88 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,13 +14,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - name: Set up Python 3.11 - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 - cache: "pip" + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit @@ -32,14 +31,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -60,7 +59,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index b706875f..40acc23f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 + uses: 
dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed From 02affeb602655023888b330b0402bcbe2a0e613a Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Thu, 30 May 2024 13:18:36 +0200 Subject: [PATCH 38/60] Improve publishDir logic --- conf/modules.config | 56 +++++++++++++++++++++++++---- tests/MiSeq.main.nf.test.snap | 12 +++---- tests/NovaSeq6000.main.nf.test.snap | 18 ++++------ tests/PromethION.main.nf.test.snap | 10 ++---- 4 files changed, 64 insertions(+), 32 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index da3d2ca2..f7e78457 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,11 +22,20 @@ process { ext.args = '--quiet' } - withName: 'MULTIQC' { + withName: 'MULTIQC_GLOBAL' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MULTIQC_PER_LANE' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/lanes" }, + mode: params.publish_dir_mode, saveAs: { filename -> switch (filename) { @@ -38,27 +47,62 @@ process { def new_filename = filename.replaceFirst( "(?.*)\\[LANE:${lane}\\]_(?multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') - "lanes/L${lane}/${new_filename}" + "L${lane}/${new_filename}" + break + default: + filename + } + } + ] + } + + withName: 'MULTIQC_PER_GROUP' { + ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/groups" }, + mode: params.publish_dir_mode, + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null break case ~/\[GROUP:.+\]_multiqc_(report\.html|plots|data)/: def group = (filename =~ /\[GROUP:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( "(?.*)\\[GROUP:${group}\\]_(?multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') - "groups/${group}/${new_filename}" + "${group}/${new_filename}" + break + default: + filename + } + } + ] + } + + withName: 'MULTIQC_PER_RUNDIR' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/rundirss" }, + mode: params.publish_dir_mode, + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null break case ~/\[RUNDIR:.+\]_multiqc_(report\.html|plots|data)/: def rundir = (filename =~ /\[RUNDIR:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( "(?.*)\\[RUNDIR:${rundir}\\]_(?multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') - "rundirs/${rundir}/${new_filename}" + "${rundir}/${new_filename}" break default: filename - } + } } ] } - } diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index 96896382..87c1a561 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -4,20 +4,16 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", 
"multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-17T12:59:21.531493" + "timestamp": "2024-05-30T13:14:20.263485" } } \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index cbb75383..22600251 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -4,32 +4,28 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,9b4fd8a6d6e8a9acabecd592f633472e", "multiqc_general_stats.txt:md5,8237b88ceb018d3cb1edcea62d10f4a2", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,9246a5b6b7b0410c79049fc3dbd08e92", "multiqc_general_stats.txt:md5,44328403f423c6f5ac9ee0a8a01e6725", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", 
"multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,84820276fae52d4d492831280ae6207c", "multiqc_general_stats.txt:md5,dd07799e5e4b9d389f9de49a852c3363", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,59ae05d89453da6f57010ffb6466f902", "multiqc_general_stats.txt:md5,e4629691992bfe639c01a84b90563334", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-17T13:02:20.874181" + "timestamp": "2024-05-30T13:13:49.062282" } } \ No newline at end of file diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index 951c5550..0ac213c1 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -4,16 +4,12 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" + 
"multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-17T12:58:02.572837" + "timestamp": "2024-05-30T13:14:40.99246" } } \ No newline at end of file From 8b83f13f95c26d7f336d53fc4f9571c14ab3b247 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 27 Aug 2024 13:17:02 +0200 Subject: [PATCH 39/60] Implement tagging system --- README.md | 4 +- assets/samplesheet.csv | 10 +- assets/schema_input.json | 16 +-- conf/modules.config | 64 +-------- docs/output.md | 45 +++--- docs/usage.md | 13 +- main.nf | 6 +- .../main.nf | 4 +- tests/MiSeq.main.nf.test | 18 +-- tests/MiSeq.main.nf.test.snap | 14 +- tests/NovaSeq6000.main.nf.test | 53 ++++--- tests/NovaSeq6000.main.nf.test.snap | 26 ++-- tests/PromethION.main.nf.test | 13 +- tests/PromethION.main.nf.test.snap | 10 +- workflows/seqinspector.nf | 131 ++++-------------- 15 files changed, 126 insertions(+), 301 deletions(-) diff --git a/README.md b/README.md index 7efdd3e9..31018e7b 100644 --- a/README.md +++ b/README.md @@ -44,8 +44,8 @@ First, prepare a samplesheet with your input data that looks as follows: `samplesheet.csv`: ```csv -sample,lane,group,fastq_1,fastq_2,rundir -CONTROL_REP1,1,GROUP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX +sample,fastq_1,fastq_2,rundir,tags +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,"patient1" ``` Each row represents a fastq file (single-end) or a pair of fastq files (paired end). 
diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index fef3b4e6..00019e58 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,7 @@ -sample,lane,group,fastq_1,fastq_2,rundir -SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir -SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir +sample,fastq_1,fastq_2,rundir,tags +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort1" +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort1" +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort2" +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient1" +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient2" +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient3" diff --git a/assets/schema_input.json b/assets/schema_input.json index c9800d5d..62922b79 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -13,18 +13,6 @@ "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["sample"] }, - "lane": { - "type": "integer", - "pattern": "^\\d+$", - "errorMessage": "Lane ID must be a number", - "meta": ["lane"] - }, - "group": { - "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Group ID cannot contain spaces", - "meta": ["group"] - }, "fastq_1": { "type": "string", "format": "file-path", @@ -45,6 +33,10 @@ "exists": true, "errorMessage": "Run directory must be a path", "meta": ["rundir"] + }, + "tags": 
{ + "type": "string", + "meta": ["tags"] } }, "required": ["sample", "fastq_1"], diff --git a/conf/modules.config b/conf/modules.config index f7e78457..c8838224 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -25,16 +25,16 @@ process { withName: 'MULTIQC_GLOBAL' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/multiqc" }, + path: { "${params.outdir}/multiqc/global_report" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'MULTIQC_PER_LANE' { + withName: 'MULTIQC_PER_TAG' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/multiqc/lanes" }, + path: { "${params.outdir}/multiqc/group_reports" }, mode: params.publish_dir_mode, saveAs: { filename -> @@ -42,62 +42,12 @@ process { case 'versions.yml': null break - case ~/\[LANE:\d+\]_multiqc_(report\.html|plots|data)/: - def lane = (filename =~ /\[LANE:(\d+)\]_multiqc_(report\.html|plots|data)/)[0][1] + case ~/\[TAG:.+\]_multiqc_(report\.html|plots|data)/: + def tag = (filename =~ /\[TAG:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( - "(?.*)\\[LANE:${lane}\\]_(?multiqc_(report\\.html|plots|data).*)", + "(?.*)\\[TAG:${tag}\\]_(?multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') - "L${lane}/${new_filename}" - break - default: - filename - } - } - ] - } - - withName: 'MULTIQC_PER_GROUP' { - ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } - publishDir = [ - path: { "${params.outdir}/multiqc/groups" }, - mode: params.publish_dir_mode, - saveAs: { - filename -> - switch (filename) { - case 'versions.yml': - null - break - case ~/\[GROUP:.+\]_multiqc_(report\.html|plots|data)/: - def group = (filename =~ /\[GROUP:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] - def new_filename = filename.replaceFirst( - "(?.*)\\[GROUP:${group}\\]_(?multiqc_(report\\.html|plots|data).*)", - '${prefix}${suffix}') - "${group}/${new_filename}" - break - default: - filename - } - } - ] - } - - withName: 'MULTIQC_PER_RUNDIR' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } - publishDir = [ - path: { "${params.outdir}/multiqc/rundirss" }, - mode: params.publish_dir_mode, - saveAs: { - filename -> - switch (filename) { - case 'versions.yml': - null - break - case ~/\[RUNDIR:.+\]_multiqc_(report\.html|plots|data)/: - def rundir = (filename =~ /\[RUNDIR:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] - def new_filename = filename.replaceFirst( - "(?.*)\\[RUNDIR:${rundir}\\]_(?multiqc_(report\\.html|plots|data).*)", - '${prefix}${suffix}') - "${rundir}/${new_filename}" + "${tag}/${new_filename}" break default: filename diff --git a/docs/output.md b/docs/output.md index 7af7806c..15c29ce2 100644 --- a/docs/output.md +++ b/docs/output.md @@ -39,36 +39,29 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m ### MultiQC +nf-core/seqinspector will generate the following MultiQC reports: + +- one global reports including all the samples listed in the samplesheet +- one group report per unique tag. These reports compile samples that share the same tag. +
Output files - `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. - - `lanes/` [1] - - `L1/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. - - `L2/` - - ... - - `groups/` [1] - - `GROUPNAME1/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. - - `GROUPNAME2/` - - ... - - `rundir/` [1] - - `RUNDIR1/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. - - `RUNDIR2/` - - ... - -[1] These files will only be generated if `lane`, `group` or `rundir` were specified for some samples. + - `global_report` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `group_reports` + - `tag1/` + - `multiqc_report.html` + - `multiqc_data/` + - `multiqc_plots/` + - `tag2/` + - `multiqc_report.html` + - `multiqc_data/` + - `multiqc_plots/` + - ...
diff --git a/docs/usage.md b/docs/usage.md index 7a4ec735..42d596bb 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -31,22 +31,21 @@ run_dir ...would be represented in the following samplesheet (shown as .tsv for readability) ```csv title="samplesheet.csv" -sample lane group fastq_1 fastq_2 rundir -sample1 1 group1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir -sample2 1 group1 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir -sample3 2 group2 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir -sample4 2 group3 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir +sample fastq_1 fastq_2 rundir tags +sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir "cohort1,patient1" +sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir "cohort1,patient2" +sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir "cohort1,patient3" +sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir "control" ``` | Column | Description | | --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `lane` | Lane where the sample was processed on an Illumina instrument (optional). | -| `group` | Group the sample belongs too, useful when several groups are pooled together (optional). | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). 
| | `rundir` | Path to the runfolder containing extra information about the sequencing run (optional) . | +| `tags` | Comma-separated list of tags to group samples in special reports. | Another [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. diff --git a/main.nf b/main.nf index fb00bd5d..a55408ef 100644 --- a/main.nf +++ b/main.nf @@ -58,10 +58,8 @@ workflow NFCORE_SEQINSPECTOR { ) emit: - global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html - lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html - group_reports = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html - rundir_report = SEQINSPECTOR.out.rundir_reports // channel: /path/to/multiqc_report.html + global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html + grouped_reports = SEQINSPECTOR.out.grouped_reports // channel: /path/to/multiqc_report.html } /* diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index ea9c9b8a..afcfc68b 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,8 +84,8 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def id_string = "${meta.sample}_${meta.group ?: "ungrouped"}_${meta.lane}" - def updated_meta = meta + [ id: id_string ] + def tags = meta.tags ? 
meta.tags.tokenize(",") : [] + def updated_meta = meta + [ id:meta.sample, tags:tags ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] } else { diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test index bfabd954..8fbff4a3 100644 --- a/tests/MiSeq.main.nf.test +++ b/tests/MiSeq.main.nf.test @@ -18,20 +18,10 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot( - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_software_versions.txt"), ).match() } ) diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index 87c1a561..26f0c2a8 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -2,18 +2,10 @@ "MiSeq data test": { "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - 
"multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", - "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", - "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", - "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + "multiqc_fastqc.txt:md5,e46e7baa8f57d4cf54d973925b5eadf9", + "multiqc_general_stats.txt:md5,a5e626a2e1a3c986092e4f89091cc41c", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "timestamp": "2024-05-30T13:14:20.263485" + "timestamp": "2024-08-26T17:55:16.152573" } } \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index 174e215d..f93891bf 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -18,35 +18,30 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot( - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_citations.txt"), - 
path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_fastqc.txt"), + 
path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_software_versions.txt"), ).match() }, ) diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index 22600251..f5c4776b 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -2,30 +2,26 @@ "NovaSeq6000 data test": { "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", - "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", + "multiqc_fastqc.txt:md5,e8ed6dca928396b8873d24e60ea1a133", + "multiqc_general_stats.txt:md5,fd9d46c5b441908cd07e5373d116db17", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,9b4fd8a6d6e8a9acabecd592f633472e", - "multiqc_general_stats.txt:md5,8237b88ceb018d3cb1edcea62d10f4a2", + "multiqc_fastqc.txt:md5,ff9b31c6024f11a8135456e7ea01fc8f", + "multiqc_general_stats.txt:md5,f36bd6e27e92c25be076efea411d3a8e", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", 
"multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,9246a5b6b7b0410c79049fc3dbd08e92", - "multiqc_general_stats.txt:md5,44328403f423c6f5ac9ee0a8a01e6725", + "multiqc_fastqc.txt:md5,62d51280dcd7634f6bed95ffe0d8dab8", + "multiqc_general_stats.txt:md5,2012002b6a057be981a97fcc96142a6c", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,84820276fae52d4d492831280ae6207c", - "multiqc_general_stats.txt:md5,dd07799e5e4b9d389f9de49a852c3363", + "multiqc_fastqc.txt:md5,63749e803a2d5fc7ecc7cd93fa68df1f", + "multiqc_general_stats.txt:md5,656931993032400dea3d441b8b61b4d2", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,59ae05d89453da6f57010ffb6466f902", - "multiqc_general_stats.txt:md5,e4629691992bfe639c01a84b90563334", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", - "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", + "multiqc_fastqc.txt:md5,91cc62e1b4059bdbe4b88affa43378af", + "multiqc_general_stats.txt:md5,6e500f82550e00b07c3e7aa1d46ab9e9", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "timestamp": "2024-05-30T13:13:49.062282" + "timestamp": "2024-08-26T18:03:33.089142" } } \ No newline at end of file diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test index 39284786..8fec8b33 100644 --- a/tests/PromethION.main.nf.test +++ b/tests/PromethION.main.nf.test @@ -18,15 +18,10 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot( - path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_citations.txt"), - 
path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_software_versions.txt"), ).match() }, ) diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index 0ac213c1..e3b34f23 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -2,14 +2,10 @@ "PromethION data test": { "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", - "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", - "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", + "multiqc_fastqc.txt:md5,cecee3cb343c75c80180d3169c6f3ea1", + "multiqc_general_stats.txt:md5,e63c25089c4fc10618414ba2254d18c7", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "timestamp": "2024-05-30T13:14:40.99246" + "timestamp": "2024-08-26T17:55:38.755385" } } \ No newline at end of file diff --git a/workflows/seqinspector.nf 
b/workflows/seqinspector.nf index 50fa481f..e37df509 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -7,9 +7,7 @@ include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_PER_RUNDIR } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -94,124 +92,51 @@ workflow SEQINSPECTOR { ch_multiqc_logo.toList() ) - // Generate reports by lane - multiqc_extra_files_per_lane = ch_multiqc_files - .filter { meta, sample -> meta.lane } - .map { meta, sample -> [lane: meta.lane] } + ch_tags = ch_multiqc_files + .map { meta, sample -> meta.tags } + .flatten() .unique() - .combine(ch_multiqc_extra_files) - lane_mqc_files = ch_multiqc_files - .filter { meta, sample -> meta.lane } - .mix(multiqc_extra_files_per_lane) - .map { meta, sample -> [ "[LANE:${meta.lane}]", meta, sample ] } - .groupTuple() - .tap { mqc_by_lane } - .collectFile{ - lane, meta, samples -> [ - "${lane}_multiqc_extra_config.yml", - """ - |output_fn_name: \"${lane}_multiqc_report.html\" - |data_dir_name: \"${lane}_multiqc_data\" - |plots_dir_name: \"${lane}_multiqc_plots\" - """.stripMargin() - ] - } - .map { file -> [ (file =~ /(\[LANE:.+\])/)[0][1], file ] } - .join(mqc_by_lane) - .multiMap { lane, config, meta , samples_per_lane -> - samples_per_lane: samples_per_lane - config: config - } - - MULTIQC_PER_LANE( - lane_mqc_files.samples_per_lane, - ch_multiqc_config.toList(), - lane_mqc_files.config, - ch_multiqc_logo.toList() - ) - - // Generate reports by group - multiqc_extra_files_per_group = ch_multiqc_files - 
.filter { meta, sample -> meta.group } - .map { meta, sample -> meta.group } - .unique() - .map { group -> [group:group] } - .combine(ch_multiqc_extra_files) - - group_mqc_files = ch_multiqc_files - .filter { meta, sample -> meta.group } - .mix(multiqc_extra_files_per_group) - .map { meta, sample -> [ "[GROUP:${meta.group}]", meta, sample ] } - .groupTuple() - .tap { mqc_by_group } - .collectFile{ - group, meta, samples -> [ - "${group}_multiqc_extra_config.yml", - """ - |output_fn_name: \"${group}_multiqc_report.html\" - |data_dir_name: \"${group}_multiqc_data\" - |plots_dir_name: \"${group}_multiqc_plots\" - """.stripMargin() - ] - } - .map { file -> [ (file =~ /(\[GROUP:.+\])/)[0][1], file ] } - .join(mqc_by_group) - .multiMap { group, config, meta , samples_per_group -> - samples_per_group: samples_per_group - config: config - } - - MULTIQC_PER_GROUP( - group_mqc_files.samples_per_group, - ch_multiqc_config.toList(), - group_mqc_files.config, - ch_multiqc_logo.toList() - ) - - // Generate reports by rundir - multiqc_extra_files_per_rundir = ch_multiqc_files - .filter { meta, sample -> meta.rundir } - .map { meta, sample -> meta.rundir } - .unique() - .map { rundir -> [rundir:rundir] } + multiqc_extra_files_per_tag = ch_tags .combine(ch_multiqc_extra_files) - rundir_mqc_files = ch_multiqc_files - .filter { meta, sample -> meta.rundir } - .mix(multiqc_extra_files_per_rundir) - .map { meta, sample -> [ "[RUNDIR:${meta.rundir.name}]", meta, sample ] } + // Group samples by tag + tagged_mqc_files = ch_tags + .combine(ch_multiqc_files) + .filter { sample_tag, meta, sample -> sample_tag in meta.tags } + .map { sample_tag, meta, sample -> [sample_tag, sample] } + .mix(multiqc_extra_files_per_tag) .groupTuple() - .tap { mqc_by_rundir } - .collectFile{ - rundir, meta, samples -> [ - "${rundir}_multiqc_extra_config.yml", + .tap { mqc_by_tag } + .collectFile { + sample_tag, samples -> + def prefix_tag = "[TAG:${sample_tag}]" + [ + "${prefix_tag}_multiqc_extra_config.yml", 
""" - |output_fn_name: \"${rundir}_multiqc_report.html\" - |data_dir_name: \"${rundir}_multiqc_data\" - |plots_dir_name: \"${rundir}_multiqc_plots\" + |output_fn_name: \"${prefix_tag}_multiqc_report.html\" + |data_dir_name: \"${prefix_tag}_multiqc_data\" + |plots_dir_name: \"${prefix_tag}_multiqc_plots\" """.stripMargin() ] } - .map { file -> [ (file =~ /(\[RUNDIR:.+\])/)[0][1], file ] } - .join(mqc_by_rundir) - .multiMap { rundir, config, meta , samples_per_rundir -> - samples_per_rundir: samples_per_rundir + .map { file -> [ (file =~ /\[TAG:(.+)\]/)[0][1], file ] } + .join(mqc_by_tag) + .multiMap { sample_tag, config, samples -> + samples_per_tag: samples config: config } - MULTIQC_PER_RUNDIR( - rundir_mqc_files.samples_per_rundir, + MULTIQC_PER_TAG( + tagged_mqc_files.samples_per_tag, ch_multiqc_config.toList(), - rundir_mqc_files.config, + tagged_mqc_files.config, ch_multiqc_logo.toList() ) emit: global_report = MULTIQC_GLOBAL.out.report.toList() // channel: /path/to/multiqc_report.html - lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] - group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] - rundir_reports = MULTIQC_PER_RUNDIR.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + grouped_reports = MULTIQC_PER_TAG.out.report.toList() // channel: [ /path/to/multiqc_report.html ] versions = ch_versions // channel: [ path(versions.yml) ] } From dabccc2e556be1860eb990d1914208a8e6901cef Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 27 Aug 2024 15:08:20 +0200 Subject: [PATCH 40/60] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6430addb..3aa5bacc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c ### `Added` +- [#20](https://github.com/nf-core/seqinspector/pull/20) Use tags to generate group 
reports - [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane. ### `Fixed` From 8bba88869a4f67143639c61c3972599192140db8 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Tue, 27 Aug 2024 15:52:46 +0200 Subject: [PATCH 41/60] Try adding regex assertion and error for samplesheet tags --- assets/schema_input.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/assets/schema_input.json b/assets/schema_input.json index 62922b79..bde27fa7 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -36,6 +36,8 @@ }, "tags": { "type": "string", + "pattern": "^\\\"([A-Za-z0-9_-]+,\\ ?)*([A-Za-z0-9_-]+)\\\"$", + "errorMessage": "Tags must consist of numbers, letters, underscores or dashes and must be provided as a comma-separated list flanked by a pair of double-quotes, e.g. \"patient_01, lane1, pos-CTRL_2\"." "meta": ["tags"] } }, From 6987e93e0e10b39917d56600410db8a61a8f6ec9 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Tue, 27 Aug 2024 15:55:26 +0200 Subject: [PATCH 42/60] Add missing comma --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index bde27fa7..d4a6dc70 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -37,7 +37,7 @@ "tags": { "type": "string", "pattern": "^\\\"([A-Za-z0-9_-]+,\\ ?)*([A-Za-z0-9_-]+)\\\"$", - "errorMessage": "Tags must consist of numbers, letters, underscores or dashes and must be provided as a comma-separated list flanked by a pair of double-quotes, e.g. \"patient_01, lane1, pos-CTRL_2\"." + "errorMessage": "Tags must consist of numbers, letters, underscores or dashes and must be provided as a comma-separated list flanked by a pair of double-quotes, e.g. 
\"patient_01, lane1, pos-CTRL_2\".", "meta": ["tags"] } }, From 363e782a7a68d2364fd73176eb50d24b72c1c02a Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 3 Sep 2024 10:35:51 +0200 Subject: [PATCH 43/60] Change example tags to more neutral ones --- README.md | 2 +- docs/usage.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 31018e7b..679b6f1c 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ First, prepare a samplesheet with your input data that looks as follows: ```csv sample,fastq_1,fastq_2,rundir,tags -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,"patient1" +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,"group1" ``` Each row represents a fastq file (single-end) or a pair of fastq files (paired end). diff --git a/docs/usage.md b/docs/usage.md index 42d596bb..621c7c0a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -32,9 +32,9 @@ run_dir ```csv title="samplesheet.csv" sample fastq_1 fastq_2 rundir tags -sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir "cohort1,patient1" -sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir "cohort1,patient2" -sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir "cohort1,patient3" +sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir "project1,group1" +sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir "project1,group1" +sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir "project1,group2" sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir "control" ``` From c1a21de429b24fde2f0e5ad8f313f7a7d1b468ba Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 3 Sep 2024 12:36:42 +0200 Subject: [PATCH 44/60] Check for tag collisions --- .../utils_nfcore_seqinspector_pipeline/main.nf | 14 ++++++++++++++ 1 file changed, 
14 insertions(+) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index afcfc68b..bb875af6 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -103,6 +103,20 @@ workflow PIPELINE_INITIALISATION { // } .set { ch_samplesheet } + ch_samplesheet + .map { + meta, fastqs -> meta.tags + } + .flatten() + .unique() + .map { tag_name -> [tag_name.toLowerCase(), tag_name] } + .groupTuple() + .map { + tag_lowercase, tags -> + assert tags.size() == 1 : + "Tag name collision: " + tags.join(", ") + } + emit: samplesheet = ch_samplesheet versions = ch_versions From 8a43efa8ae3f78dd846f440b42934e50bba0ab91 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 3 Sep 2024 12:58:50 +0200 Subject: [PATCH 45/60] Use columns to separate tags --- README.md | 2 +- assets/samplesheet.csv | 12 ++++---- assets/schema_input.json | 4 +-- docs/usage.md | 10 +++---- .../main.nf | 2 +- tests/NovaSeq6000.main.nf.test | 28 +++++++++---------- 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 679b6f1c..d7cd233f 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ First, prepare a samplesheet with your input data that looks as follows: ```csv sample,fastq_1,fastq_2,rundir,tags -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,"group1" +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,group1 ``` Each row represents a fastq file (single-end) or a pair of fastq files (paired end). 
diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 00019e58..ba2542dd 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,7 +1,7 @@ sample,fastq_1,fastq_2,rundir,tags -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort1" -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort1" -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort2" -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient1" -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient2" -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient3" +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane2 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group1 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group2 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group3 diff --git a/assets/schema_input.json b/assets/schema_input.json index d4a6dc70..3569ae2f 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -36,8 +36,8 @@ }, "tags": { "type": "string", - 
"pattern": "^\\\"([A-Za-z0-9_-]+,\\ ?)*([A-Za-z0-9_-]+)\\\"$", - "errorMessage": "Tags must consist of numbers, letters, underscores or dashes and must be provided as a comma-separated list flanked by a pair of double-quotes, e.g. \"patient_01, lane1, pos-CTRL_2\".", + "pattern": "^([A-Za-z0-9_-]+:)*([A-Za-z0-9_-]+)$", + "errorMessage": "Tags must consist of numbers, letters, underscores or dashes and must be provided as a column-separated list, e.g. group_01:lane1:pos-CTRL_2.", "meta": ["tags"] } }, diff --git a/docs/usage.md b/docs/usage.md index 621c7c0a..38a039c4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -32,10 +32,10 @@ run_dir ```csv title="samplesheet.csv" sample fastq_1 fastq_2 rundir tags -sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir "project1,group1" -sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir "project1,group1" -sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir "project1,group2" -sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir "control" +sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir project1:group1 +sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir project1:group1 +sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir project1:group2 +sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir control ``` @@ -45,7 +45,7 @@ sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir "c | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | | `rundir` | Path to the runfolder containing extra information about the sequencing run (optional) . | -| `tags` | Comma-separated list of tags to group samples in special reports. 
| +| `tags` | Column-separated list of tags to group samples in special reports. | Another [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index bb875af6..eec4d09c 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def tags = meta.tags ? meta.tags.tokenize(",") : [] + def tags = meta.tags ? meta.tags.tokenize(":") : [] def updated_meta = meta + [ id:meta.sample, tags:tags ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index f93891bf..d050399e 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -23,20 +23,20 @@ nextflow_pipeline { path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/global_report/multiqc_data/multiqc_software_versions.txt"), - path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_fastqc.txt"), - 
path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_software_versions.txt"), path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_citations.txt"), path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_fastqc.txt"), From 8779909fe25a35950a999847d0b052b1f188de9c Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 3 Sep 2024 13:55:23 +0200 Subject: [PATCH 46/60] switch to colon-separated tags --- README.md | 2 +- assets/samplesheet.csv | 12 
++++++------ assets/schema_input.json | 4 ++-- docs/usage.md | 12 ++++++------ .../local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 31018e7b..acba2cec 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ First, prepare a samplesheet with your input data that looks as follows: ```csv sample,fastq_1,fastq_2,rundir,tags -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,"patient1" +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,lane1:project5:group2 ``` Each row represents a fastq file (single-end) or a pair of fastq files (paired end). diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 00019e58..de5f1a68 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,7 +1,7 @@ sample,fastq_1,fastq_2,rundir,tags -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort1" -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort1" -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort2" -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient1" -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient2" -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient3" +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:cohort1 
+SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:cohort1 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:cohort2 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,patient1 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,patient2 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,patient3 diff --git a/assets/schema_input.json b/assets/schema_input.json index d4a6dc70..18941557 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -36,8 +36,8 @@ }, "tags": { "type": "string", - "pattern": "^\\\"([A-Za-z0-9_-]+,\\ ?)*([A-Za-z0-9_-]+)\\\"$", - "errorMessage": "Tags must consist of numbers, letters, underscores or dashes and must be provided as a comma-separated list flanked by a pair of double-quotes, e.g. 
\"patient_01, lane1, pos-CTRL_2\".", + "pattern": "^([a-z0-9_-]+:)*([a-z0-9_-]+)$", + "errorMessage": "Tags must be separated by colons and only consist of lowercase letters, numbers, underscores and hyphens.", "meta": ["tags"] } }, diff --git a/docs/usage.md b/docs/usage.md index 42d596bb..e2da41de 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -32,10 +32,10 @@ run_dir ```csv title="samplesheet.csv" sample fastq_1 fastq_2 rundir tags -sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir "cohort1,patient1" -sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir "cohort1,patient2" -sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir "cohort1,patient3" -sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir "control" +sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir cohort1:patient1 +sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir cohort1:patient2 +sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir cohort1:patient3 +sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir control ``` @@ -44,8 +44,8 @@ sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir "c | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | -| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional) . | -| `tags` | Comma-separated list of tags to group samples in special reports. 
| +| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional). | +| `tags` | Colon-separated list of tags to group samples in special reports. | Another [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index afcfc68b..0ebfdd0a 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def tags = meta.tags ? meta.tags.tokenize(",") : [] + def tags = meta.tags ? meta.tags.tokenize(":") : [] def updated_meta = meta + [ id:meta.sample, tags:tags ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] From f496b85d07540807a452a094fb9cbc52013cc9f1 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 3 Sep 2024 13:55:30 +0200 Subject: [PATCH 47/60] use testdata with tags --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 658e75f3..c458ad0f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,7 +22,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'seqinspector/testdata/MiSeq/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv' // Genome references genome = 'R64-1-1' From 38e39a9828646f984e7902bd119b802e288624c9 Mon Sep 17 00:00:00 2001 From: 
Adrien Coulier Date: Tue, 3 Sep 2024 14:10:16 +0200 Subject: [PATCH 48/60] Make ids unique --- .../local/utils_nfcore_seqinspector_pipeline/main.nf | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index eec4d09c..632acb38 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -87,9 +87,17 @@ workflow PIPELINE_INITIALISATION { def tags = meta.tags ? meta.tags.tokenize(":") : [] def updated_meta = meta + [ id:meta.sample, tags:tags ] if (!fastq_2) { - return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] + return [ + updated_meta.id + fastq_1.toString().replaceAll('/', '_'), + updated_meta + [ single_end:true ], + [ fastq_1 ] + ] } else { - return [ updated_meta.id, updated_meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + return [ + updated_meta.id + fastq_1.toString().replaceAll('/', '_') + '_' + fastq_2.toString().replaceAll('/', '_'), + updated_meta + [ single_end:false ], + [ fastq_1, fastq_2 ] + ] } } .groupTuple() From c96a41c0c6f5c4d3b7f8ff0d71242fd876b04325 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Tue, 3 Sep 2024 12:26:11 +0000 Subject: [PATCH 49/60] bump multiqc to prevent gitpod crashing, populate the new process inputs w. 
empty channels --- modules.json | 2 +- modules/nf-core/multiqc/environment.yml | 2 +- modules/nf-core/multiqc/main.nf | 14 ++++++++++--- modules/nf-core/multiqc/meta.yml | 13 ++++++++++++ modules/nf-core/multiqc/tests/main.nf.test | 8 ++++++++ .../nf-core/multiqc/tests/main.nf.test.snap | 20 +++++++++---------- modules/nf-core/multiqc/tests/nextflow.config | 5 +++++ workflows/seqinspector.nf | 8 ++++++-- 8 files changed, 55 insertions(+), 17 deletions(-) create mode 100644 modules/nf-core/multiqc/tests/nextflow.config diff --git a/modules.json b/modules.json index 87fe816c..70f3486c 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", + "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", "installed_by": ["modules"] } } diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index ca39fb67..a31464c9 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.21 + - bioconda::multiqc=1.24.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 47ac352f..ceaec139 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,14 +3,16 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : - 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.24.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.24.1--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" path(multiqc_config) path(extra_multiqc_config) path(multiqc_logo) + path(replace_names) + path(sample_names) output: path "*multiqc_report.html", emit: report @@ -23,16 +25,22 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' - def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ $args \\ $config \\ + $prefix \\ $extra_config \\ $logo \\ + $replace \\ + $samples \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 45a9bc35..382c08cb 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -29,6 +29,19 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" + - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. 
+ pattern: "*.{tsv}" output: - report: type: file diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index f1c4242e..33316a7d 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -8,6 +8,8 @@ nextflow_process { tag "modules_nfcore" tag "multiqc" + config "./nextflow.config" + test("sarscov2 single-end [fastqc]") { when { @@ -17,6 +19,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -41,6 +45,8 @@ nextflow_process { input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -66,6 +72,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index bfebd802..83fa080c 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:48:55.657331" + "timestamp": "2024-07-10T12:41:34.562023" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:49:49.071937" + "timestamp": "2024-07-10T11:27:11.933869532" }, "multiqc_versions_config": { "content": [ [ - 
"versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:49:25.457567" + "timestamp": "2024-07-10T11:26:56.709849369" } -} \ No newline at end of file +} diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 00000000..c537a6a3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = null + } +} diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index e37df509..1ba00c62 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -89,7 +89,9 @@ workflow SEQINSPECTOR { .collect(), ch_multiqc_config.toList(), Channel.empty().toList(), - ch_multiqc_logo.toList() + ch_multiqc_logo.toList(), + Channel.empty().toList(), + Channel.empty().toList() ) ch_tags = ch_multiqc_files @@ -131,7 +133,9 @@ workflow SEQINSPECTOR { tagged_mqc_files.samples_per_tag, ch_multiqc_config.toList(), tagged_mqc_files.config, - ch_multiqc_logo.toList() + ch_multiqc_logo.toList(), + Channel.empty().toList(), + Channel.empty().toList() ) emit: From 2e269ecc236a66866f8413a1129656ed9c1d816f Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 3 Sep 2024 16:36:59 +0200 Subject: [PATCH 50/60] Allow for uppercase letters --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 18941557..f2440839 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -36,7 +36,7 @@ }, "tags": { "type": "string", - "pattern": "^([a-z0-9_-]+:)*([a-z0-9_-]+)$", + "pattern": "^([A-Za-z0-9_-]+:)*([A-Za-z0-9_-]+)$", "errorMessage": "Tags must be separated by colons and only consist of lowercase letters, numbers, underscores and hyphens.", "meta": ["tags"] } From 
c97a597e8165081565d5acb1afdfd61e83f6309c Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 10 Sep 2024 08:54:03 +0200 Subject: [PATCH 51/60] Use samplesheet row in id Co-authored-by: Matthias Zepper --- .../local/utils_nfcore_seqinspector_pipeline/main.nf | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index 632acb38..3c63037b 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -82,19 +82,21 @@ workflow PIPELINE_INITIALISATION { // Channel .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json + .toList() + .flatMap { it.withIndex().collect { entry, idx -> entry + "${idx+1}" } } .map { - meta, fastq_1, fastq_2 -> + meta, fastq_1, fastq_2, idx -> def tags = meta.tags ? 
meta.tags.tokenize(":") : [] - def updated_meta = meta + [ id:meta.sample, tags:tags ] + def updated_meta = meta + [ id:"${meta.sample}_${idx}", tags:tags ] if (!fastq_2) { return [ - updated_meta.id + fastq_1.toString().replaceAll('/', '_'), + updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] } else { return [ - updated_meta.id + fastq_1.toString().replaceAll('/', '_') + '_' + fastq_2.toString().replaceAll('/', '_'), + updated_meta.id, updated_meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] From 1f33928fdc2f9350fbb4ea86e253f5f7cc9b6670 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 10 Sep 2024 09:03:50 +0200 Subject: [PATCH 52/60] Update snapshots --- tests/MiSeq.main.nf.test.snap | 12 ++++++---- tests/NovaSeq6000.main.nf.test.snap | 36 ++++++++++++++++------------- tests/PromethION.main.nf.test.snap | 12 ++++++---- 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index 26f0c2a8..9dacaf8a 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -2,10 +2,14 @@ "MiSeq data test": { "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,e46e7baa8f57d4cf54d973925b5eadf9", - "multiqc_general_stats.txt:md5,a5e626a2e1a3c986092e4f89091cc41c", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_fastqc.txt:md5,7b1b7fd457b60404768045b148d4c0a8", + "multiqc_general_stats.txt:md5,962713a1473a318f2cb29bb5290c4c8e", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5" ], - "timestamp": "2024-08-26T17:55:16.152573" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-10T08:57:05.870194" } } \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index f5c4776b..3f27e5a2 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ 
-2,26 +2,30 @@ "NovaSeq6000 data test": { "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,e8ed6dca928396b8873d24e60ea1a133", - "multiqc_general_stats.txt:md5,fd9d46c5b441908cd07e5373d116db17", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_fastqc.txt:md5,3730f9046b20ac5c17a86db0a33f8d5d", + "multiqc_general_stats.txt:md5,d521de54d1e659bf7892105f7d23d4db", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,ff9b31c6024f11a8135456e7ea01fc8f", - "multiqc_general_stats.txt:md5,f36bd6e27e92c25be076efea411d3a8e", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_fastqc.txt:md5,8284e25ccc21041cf3b5a32eb6a51e78", + "multiqc_general_stats.txt:md5,d52544eb1a505c889a2f9117cf94a5fa", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,62d51280dcd7634f6bed95ffe0d8dab8", - "multiqc_general_stats.txt:md5,2012002b6a057be981a97fcc96142a6c", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_fastqc.txt:md5,f38ffdc112c73af3a41ed15848a3761f", + "multiqc_general_stats.txt:md5,5b1190093085ef073d4bd5818c9cde79", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,63749e803a2d5fc7ecc7cd93fa68df1f", - "multiqc_general_stats.txt:md5,656931993032400dea3d441b8b61b4d2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_fastqc.txt:md5,7ff71ceb8ecdf086331047f8860c3347", + "multiqc_general_stats.txt:md5,79c1090dd8a97912893f8491641b9dc9", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - 
"multiqc_fastqc.txt:md5,91cc62e1b4059bdbe4b88affa43378af", - "multiqc_general_stats.txt:md5,6e500f82550e00b07c3e7aa1d46ab9e9", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_fastqc.txt:md5,519ff344a896ac369bba4d5c5b8be7b5", + "multiqc_general_stats.txt:md5,41611bd5ab9e79425c466bf976b03bdc", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5" ], - "timestamp": "2024-08-26T18:03:33.089142" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-10T08:58:26.732622" } } \ No newline at end of file diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index e3b34f23..fb8cda25 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -2,10 +2,14 @@ "PromethION data test": { "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,cecee3cb343c75c80180d3169c6f3ea1", - "multiqc_general_stats.txt:md5,e63c25089c4fc10618414ba2254d18c7", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_fastqc.txt:md5,35984961f25a0d4e7352cab4d5650178", + "multiqc_general_stats.txt:md5,1465b0b1959e3864b28ecc2340df351b", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5" ], - "timestamp": "2024-08-26T17:55:38.755385" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-10T08:58:57.180636" } } \ No newline at end of file From 7ce086a6fddbe96de440a234128576bfc4d6bff5 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 28 Oct 2024 11:12:03 +0000 Subject: [PATCH 53/60] nf-core tools 3.0.2 modules update --- modules.json | 4 +- modules/nf-core/fastqc/environment.yml | 2 - modules/nf-core/fastqc/main.nf | 5 +- modules/nf-core/fastqc/meta.yml | 57 +-- modules/nf-core/fastqc/tests/main.nf.test | 225 ++++++++--- .../nf-core/fastqc/tests/main.nf.test.snap | 370 ++++++++++++++++-- modules/nf-core/multiqc/environment.yml | 4 
+- modules/nf-core/multiqc/main.nf | 6 +- modules/nf-core/multiqc/meta.yml | 91 +++-- .../nf-core/multiqc/tests/main.nf.test.snap | 26 +- 10 files changed, 603 insertions(+), 187 deletions(-) diff --git a/modules.json b/modules.json index 70f3486c..a84d111b 100644 --- a/modules.json +++ b/modules.json @@ -7,12 +7,12 @@ "nf-core": { "fastqc": { "branch": "master", - "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", + "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] } } diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index 1787b38a..691d4c76 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,7 +1,5 @@ -name: fastqc channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index d79f1c86..d8989f48 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -26,7 +26,10 @@ process FASTQC { def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') - def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) + // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 + // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus // FastQC memory value allowed range (100 - 10000) def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index ee5507e0..4827da7a 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -16,35 +16,44 @@ tools: homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ licence: ["GPL-2.0-only"] + identifier: biotools:fastqc input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report + pattern: "*_{fastqc.html}" - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.zip": + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 70edae4d..e9d79a07 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -23,17 +23,14 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
Mon 2 Oct 2023
test.gz
- // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_single") } + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -54,16 +51,14 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -83,13 +78,11 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } + { assert process.success }, + { assert process.out.html[0][1] ==~ 
".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -109,13 +102,11 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -138,22 +129,20 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, - { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, - { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, - - { assert 
snapshot(process.out.versions).match("fastqc_versions_multiple") } + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -173,21 +162,18 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } ) } } test("sarscov2 single-end [fastq] - stub") { - options "-stub" - + options "-stub" when { process { """ @@ -201,12 +187,123 @@ nextflow_process { then { assertAll ( - { assert process.success }, - { assert 
snapshot(process.out.html.collect { file(it[1]).getName() } + - process.out.zip.collect { file(it[1]).getName() } + - process.out.versions ).match("fastqc_stub") } + { assert process.success }, + { assert snapshot(process.out).match() } ) } } + test("sarscov2 paired-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 interleaved [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 multiple [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 custom_prefix - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 86f7c311..d5db3092 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,88 +1,392 @@ { - "fastqc_versions_interleaved": { + "sarscov2 custom_prefix": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:40:07.293713" + "timestamp": "2024-07-22T11:02:16.374038" }, - "fastqc_stub": { + "sarscov2 single-end [fastq] - stub": { "content": [ - [ - "test.html", - "test.zip", - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:24.993809" + }, + "sarscov2 custom_prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:31:01.425198" + "timestamp": "2024-07-22T11:03:10.93942" }, - "fastqc_versions_multiple": { + "sarscov2 interleaved [fastq]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:40:55.797907" + "timestamp": "2024-07-22T11:01:42.355718" }, - "fastqc_versions_bam": { + "sarscov2 paired-end [bam]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:40:26.795862" + "timestamp": "2024-07-22T11:01:53.276274" }, - "fastqc_versions_single": { + "sarscov2 multiple 
[fastq]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:39:27.043675" + "timestamp": "2024-07-22T11:02:05.527626" }, - "fastqc_versions_paired": { + "sarscov2 paired-end [fastq]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:31.188871" + }, + "sarscov2 paired-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:34.273566" + }, + "sarscov2 multiple [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": 
false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:39:47.584191" + "timestamp": "2024-07-22T11:03:02.304411" }, - "fastqc_versions_custom_prefix": { + "sarscov2 single-end [fastq]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:19.095607" + }, + "sarscov2 interleaved [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:44.640184" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:41:14.576531" + "timestamp": "2024-07-22T11:02:53.550742" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index a31464c9..6f5b867b 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,7 +1,5 @@ -name: multiqc channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::multiqc=1.24.1 + - bioconda::multiqc=1.25.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index ceaec139..cc0643e1 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.24.1--pyhdfd78af_0' : - 'biocontainers/multiqc:1.24.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -52,7 +52,7 @@ process MULTIQC { stub: """ mkdir multiqc_data - touch multiqc_plots + mkdir multiqc_plots touch multiqc_report.html cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 382c08cb..b16c1879 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,6 @@ name: multiqc -description: Aggregate results from bioinformatics analyses across many samples into a single report +description: Aggregate results from bioinformatics analyses across many samples into + a single report keywords: - QC - bioinformatics tools @@ -12,53 +13,59 @@ tools: homepage: https://multiqc.info/ documentation: 
https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. - pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" - - replace_names: - type: file - description: | - Optional two-column sample renaming file. First column a set of - patterns, second column a set of corresponding replacements. Passed via - MultiQC's `--replace-names` option. - pattern: "*.{tsv}" - - sample_names: - type: file - description: | - Optional TSV file with headers, passed to the MultiQC --sample_names - argument. - pattern: "*.{tsv}" + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. 
+ pattern: "*.{tsv}" output: - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" - data: - type: directory - description: MultiQC data dir - pattern: "multiqc_data" + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_data" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@abhi18av" - "@bunop" diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 83fa080c..2fcbb5ff 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-10T12:41:34.562023" + "timestamp": "2024-10-02T17:51:46.317523" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-10T11:27:11.933869532" + "timestamp": "2024-10-02T17:52:20.680978" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" + 
"versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-10T11:26:56.709849369" + "timestamp": "2024-10-02T17:52:09.185842" } -} +} \ No newline at end of file From a5951302f2b783a4ffe9fd1d3b11474fe48d3db6 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 28 Oct 2024 13:30:37 +0000 Subject: [PATCH 54/60] merge fix --- .../local/utils_nfcore_seqinspector_pipeline/main.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index c2270583..a84e064f 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -74,6 +74,7 @@ workflow PIPELINE_INITIALISATION { Channel .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + .toList() .flatMap { it.withIndex().collect { entry, idx -> entry + "${idx+1}" } } .map { meta, fastq_1, fastq_2, idx -> @@ -94,8 +95,8 @@ workflow PIPELINE_INITIALISATION { } } .groupTuple() - .map { samplesheet -> - validateInputSamplesheet(samplesheet) + .map { + validateInputSamplesheet(it) // Applies additional group validation checks that schema_input.json cannot do. 
} .transpose() // Replace the map below // .map { From 036408867cb7b852cf1e35089d9bd4edab29bb89 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 28 Oct 2024 13:30:48 +0000 Subject: [PATCH 55/60] update snapshots --- tests/MiSeq.main.nf.test.snap | 8 ++++---- tests/NovaSeq6000.main.nf.test.snap | 24 ++++++++++++------------ tests/PromethION.main.nf.test.snap | 10 +++++----- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index 9dacaf8a..4613d525 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -3,13 +3,13 @@ "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,7b1b7fd457b60404768045b148d4c0a8", - "multiqc_general_stats.txt:md5,962713a1473a318f2cb29bb5290c4c8e", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5" + "multiqc_general_stats.txt:md5,5b28a83b14cb2fe88d084d08900ebdbf", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3" ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.0" }, - "timestamp": "2024-09-10T08:57:05.870194" + "timestamp": "2024-10-28T13:18:10.3675973" } } \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index 3f27e5a2..ee3c22b7 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -3,29 +3,29 @@ "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,3730f9046b20ac5c17a86db0a33f8d5d", - "multiqc_general_stats.txt:md5,d521de54d1e659bf7892105f7d23d4db", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", + "multiqc_general_stats.txt:md5,25abe0f6a35eb4a3b056fc3cf5c13732", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", 
"multiqc_fastqc.txt:md5,8284e25ccc21041cf3b5a32eb6a51e78", - "multiqc_general_stats.txt:md5,d52544eb1a505c889a2f9117cf94a5fa", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", + "multiqc_general_stats.txt:md5,90ee35137492b80aab36ef67f72d8921", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,f38ffdc112c73af3a41ed15848a3761f", - "multiqc_general_stats.txt:md5,5b1190093085ef073d4bd5818c9cde79", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", + "multiqc_general_stats.txt:md5,d62a2fc39e674d98783d408791803148", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,7ff71ceb8ecdf086331047f8860c3347", - "multiqc_general_stats.txt:md5,79c1090dd8a97912893f8491641b9dc9", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", + "multiqc_general_stats.txt:md5,2f09b8f199ac40cf67ba50843cebd29c", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,519ff344a896ac369bba4d5c5b8be7b5", - "multiqc_general_stats.txt:md5,41611bd5ab9e79425c466bf976b03bdc", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5" + "multiqc_general_stats.txt:md5,6a1c16f068d7ba3a9225a17eb570ed9a", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3" ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.0" }, - "timestamp": "2024-09-10T08:58:26.732622" + "timestamp": "2024-10-28T13:19:13.226135825" } } \ No newline at end of file diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index fb8cda25..026a8cd2 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -2,14 +2,14 @@ "PromethION data test": { 
"content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,35984961f25a0d4e7352cab4d5650178", - "multiqc_general_stats.txt:md5,1465b0b1959e3864b28ecc2340df351b", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5" + "multiqc_fastqc.txt:md5,1a4b472e13cadc770832b0e20d1de7b0", + "multiqc_general_stats.txt:md5,409cefc7f17f95d176ced6032bf8fb32", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3" ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.0" }, - "timestamp": "2024-09-10T08:58:57.180636" + "timestamp": "2024-10-28T13:19:57.261730412" } } \ No newline at end of file From 545af046c6660824322f982f4ac322776c25284c Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 28 Oct 2024 13:38:38 +0000 Subject: [PATCH 56/60] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3aa5bacc..b0b12de1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c - [#20](https://github.com/nf-core/seqinspector/pull/20) Use tags to generate group reports - [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane. +- [#49](https://github.com/nf-core/seqinspector/pull/49) Merge with template 3.0.2. 
### `Fixed` From 6dc45ac897a9457d00a2bd632d694336f0b40e26 Mon Sep 17 00:00:00 2001 From: erkutilaslan Date: Mon, 28 Oct 2024 11:45:21 +0100 Subject: [PATCH 57/60] add module bbmap/clumpify --- modules.json | 5 ++ .../nf-core/bbmap/clumpify/environment.yml | 5 ++ modules/nf-core/bbmap/clumpify/main.nf | 38 ++++++++++ modules/nf-core/bbmap/clumpify/meta.yml | 56 +++++++++++++++ .../nf-core/bbmap/clumpify/tests/main.nf.test | 72 +++++++++++++++++++ .../bbmap/clumpify/tests/main.nf.test.snap | 49 +++++++++++++ workflows/seqinspector.nf | 12 ++++ 7 files changed, 237 insertions(+) create mode 100644 modules/nf-core/bbmap/clumpify/environment.yml create mode 100644 modules/nf-core/bbmap/clumpify/main.nf create mode 100644 modules/nf-core/bbmap/clumpify/meta.yml create mode 100644 modules/nf-core/bbmap/clumpify/tests/main.nf.test create mode 100644 modules/nf-core/bbmap/clumpify/tests/main.nf.test.snap diff --git a/modules.json b/modules.json index 8e632d50..bf8f5f02 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bbmap/clumpify": { + "branch": "master", + "git_sha": "a1abf90966a2a4016d3c3e41e228bfcbd4811ccc", + "installed_by": ["modules"] + }, "fastqc": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/nf-core/bbmap/clumpify/environment.yml b/modules/nf-core/bbmap/clumpify/environment.yml new file mode 100644 index 00000000..a2f65506 --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bbmap=39.10 diff --git a/modules/nf-core/bbmap/clumpify/main.nf b/modules/nf-core/bbmap/clumpify/main.nf new file mode 100644 index 00000000..fc6a85ad --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/main.nf @@ -0,0 +1,38 @@ +process BBMAP_CLUMPIFY { + tag "$meta.id" + label 'process_single' + label 'process_high_memory' + + conda 
"${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bbmap:39.10--h92535d8_0': + 'biocontainers/bbmap:39.10--h92535d8_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path('*.fastq.gz'), emit: reads + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def raw = meta.single_end ? "in=$reads" : "in1=${reads[0]} in2=${reads[1]}" + def clumped = meta.single_end ? "out=${prefix}.clumped.fastq.gz" : "out1=${prefix}_1.clumped.fastq.gz out2=${prefix}_2.clumped.fastq.gz" + """ + clumpify.sh \\ + $raw \\ + $clumped \\ + $args \\ + &> ${prefix}.clumpify.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bbmap/clumpify/meta.yml b/modules/nf-core/bbmap/clumpify/meta.yml new file mode 100644 index 00000000..7db435cd --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/meta.yml @@ -0,0 +1,56 @@ +name: bbmap_clumpify +description: Create 30% Smaller, Faster Gzipped Fastq Files. And remove duplicates +keywords: + - clumping fastqs + - smaller fastqs + - deduping + - fastq +tools: + - bbmap: + description: BBMap is a short read aligner, as well as various other bioinformatic + tools. + homepage: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/clumpify-guide/ + documentation: https://www.biostars.org/p/225338/ + licence: ["UC-LBL license (see package)"] + identifier: biotools:bbmap +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: The reordered/clumped (and if necessary deduped) fastq reads + pattern: "*.clumped.fastq.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: Clumpify log file + pattern: "*clumpify.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@tamuanand" +maintainers: + - "@tamuanand" diff --git a/modules/nf-core/bbmap/clumpify/tests/main.nf.test b/modules/nf-core/bbmap/clumpify/tests/main.nf.test new file mode 100644 index 00000000..f43b8767 --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/tests/main.nf.test @@ -0,0 +1,72 @@ + +nextflow_process { + + name "Test Process BBMAP_CLUMPIFY" + script "../main.nf" + process "BBMAP_CLUMPIFY" + + tag "modules" + tag "modules_nfcore" + tag "bbmap" + tag "bbmap/clumpify" + + test("test-bbmap-clumpify-single-end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.reads, + file(process.out.log[0][1]).name, + process.out.versions + ).match() + } + ) + } + } + + test("test-bbmap-clumpify-paired-end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.reads, + file(process.out.log[0][1]).name, + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/bbmap/clumpify/tests/main.nf.test.snap b/modules/nf-core/bbmap/clumpify/tests/main.nf.test.snap new file mode 100644 index 00000000..e84c345f --- /dev/null +++ b/modules/nf-core/bbmap/clumpify/tests/main.nf.test.snap @@ -0,0 +1,49 @@ +{ + "test-bbmap-clumpify-paired-end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.clumped.fastq.gz:md5,27e51643262c1ef3905c4be184c3814c", + "test_2.clumped.fastq.gz:md5,c70ab7bbd44d6b6fadd6a1a79ef1648f" + ] + ] + ], + "test.clumpify.log", + [ + "versions.yml:md5,fdf0404f694fca43bcf9be6458d927cd" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T16:16:59.10822554" + }, + "test-bbmap-clumpify-single-end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.clumped.fastq.gz:md5,27e51643262c1ef3905c4be184c3814c" + ] + ], + "test.clumpify.log", + [ + "versions.yml:md5,fdf0404f694fca43bcf9be6458d927cd" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T16:16:36.9005326" + } +} \ No newline at end of file diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index ea628117..4b633651 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -4,6 +4,8 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ include { FASTQC } from '../modules/nf-core/fastqc/main' +include { BBMAP_CLUMPIFY } from '../modules/nf-core/bbmap/clumpify/main' + include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_TAG } from 
'../modules/nf-core/multiqc/main' @@ -39,6 +41,16 @@ workflow SEQINSPECTOR { ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + // + // MODULE: Run BBMAP_CLUMPIFY + // + BBMAP_CLUMPIFY ( + ch_samplesheet + ) +// ch_multiqc_files = ch_multiqc_files.mix(BBMAP_CLUMPIFY.out.zip.collect{it[1]}) + ch_versions = ch_versions.mix(BBMAP_CLUMPIFY.out.versions) + + // // Collate and save software versions // From 445edd1bf793005dbdf774d2da67ecd50f611b33 Mon Sep 17 00:00:00 2001 From: erkutilaslan Date: Mon, 28 Oct 2024 15:28:37 +0100 Subject: [PATCH 58/60] cleanup code --- workflows/seqinspector.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 4b633651..59f19e5e 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -47,7 +47,6 @@ workflow SEQINSPECTOR { BBMAP_CLUMPIFY ( ch_samplesheet ) -// ch_multiqc_files = ch_multiqc_files.mix(BBMAP_CLUMPIFY.out.zip.collect{it[1]}) ch_versions = ch_versions.mix(BBMAP_CLUMPIFY.out.versions) From ba4bac0b66a39306cb94212efa519f531e210942 Mon Sep 17 00:00:00 2001 From: erkutilaslan Date: Mon, 28 Oct 2024 16:58:01 +0100 Subject: [PATCH 59/60] fix lint errors --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index c8838224..8d85bdce 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,4 +1,4 @@ -/* +/ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Config file for defining DSL2 per module options and publishing paths ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From b0e25588ad9a42eea02d25efdd3e9f8c150eeaec Mon Sep 17 00:00:00 2001 From: erkutilaslan Date: Tue, 29 Oct 2024 11:41:14 +0100 Subject: [PATCH 60/60] fix comment --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/conf/modules.config b/conf/modules.config index 8d85bdce..c8838224 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,4 +1,4 @@ -/ +/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Config file for defining DSL2 per module options and publishing paths ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~