From 56e01a0357334b461662398db8c89fd6340a5b73 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Tue, 19 Mar 2024 10:09:55 +0100 Subject: [PATCH 001/172] Update assets/schema_input.json --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index fc4fdb96..332031c2 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -41,7 +41,7 @@ }, "rundir": { "type": "string", - "format": "file-path", + "format": "directory-path", "exists": true, "errorMessage": "Run directory must be a path", "meta": ["rundir"] From 6c94332bf1ba3738e39c80031f6d83436a4832b4 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Wed, 20 Mar 2024 09:16:32 +0100 Subject: [PATCH 002/172] grop instead of project in a single place Co-authored-by: Adrien Coulier --- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index e1f8e4de..cdbb6400 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. 
Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def id_string = "${meta.sample}_${meta.group ?: "ungrouped"}_${meta.lane}" + def id_string = "${meta.sample}_${meta.project ?: "ungrouped"}_${meta.lane}" def updated_meta = meta + [ id: id_string ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] From 4779844462856d017d7ecdcba13cf018198caf3b Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Wed, 20 Mar 2024 09:51:59 +0100 Subject: [PATCH 003/172] Updated test profile input --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index fbbffdd6..38e9ee32 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,7 +22,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = 'https://raw.githubusercontent.com/KarNair/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv' // Genome references genome = 'R64-1-1' From 28e0137e65c001682ce5e3257ea6fe6248ccf85e Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Wed, 20 Mar 2024 14:51:35 +0100 Subject: [PATCH 004/172] Update assets/schema_input.json Co-authored-by: Karthik Nair <35717861+KarNair@users.noreply.github.com> --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 332031c2..9fb321b5 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -41,7 +41,7 @@ }, "rundir": { "type": "string", - "format": "directory-path", + "format": "path", "exists": true, "errorMessage": "Run directory must be a path", "meta": ["rundir"] From 
a31040e055e38f8a530a99fcc865b2bf9f2e14b9 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 18 Mar 2024 15:16:00 +0100 Subject: [PATCH 005/172] Generate reports per lane, group and rundir --- conf/modules.config | 20 ++++++- main.nf | 5 +- workflows/seqinspector.nf | 120 ++++++++++++++++++++++++++++++++++---- 3 files changed, 133 insertions(+), 12 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index e3ea8fa6..296ca786 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -35,7 +35,25 @@ process { publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null + break + case ~/L\d+_multiqc_(report\.html|plots|data)/: + "lanes/L${(filename =~ /L(\d+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + break + case ~/G-.+_multiqc_(report\.html|plots|data)/: + "groups/G-${(filename =~ /G-(.+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + break + case ~/D-.+_multiqc_(report\.html|plots|data)/: + "rundirs/D-${(filename =~ /D-(.+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + break + default: + filename + } + } ] } diff --git a/main.nf b/main.nf index 1e9c2ede..58afd1fe 100644 --- a/main.nf +++ b/main.nf @@ -58,7 +58,10 @@ workflow NFCORE_SEQINSPECTOR { ) emit: - multiqc_report = SEQINSPECTOR.out.multiqc_report // channel: /path/to/multiqc_report.html + global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html + lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html + group_report = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html + rundir_report = SEQINSPECTOR.out.rundir_reports // channel: /path/to/multiqc_report.html } /* diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 9ae3384b..b6fd3bc4 100644 --- a/workflows/seqinspector.nf 
+++ b/workflows/seqinspector.nf @@ -5,7 +5,10 @@ */ include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_RUNDIR } from '../modules/nf-core/multiqc/main' include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -26,6 +29,8 @@ workflow SEQINSPECTOR { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() + ch_multiqc_extra_files = Channel.empty() + ch_multiqc_reports = Channel.empty() // // MODULE: Run FastQC @@ -33,7 +38,7 @@ workflow SEQINSPECTOR { FASTQC ( ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // @@ -46,26 +51,121 @@ workflow SEQINSPECTOR { // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_config = params.multiqc_config ? + Channel.fromPath(params.multiqc_config, checkIfExists: true) : + Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) ch_multiqc_logo = params.multiqc_logo ? 
Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_collated_versions) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) MULTIQC ( - ch_multiqc_files.collect(), + ch_multiqc_files + .map { meta, file -> file } + .mix(ch_multiqc_extra_files) + .collect(), ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), + Channel.empty().toList(), + ch_multiqc_logo.toList() + ) + + multiqc_extra_files = ch_multiqc_extra_files.toList() + + // Generate reports by lane + lane_mqc_files = ch_multiqc_files + .map { meta, sample -> [ "L${meta.lane}", meta, sample ] } + .groupTuple() + .tap { mqc_by_lane } + .collectFile{ + lane, meta, samples -> [ + "${lane}_multiqc_extra_config.yml", + "output_fn_name: \"${lane}_multiqc_report.html\"\ndata_dir_name: \"${lane}_multiqc_data\"\nplots_dir_name: \"${lane}_multiqc_plots\"" + ] + } + .map { file -> def fileparts = file.name.split("_") + [ fileparts[0], file ] + } + 
.join(mqc_by_lane) + .multiMap { lane, config, meta , samples_per_lane -> + samples_per_lane: samples_per_lane + config: config + } + + MULTIQC_PER_LANE( + lane_mqc_files.samples_per_lane + .map { samples -> samples + multiqc_extra_files.value }, + ch_multiqc_config.toList(), + lane_mqc_files.config, + ch_multiqc_logo.toList() + ) + + // Generate reports by group + group_mqc_files = ch_multiqc_files + .filter { meta, sample -> meta.group } + .map { meta, sample -> [ "G-${meta.group}", meta, sample ] } + .groupTuple() + .tap { mqc_by_group } + .collectFile{ + group, meta, samples -> [ + "${group}_multiqc_extra_config.yml", + "output_fn_name: \"${group}_multiqc_report.html\"\ndata_dir_name: \"${group}_multiqc_data\"\nplots_dir_name: \"${group}_multiqc_plots\"" + ] + } + .map { file -> def fileparts = file.name.split("_") + [ fileparts[0], file ] + } + .join(mqc_by_group) + .multiMap { group, config, meta , samples_per_group -> + samples_per_group: samples_per_group + config: config + } + + MULTIQC_PER_GROUP( + group_mqc_files.samples_per_group + .map { samples -> samples + multiqc_extra_files.value }, + ch_multiqc_config.toList(), + group_mqc_files.config, + ch_multiqc_logo.toList() + ) + + // Generate reports by rundir + rundir_mqc_files = ch_multiqc_files + .filter { meta, sample -> meta.rundir } + .map { meta, sample -> [ "D-${meta.rundir.name}", meta, sample ] } + .groupTuple() + .tap { mqc_by_rundir } + .collectFile{ + rundir, meta, samples -> [ + "${rundir}_multiqc_extra_config.yml", + "output_fn_name: \"${rundir}_multiqc_report.html\"\ndata_dir_name: \"${rundir}_multiqc_data\"\nplots_dir_name: \"${rundir}_multiqc_plots\"" + ] + } + .map { file -> def fileparts = file.name.split("_") + [ fileparts[0], file ] + } + .join(mqc_by_rundir) + .multiMap { rundir, config, meta , samples_per_rundir -> + samples_per_rundir: samples_per_rundir + config: config + } + + MULTIQC_PER_RUNDIR( + rundir_mqc_files.samples_per_rundir + .map { samples -> samples + 
multiqc_extra_files.value }, + ch_multiqc_config.toList(), + rundir_mqc_files.config, ch_multiqc_logo.toList() ) emit: - multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + global_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + rundir_reports = MULTIQC_PER_RUNDIR.out.report.toList() // channel: [ /path/to/multiqc_report.html ] versions = ch_versions // channel: [ path(versions.yml) ] } From 0da58701e706f23001df33bf997bf671ea37a787 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Thu, 28 Mar 2024 16:21:45 +0100 Subject: [PATCH 006/172] Improve formatting --- conf/modules.config | 2 +- main.nf | 6 ++-- workflows/seqinspector.nf | 71 +++++++++++++++++++++++++-------------- 3 files changed, 50 insertions(+), 29 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 296ca786..8b7a9c69 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -52,7 +52,7 @@ process { break default: filename - } + } } ] } diff --git a/main.nf b/main.nf index 58afd1fe..1ee043e9 100644 --- a/main.nf +++ b/main.nf @@ -58,9 +58,9 @@ workflow NFCORE_SEQINSPECTOR { ) emit: - global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html - lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html - group_report = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html + global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html + lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html + group_report = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html rundir_report = SEQINSPECTOR.out.rundir_reports // channel: /path/to/multiqc_report.html } diff --git 
a/workflows/seqinspector.nf b/workflows/seqinspector.nf index b6fd3bc4..f0a269a0 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -4,15 +4,17 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' + +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_RUNDIR } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-validation' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' + +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -27,10 +29,10 @@ workflow SEQINSPECTOR { main: - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() ch_multiqc_extra_files = Channel.empty() - ch_multiqc_reports = Channel.empty() + ch_multiqc_reports = Channel.empty() // // MODULE: Run FastQC @@ 
-45,23 +47,42 @@ workflow SEQINSPECTOR { // Collate and save software versions // softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_collated_versions } + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_pipeline_software_mqc_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } // // MODULE: MultiQC // - ch_multiqc_config = params.multiqc_config ? + ch_multiqc_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_collated_versions) - ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_logo = params.multiqc_logo ? 
+ Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.empty() + + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value( + paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? + file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_extra_files = ch_multiqc_extra_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix(ch_collated_versions) + ch_multiqc_extra_files = ch_multiqc_extra_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: false + ) + ) MULTIQC ( ch_multiqc_files @@ -163,10 +184,10 @@ workflow SEQINSPECTOR { emit: global_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] - group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] rundir_reports = MULTIQC_PER_RUNDIR.out.report.toList() // channel: [ /path/to/multiqc_report.html ] - versions = ch_versions // channel: [ path(versions.yml) ] + versions = ch_versions // channel: [ path(versions.yml) ] } /* From d233d8f4052208b4e572ddb1e8d561387df1cce5 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Thu, 28 Mar 2024 16:56:51 +0100 Subject: [PATCH 007/172] Improve output sorting --- conf/modules.config | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 
deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 8b7a9c69..3b2fc025 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -42,13 +42,25 @@ process { null break case ~/L\d+_multiqc_(report\.html|plots|data)/: - "lanes/L${(filename =~ /L(\d+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + def lane = (filename =~ /L(\d+)_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?.*)L${lane}_(?multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "lanes/L${lane}/${new_filename}" break case ~/G-.+_multiqc_(report\.html|plots|data)/: - "groups/G-${(filename =~ /G-(.+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + def group = (filename =~ /G-(.+)_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?.*)G-${group}_(?multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "groups/${group}/${new_filename}" break case ~/D-.+_multiqc_(report\.html|plots|data)/: - "rundirs/D-${(filename =~ /D-(.+)_multiqc_(report\.html|plots|data)/)[0][1]}/${filename}" + def rundir = (filename =~ /D-(.+)_multiqc_(report\.html|plots|data)/)[0][1] + def new_filename = filename.replaceFirst( + "(?.*)D-${rundir}_(?multiqc_(report\\.html|plots|data).*)", + '${prefix}${suffix}') + "rundirs/${rundir}/${new_filename}" break default: filename From 307e43c6a21c77ad30c8f02997c3220568d35fb2 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 8 Apr 2024 14:11:47 +0200 Subject: [PATCH 008/172] Use `group` instead of `project` --- assets/schema_input.json | 4 ++-- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 9fb321b5..1648944f 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -19,11 +19,11 @@ "errorMessage": "Lane ID must be a number", "meta": ["lane"] }, - "project": { + "group": { "type": 
"string", "pattern": "^\\S+$", "errorMessage": "Project ID cannot contain spaces", - "meta": ["project"] + "meta": ["group"] }, "fastq_1": { "type": "string", diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index cdbb6400..e1f8e4de 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def id_string = "${meta.sample}_${meta.project ?: "ungrouped"}_${meta.lane}" + def id_string = "${meta.sample}_${meta.group ?: "ungrouped"}_${meta.lane}" def updated_meta = meta + [ id: id_string ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] From 627cf940a2a6b5f7cca7cbb692af1cf74293a289 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 8 Apr 2024 14:21:59 +0200 Subject: [PATCH 009/172] Fix output channel --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 1ee043e9..9de4f94e 100644 --- a/main.nf +++ b/main.nf @@ -104,7 +104,7 @@ workflow { params.outdir, params.monochrome_logs, params.hook_url, - NFCORE_SEQINSPECTOR.out.multiqc_report + NFCORE_SEQINSPECTOR.out.global_report, ) } From c9ba02866dc35a1b345476645ece7f9a6e48cc8b Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 8 Apr 2024 14:22:17 +0200 Subject: [PATCH 010/172] Fix linting --- conf/modules.config | 2 +- .../local/utils_nfcore_seqinspector_pipeline/main.nf | 1 - workflows/seqinspector.nf | 6 +++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 3b2fc025..44a35137 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ 
-36,7 +36,7 @@ process { path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, saveAs: { - filename -> + filename -> switch (filename) { case 'versions.yml': null diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index e1f8e4de..9001400e 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -101,7 +101,6 @@ workflow PIPELINE_INITIALISATION { // meta, fastqs -> // return [ meta, fastqs.flatten() ] // } - .view() .set { ch_samplesheet } emit: diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index f0a269a0..815bc40f 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -108,7 +108,7 @@ workflow SEQINSPECTOR { ] } .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] + [ fileparts[0], file ] } .join(mqc_by_lane) .multiMap { lane, config, meta , samples_per_lane -> @@ -137,7 +137,7 @@ workflow SEQINSPECTOR { ] } .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] + [ fileparts[0], file ] } .join(mqc_by_group) .multiMap { group, config, meta , samples_per_group -> @@ -166,7 +166,7 @@ workflow SEQINSPECTOR { ] } .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] + [ fileparts[0], file ] } .join(mqc_by_rundir) .multiMap { rundir, config, meta , samples_per_rundir -> From 2cfc91d449bac7c1a6d22d125fd57e3d8592a51b Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 8 Apr 2024 15:48:41 +0200 Subject: [PATCH 011/172] Give credits back to NGI --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5e2bc4ca..abe0d4ef 100644 --- a/README.md +++ b/README.md @@ -80,11 +80,11 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/seqinspector was originally written by Adrien 
Coulier. +nf-core/seqinspector was originally written by the Swedish [@NationalGenomicsInfrastructure](https://github.com/NationalGenomicsInfrastructure/). We thank the following people for their extensive assistance in the development of this pipeline: - +- [@mahesh-panchal](https://github.com/mahesh-panchal) ## Contributions and Support From fbfb02dbba5945fcfcf8d431cccd7a230eea5ff5 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 9 Apr 2024 09:26:32 +0200 Subject: [PATCH 012/172] Fix file names --- conf/modules.config | 18 +++++++++--------- workflows/seqinspector.nf | 24 ++++++++++++++++++------ 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 44a35137..b2e48f3d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -41,24 +41,24 @@ process { case 'versions.yml': null break - case ~/L\d+_multiqc_(report\.html|plots|data)/: - def lane = (filename =~ /L(\d+)_multiqc_(report\.html|plots|data)/)[0][1] + case ~/\[LANE:\d+\]_multiqc_(report\.html|plots|data)/: + def lane = (filename =~ /\[LANE:(\d+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( - "(?.*)L${lane}_(?multiqc_(report\\.html|plots|data).*)", + "(?.*)\\[LANE:${lane}\\]_(?multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') "lanes/L${lane}/${new_filename}" break - case ~/G-.+_multiqc_(report\.html|plots|data)/: - def group = (filename =~ /G-(.+)_multiqc_(report\.html|plots|data)/)[0][1] + case ~/\[GROUP:.+\]_multiqc_(report\.html|plots|data)/: + def group = (filename =~ /\[GROUP:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( - "(?.*)G-${group}_(?multiqc_(report\\.html|plots|data).*)", + "(?.*)\\[GROUP:${group}\\]_(?multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') "groups/${group}/${new_filename}" break - case ~/D-.+_multiqc_(report\.html|plots|data)/: - def rundir = (filename =~ /D-(.+)_multiqc_(report\.html|plots|data)/)[0][1] 
+ case ~/\[RUNDIR:.+\]_multiqc_(report\.html|plots|data)/: + def rundir = (filename =~ /\[RUNDIR:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( - "(?.*)D-${rundir}_(?multiqc_(report\\.html|plots|data).*)", + "(?.*)\\[RUNDIR:${rundir}\\]_(?multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') "rundirs/${rundir}/${new_filename}" break diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 815bc40f..2acafc3f 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -98,13 +98,17 @@ workflow SEQINSPECTOR { // Generate reports by lane lane_mqc_files = ch_multiqc_files - .map { meta, sample -> [ "L${meta.lane}", meta, sample ] } + .map { meta, sample -> [ "[LANE:${meta.lane}]", meta, sample ] } .groupTuple() .tap { mqc_by_lane } .collectFile{ lane, meta, samples -> [ "${lane}_multiqc_extra_config.yml", - "output_fn_name: \"${lane}_multiqc_report.html\"\ndata_dir_name: \"${lane}_multiqc_data\"\nplots_dir_name: \"${lane}_multiqc_plots\"" + """ + |output_fn_name: \"${lane}_multiqc_report.html\" + |data_dir_name: \"${lane}_multiqc_data\" + |plots_dir_name: \"${lane}_multiqc_plots\" + """.stripMargin() ] } .map { file -> def fileparts = file.name.split("_") @@ -127,13 +131,17 @@ workflow SEQINSPECTOR { // Generate reports by group group_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.group } - .map { meta, sample -> [ "G-${meta.group}", meta, sample ] } + .map { meta, sample -> [ "[GROUP:${meta.group}]", meta, sample ] } .groupTuple() .tap { mqc_by_group } .collectFile{ group, meta, samples -> [ "${group}_multiqc_extra_config.yml", - "output_fn_name: \"${group}_multiqc_report.html\"\ndata_dir_name: \"${group}_multiqc_data\"\nplots_dir_name: \"${group}_multiqc_plots\"" + """ + |output_fn_name: \"${group}_multiqc_report.html\" + |data_dir_name: \"${group}_multiqc_data\" + |plots_dir_name: \"${group}_multiqc_plots\" + """.stripMargin() ] } .map { file -> def fileparts = 
file.name.split("_") @@ -156,13 +164,17 @@ workflow SEQINSPECTOR { // Generate reports by rundir rundir_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.rundir } - .map { meta, sample -> [ "D-${meta.rundir.name}", meta, sample ] } + .map { meta, sample -> [ "[RUNDIR:${meta.rundir.name}]", meta, sample ] } .groupTuple() .tap { mqc_by_rundir } .collectFile{ rundir, meta, samples -> [ "${rundir}_multiqc_extra_config.yml", - "output_fn_name: \"${rundir}_multiqc_report.html\"\ndata_dir_name: \"${rundir}_multiqc_data\"\nplots_dir_name: \"${rundir}_multiqc_plots\"" + """ + |output_fn_name: \"${rundir}_multiqc_report.html\" + |data_dir_name: \"${rundir}_multiqc_data\" + |plots_dir_name: \"${rundir}_multiqc_plots\" + """.stripMargin() ] } .map { file -> def fileparts = file.name.split("_") From 8a19929d03832ecaf95a50962898882de04e6884 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Fri, 3 May 2024 09:50:42 +0200 Subject: [PATCH 013/172] Set up tests --- .gitignore | 1 + nf-test.config | 8 ++++++++ tests/main.nf.test | 20 +++++++++++++++++++ tests/nextflow.config | 5 +++++ tests/workflows/seqinspector.nf.test | 30 ++++++++++++++++++++++++++++ 5 files changed, 64 insertions(+) create mode 100644 nf-test.config create mode 100644 tests/main.nf.test create mode 100644 tests/nextflow.config create mode 100644 tests/workflows/seqinspector.nf.test diff --git a/.gitignore b/.gitignore index 5124c9ac..089a4079 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ results/ testing/ testing* *.pyc +.nf-test diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..6969c085 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,8 @@ +config { + + testsDir "tests" + workDir ".nf-test" + configFile "tests/nextflow.config" + profile "test,docker" + +} diff --git a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 00000000..72498b5b --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,20 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" 
+ script "main.nf" + + test("Should run without failures") { + + when { + params { + outdir = "tests/results" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 00000000..c19b1ad0 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,5 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ diff --git a/tests/workflows/seqinspector.nf.test b/tests/workflows/seqinspector.nf.test new file mode 100644 index 00000000..bbb529dd --- /dev/null +++ b/tests/workflows/seqinspector.nf.test @@ -0,0 +1,30 @@ +nextflow_workflow { + + name "Test Workflow SEQINSPECTOR" + script "workflows/seqinspector.nf" + workflow "SEQINSPECTOR" + + test("Should run without failures pipeline") { + + when { + params { + // define parameters here. Example: + // outdir = "tests/results" + } + workflow { + """ + // define inputs of the workflow here. 
Example: + // input[0] = file("https://raw.githubusercontent.com/nf-core/test-datasets/e47966b63444ec0fcdef23bfc410eeca22535ac7/testdata/MiSeq/samplesheet.csv") + input[0] = file("assets/samplesheet.csv") + """ + } + } + + then { + assert workflow.success + assert snapshot(workflow.out).match() + } + + } + +} From eba628ef9389a13214e6c9663965d6ea56d7e994 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 12:18:42 +0200 Subject: [PATCH 014/172] point test conf upstream --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 38e9ee32..2c26a9c1 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,7 +22,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/KarNair/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv' // Genome references genome = 'R64-1-1' From 23f69d1302457fc32295c3c77bd41d443bb96d9e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 13:05:47 +0200 Subject: [PATCH 015/172] project -> group --- assets/samplesheet.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index fbe5de2d..fef3b4e6 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sample,lane,project,fastq_1,fastq_2,rundir +sample,lane,group,fastq_1,fastq_2,rundir SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir From 1ebf3f1adc86f3c50020a56f95430b06f3a5000a Mon Sep 17 00:00:00 2001 From: kedhammar 
Date: Fri, 3 May 2024 13:06:48 +0200 Subject: [PATCH 016/172] project -> group --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 1648944f..7115bfab 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -22,7 +22,7 @@ "group": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Project ID cannot contain spaces", + "errorMessage": "Group ID cannot contain spaces", "meta": ["group"] }, "fastq_1": { From b0bf471667c0547654afd14d668e8f70aab55185 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 13:53:31 +0200 Subject: [PATCH 017/172] make lane non-compulsory --- assets/schema_input.json | 2 +- workflows/seqinspector.nf | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 7115bfab..c9800d5d 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -47,7 +47,7 @@ "meta": ["rundir"] } }, - "required": ["sample", "lane", "fastq_1"], + "required": ["sample", "fastq_1"], "dependentRequired": { "fastq_2": ["fastq_1"] } diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 2acafc3f..018e079d 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -98,6 +98,7 @@ workflow SEQINSPECTOR { // Generate reports by lane lane_mqc_files = ch_multiqc_files + .filter { meta, sample -> meta.lane } .map { meta, sample -> [ "[LANE:${meta.lane}]", meta, sample ] } .groupTuple() .tap { mqc_by_lane } From 9e0eca39baf5b58760ec4117b4faefde4a512f09 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 15:59:10 +0200 Subject: [PATCH 018/172] remove unused file --- assets/samplesheet.csv | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 assets/samplesheet.csv diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv deleted file mode 100644 index fef3b4e6..00000000 --- a/assets/samplesheet.csv +++ /dev/null @@ -1,3 +0,0 
@@ -sample,lane,group,fastq_1,fastq_2,rundir -SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir -SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir From 98e60bf160cb6a4476d55b38a44095e67f79eeea Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 3 May 2024 16:38:52 +0200 Subject: [PATCH 019/172] revamp nf-test, run once for each sequencing platform --- tests/MiSeq.main.nf.test | 23 +++++++++++++++++++++ tests/NovaSeq6000.main.nf.test | 23 +++++++++++++++++++++ tests/PromethION.main.nf.test | 23 +++++++++++++++++++++ tests/main.nf.test | 20 ------------------- tests/nextflow.config | 11 ++++++++++ tests/workflows/seqinspector.nf.test | 30 ---------------------------- 6 files changed, 80 insertions(+), 50 deletions(-) create mode 100644 tests/MiSeq.main.nf.test create mode 100644 tests/NovaSeq6000.main.nf.test create mode 100644 tests/PromethION.main.nf.test delete mode 100644 tests/main.nf.test delete mode 100644 tests/workflows/seqinspector.nf.test diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test new file mode 100644 index 00000000..0246a75b --- /dev/null +++ b/tests/MiSeq.main.nf.test @@ -0,0 +1,23 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on MiSeq data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("MiSeq data test") { + + when { + params { + outdir = "tests/results/MiSeq" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test new file mode 100644 index 00000000..7c410e57 --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test @@ -0,0 +1,23 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on NovaSeq6000 data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + 
test("NovaSeq6000 data test") { + + when { + params { + outdir = "tests/results/NovaSeq6000" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/NovaSeq6000/samplesheet.csv" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test new file mode 100644 index 00000000..9987c953 --- /dev/null +++ b/tests/PromethION.main.nf.test @@ -0,0 +1,23 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on PromethION data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("PromethION data test") { + + when { + params { + outdir = "tests/results/PromethION" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/PromethION/samplesheet.csv" + } + } + + then { + assert workflow.success + } + + } + +} diff --git a/tests/main.nf.test b/tests/main.nf.test deleted file mode 100644 index 72498b5b..00000000 --- a/tests/main.nf.test +++ /dev/null @@ -1,20 +0,0 @@ -nextflow_pipeline { - - name "Test Workflow main.nf" - script "main.nf" - - test("Should run without failures") { - - when { - params { - outdir = "tests/results" - } - } - - then { - assert workflow.success - } - - } - -} diff --git a/tests/nextflow.config b/tests/nextflow.config index c19b1ad0..422545be 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -3,3 +3,14 @@ Nextflow config file for running tests ======================================================================================== */ + +params { + config_profile_name = 'nf-test profile' + config_profile_description = 'Configuration profile to use for nf-test.' 
+ + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '3.GB' + max_time = '2.h' + +} diff --git a/tests/workflows/seqinspector.nf.test b/tests/workflows/seqinspector.nf.test deleted file mode 100644 index bbb529dd..00000000 --- a/tests/workflows/seqinspector.nf.test +++ /dev/null @@ -1,30 +0,0 @@ -nextflow_workflow { - - name "Test Workflow SEQINSPECTOR" - script "workflows/seqinspector.nf" - workflow "SEQINSPECTOR" - - test("Should run without failures pipeline") { - - when { - params { - // define parameters here. Example: - // outdir = "tests/results" - } - workflow { - """ - // define inputs of the workflow here. Example: - // input[0] = file("https://raw.githubusercontent.com/nf-core/test-datasets/e47966b63444ec0fcdef23bfc410eeca22535ac7/testdata/MiSeq/samplesheet.csv") - input[0] = file("assets/samplesheet.csv") - """ - } - } - - then { - assert workflow.success - assert snapshot(workflow.out).match() - } - - } - -} From d95c660d8fbdba413920d45a7f7025e9261989c0 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Mon, 6 May 2024 15:54:05 +0200 Subject: [PATCH 020/172] Update modules and subworkflows --- modules.json | 4 ++-- modules/nf-core/fastqc/main.nf | 6 ++++++ subworkflows/nf-core/utils_nfcore_pipeline/main.nf | 8 +++++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/modules.json b/modules.json index ebbc5dc2..87fe816c 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "fastqc": { "branch": "master", - "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", + "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", "installed_by": ["modules"] }, "multiqc": { @@ -26,7 +26,7 @@ }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf 
index 9e19a74c..d79f1c86 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -25,6 +25,11 @@ process FASTQC { def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) + """ printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name @@ -33,6 +38,7 @@ process FASTQC { fastqc \\ $args \\ --threads $task.cpus \\ + --memory $fastqc_memory \\ $renamed_files cat <<-END_VERSIONS > versions.yml diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a8b55d6f..14558c39 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) { // Citation string for pipeline // def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + - " ${workflow.manifest.doi}\n\n" + + temp_doi_ref + "\n" + "* The nf-core framework\n" + " 
https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + From e0527ccb6c235fcb01f5fa26e1b192e705b3f873 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 7 May 2024 16:45:09 +0200 Subject: [PATCH 021/172] Fix multiqc extra files Issue with the previous implementation was that sometimes MULTIQC_PER_LANE would execute before the extra files were collected into `ch_multiqc_extra_files`, causing `null` to be added to the list of files passed to multiqc. --- workflows/seqinspector.nf | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 018e079d..65343a8b 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -94,11 +94,17 @@ workflow SEQINSPECTOR { ch_multiqc_logo.toList() ) - multiqc_extra_files = ch_multiqc_extra_files.toList() - // Generate reports by lane + multiqc_extra_files_per_lane = ch_multiqc_files + .filter { meta, sample -> meta.lane } + .map { meta, sample -> meta.lane } + .unique() + .map { lane -> [lane:lane] } + .cross(ch_multiqc_extra_files) + lane_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.lane } + .mix(multiqc_extra_files_per_lane) .map { meta, sample -> [ "[LANE:${meta.lane}]", meta, sample ] } .groupTuple() .tap { mqc_by_lane } @@ -122,16 +128,23 @@ workflow SEQINSPECTOR { } MULTIQC_PER_LANE( - lane_mqc_files.samples_per_lane - .map { samples -> samples + multiqc_extra_files.value }, + lane_mqc_files.samples_per_lane, ch_multiqc_config.toList(), lane_mqc_files.config, ch_multiqc_logo.toList() ) // Generate reports by group + multiqc_extra_files_per_group = ch_multiqc_files + .filter { meta, sample -> meta.group } + .map { meta, sample -> meta.group } + .unique() + .map { group -> [group:group] } + .cross(ch_multiqc_extra_files) + group_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.group } + .mix(multiqc_extra_files_per_group) .map { meta, sample -> [ 
"[GROUP:${meta.group}]", meta, sample ] } .groupTuple() .tap { mqc_by_group } @@ -155,16 +168,23 @@ workflow SEQINSPECTOR { } MULTIQC_PER_GROUP( - group_mqc_files.samples_per_group - .map { samples -> samples + multiqc_extra_files.value }, + group_mqc_files.samples_per_group, ch_multiqc_config.toList(), group_mqc_files.config, ch_multiqc_logo.toList() ) // Generate reports by rundir + multiqc_extra_files_per_rundir = ch_multiqc_files + .filter { meta, sample -> meta.rundir } + .map { meta, sample -> meta.rundir } + .unique() + .map { rundir -> [rundir:rundir] } + .cross(ch_multiqc_extra_files) + rundir_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.rundir } + .mix(multiqc_extra_files_per_rundir) .map { meta, sample -> [ "[RUNDIR:${meta.rundir.name}]", meta, sample ] } .groupTuple() .tap { mqc_by_rundir } @@ -188,8 +208,7 @@ workflow SEQINSPECTOR { } MULTIQC_PER_RUNDIR( - rundir_mqc_files.samples_per_rundir - .map { samples -> samples + multiqc_extra_files.value }, + rundir_mqc_files.samples_per_rundir, ch_multiqc_config.toList(), rundir_mqc_files.config, ch_multiqc_logo.toList() From 42159a132e7f73f2594fcb0e66ca26702311cefb Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 10:29:17 +0200 Subject: [PATCH 022/172] Add test snapshots --- tests/MiSeq.main.nf.test | 25 +++++++++++++++--- tests/MiSeq.main.nf.test.snap | 19 ++++++++++++++ tests/NovaSeq6000.main.nf.test | 40 ++++++++++++++++++++++++++--- tests/NovaSeq6000.main.nf.test.snap | 31 ++++++++++++++++++++++ tests/PromethION.main.nf.test | 20 ++++++++++++--- tests/PromethION.main.nf.test.snap | 15 +++++++++++ workflows/seqinspector.nf | 18 +++++-------- 7 files changed, 144 insertions(+), 24 deletions(-) create mode 100644 tests/MiSeq.main.nf.test.snap create mode 100644 tests/NovaSeq6000.main.nf.test.snap create mode 100644 tests/PromethION.main.nf.test.snap diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test index 0246a75b..1e72de95 100644 --- 
a/tests/MiSeq.main.nf.test +++ b/tests/MiSeq.main.nf.test @@ -9,15 +9,32 @@ nextflow_pipeline { when { params { - outdir = "tests/results/MiSeq" + outdir = "$outputDir" input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv" } } then { - assert workflow.success - } + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_software_versions.txt"), - } + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + ).match() + } + ) + } + } } diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap new file mode 100644 index 00000000..e222b515 --- /dev/null +++ b/tests/MiSeq.main.nf.test.snap @@ -0,0 +1,19 @@ +{ + "MiSeq data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", + "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", + "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + 
"multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", + "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + ], + "timestamp": "2024-05-08T16:29:33.284003" + } +} \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index 7c410e57..f5de9aa3 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -9,15 +9,47 @@ nextflow_pipeline { when { params { - outdir = "tests/results/NovaSeq6000" + outdir = "$outputDir" input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/NovaSeq6000/samplesheet.csv" } } then { - assert workflow.success - } + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_software_versions.txt"), - } + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_citations.txt"), + 
path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + ).match() + }, + ) + } + } } diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap new file mode 100644 index 00000000..a406b891 --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "NovaSeq6000 data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", + "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,9b4fd8a6d6e8a9acabecd592f633472e", + "multiqc_general_stats.txt:md5,8237b88ceb018d3cb1edcea62d10f4a2", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,9246a5b6b7b0410c79049fc3dbd08e92", + "multiqc_general_stats.txt:md5,44328403f423c6f5ac9ee0a8a01e6725", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + 
"multiqc_fastqc.txt:md5,84820276fae52d4d492831280ae6207c", + "multiqc_general_stats.txt:md5,dd07799e5e4b9d389f9de49a852c3363", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,59ae05d89453da6f57010ffb6466f902", + "multiqc_general_stats.txt:md5,e4629691992bfe639c01a84b90563334", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", + "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + ], + "timestamp": "2024-05-14T10:17:35.440827" + } +} \ No newline at end of file diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test index 9987c953..de2beb35 100644 --- a/tests/PromethION.main.nf.test +++ b/tests/PromethION.main.nf.test @@ -9,15 +9,27 @@ nextflow_pipeline { when { params { - outdir = "tests/results/PromethION" + outdir = "$outputDir" input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/PromethION/samplesheet.csv" } } then { - assert workflow.success - } + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + 
path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + ).match() + }, + ) + } } - } diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap new file mode 100644 index 00000000..24e52bd1 --- /dev/null +++ b/tests/PromethION.main.nf.test.snap @@ -0,0 +1,15 @@ +{ + "PromethION data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", + "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", + "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + ], + "timestamp": "2024-05-08T17:17:23.151259" + } +} \ No newline at end of file diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 65343a8b..3e2a08dc 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -100,7 +100,7 @@ workflow SEQINSPECTOR { .map { meta, sample -> meta.lane } .unique() .map { lane -> [lane:lane] } - .cross(ch_multiqc_extra_files) + .combine(ch_multiqc_extra_files) lane_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.lane } @@ -118,9 +118,7 @@ workflow SEQINSPECTOR { """.stripMargin() ] } - .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] - } + .map { file -> [ (file =~ /(\[LANE:.+\])/)[0][1], file ] } .join(mqc_by_lane) .multiMap { lane, config, meta , samples_per_lane -> samples_per_lane: samples_per_lane @@ -140,7 +138,7 @@ workflow SEQINSPECTOR { .map { meta, sample -> meta.group } .unique() .map { group -> [group:group] } - .cross(ch_multiqc_extra_files) + .combine(ch_multiqc_extra_files) group_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.group } @@ -158,9 +156,7 @@ workflow 
SEQINSPECTOR { """.stripMargin() ] } - .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] - } + .map { file -> [ (file =~ /(\[GROUP:.+\])/)[0][1], file ] } .join(mqc_by_group) .multiMap { group, config, meta , samples_per_group -> samples_per_group: samples_per_group @@ -180,7 +176,7 @@ workflow SEQINSPECTOR { .map { meta, sample -> meta.rundir } .unique() .map { rundir -> [rundir:rundir] } - .cross(ch_multiqc_extra_files) + .combine(ch_multiqc_extra_files) rundir_mqc_files = ch_multiqc_files .filter { meta, sample -> meta.rundir } @@ -198,9 +194,7 @@ workflow SEQINSPECTOR { """.stripMargin() ] } - .map { file -> def fileparts = file.name.split("_") - [ fileparts[0], file ] - } + .map { file -> [ (file =~ /(\[RUNDIR:.+\])/)[0][1], file ] } .join(mqc_by_rundir) .multiMap { rundir, config, meta , samples_per_rundir -> samples_per_rundir: samples_per_rundir From ef61f9f7e32f63235f0bf574a2fd89c1c119ffcf Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 11:08:13 +0200 Subject: [PATCH 023/172] Remove unused module configuration --- conf/base.config | 3 --- conf/modules.config | 8 -------- 2 files changed, 11 deletions(-) diff --git a/conf/base.config b/conf/base.config index 8d7dffc6..aab50f93 100644 --- a/conf/base.config +++ b/conf/base.config @@ -59,7 +59,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/modules.config b/conf/modules.config index b2e48f3d..da3d2ca2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,14 +22,6 @@ process { ext.args = '--quiet' } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } - withName: 'MULTIQC' { ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } publishDir = [ From 51b01e98ca9560154634b08d541475abeffbc35c Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 11:16:41 +0200 Subject: [PATCH 024/172] Update usage docs and restore example samplesheet --- assets/samplesheet.csv | 3 +++ docs/usage.md | 36 ++++++++++++------------------------ 2 files changed, 15 insertions(+), 24 deletions(-) create mode 100644 assets/samplesheet.csv diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100644 index 00000000..fef3b4e6 --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,3 @@ +sample,lane,group,fastq_1,fastq_2,rundir +SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir +SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir diff --git a/docs/usage.md b/docs/usage.md index f926de73..df2be203 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -16,39 +16,27 @@ You will need to create a samplesheet with information about the samples you wou --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample - -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: - -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz -``` - ### Full samplesheet -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. 
The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. - -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. - ```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +sample,lane,group,fastq_1,fastq_2,rundir +CONTROL_REP1,1,,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX +CONTROL_REP2,1,,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX +CONTROL_REP3,1,,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX +TREATMENT_REP1,2,GROUP1,AEG588A4_S4_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX +TREATMENT_REP2,2,GROUP1,AEG588A5_S5_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX +TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX +TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L004_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX ``` | Column | Description | | --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. 
Spaces in sample names are automatically converted to underscores (`_`). | +| `lane` | Lane where the sample was processed on an Illumina instrument (optional). | +| `group` | Group the sample belongs to, useful when several groups are pooled together (optional). | +| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional). | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. From 7c7f31f715a40c1f5b061099c9cd945214875ab0 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 12:09:51 +0200 Subject: [PATCH 025/172] Update output docs --- docs/output.md | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index 77e4cc1c..f83e1fa0 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,8 +6,6 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: @@ -48,6 +46,31 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline.
- `multiqc_plots/`: directory containing static images from the report in various formats. + - `lanes/` [1] + - `L1/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `L2/` + - ... + - `groups/` [1] + - `GROUPNAME1/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `GROUPNAME2/` + - ... + - `rundir/` [1] + - `RUNDIR1/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `RUNDIR2/` + - ... + + +[1] These files will only be generated if `lane`, `group` or `rundir` were specified for some samples. + From 1c4f6e07019fad57bce6d5d3473155b2f3b364b3 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 12:16:38 +0200 Subject: [PATCH 026/172] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58a53ef4..6430addb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c ### `Added` +- [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane. 
+ ### `Fixed` ### `Dependencies` From 211bfafb21f6f8c6b208d33875ddc9e45552973a Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 12:20:34 +0200 Subject: [PATCH 027/172] Update samplesheet in readme file --- README.md | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index abe0d4ef..edc54ae5 100644 --- a/README.md +++ b/README.md @@ -39,26 +39,19 @@ > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - - Now, you can run the pipeline using: - - ```bash nextflow run nf-core/seqinspector \ -profile \ From df4e9cbcbd1f62990f44dd90d9eb0a2990d0c657 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 14 May 2024 12:43:27 +0200 Subject: [PATCH 028/172] Run prettier --- docs/output.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index f83e1fa0..7af7806c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -68,10 +68,8 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m - `RUNDIR2/` - ... - [1] These files will only be generated if `lane`, `group` or `rundir` were specified for some samples. - [MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. 
From 2303db9a4c28fe0b5a907ab73bc3946f3e3daa3e Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Thu, 16 May 2024 16:00:32 +0200 Subject: [PATCH 029/172] Use testdata base path param in tests/MiSeq.main.nf.test Co-authored-by: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> --- tests/MiSeq.main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test index 1e72de95..a3dc0a5a 100644 --- a/tests/MiSeq.main.nf.test +++ b/tests/MiSeq.main.nf.test @@ -10,7 +10,7 @@ nextflow_pipeline { when { params { outdir = "$outputDir" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv" + input = params.pipelines_testdata_base_path + "seqinspector/testdata/MiSeq/samplesheet.csv" } } From 1ea8ac0dac6c0cb11e098beb3c516ba0bb6cdc49 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Thu, 16 May 2024 16:01:00 +0200 Subject: [PATCH 030/172] Use testdata base path param in tests/NovaSeq6000.main.nf.test Co-authored-by: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> --- tests/NovaSeq6000.main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index f5de9aa3..bc687abe 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -10,7 +10,7 @@ nextflow_pipeline { when { params { outdir = "$outputDir" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/NovaSeq6000/samplesheet.csv" + input = params.pipelines_testdata_base_path + "seqinspector/testdata/NovaSeq6000/samplesheet.csv" } } From 048765fd18409769442915dcd403da682dfd852f Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Thu, 16 May 2024 16:01:20 +0200 Subject: [PATCH 031/172] Use 
testdata base path param in tests/PromethION.main.nf.test Co-authored-by: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> --- tests/PromethION.main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test index de2beb35..d1969f3e 100644 --- a/tests/PromethION.main.nf.test +++ b/tests/PromethION.main.nf.test @@ -10,7 +10,7 @@ nextflow_pipeline { when { params { outdir = "$outputDir" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/seqinspector/testdata/PromethION/samplesheet.csv" + input = params.pipelines_testdata_base_path + "seqinspector/testdata/PromethION/samplesheet.csv" } } From 4329bb97c229bcb4d869e3c2d88af08cab730b37 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 17 May 2024 11:38:29 +0200 Subject: [PATCH 032/172] Make the tests work with 'pipelines_testdata_base_path' parameter. --- tests/MiSeq.main.nf.test | 2 +- tests/MiSeq.main.nf.test.config | 7 +++++++ tests/NovaSeq6000.main.nf.test | 2 +- tests/NovaSeq6000.main.nf.test.config | 7 +++++++ tests/PromethION.main.nf.test | 2 +- tests/PromethION.main.nf.test.config | 7 +++++++ tests/nextflow.config | 5 +++++ 7 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 tests/MiSeq.main.nf.test.config create mode 100644 tests/NovaSeq6000.main.nf.test.config create mode 100644 tests/PromethION.main.nf.test.config diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test index a3dc0a5a..bfabd954 100644 --- a/tests/MiSeq.main.nf.test +++ b/tests/MiSeq.main.nf.test @@ -8,9 +8,9 @@ nextflow_pipeline { test("MiSeq data test") { when { + config "./MiSeq.main.nf.test.config" params { outdir = "$outputDir" - input = params.pipelines_testdata_base_path + "seqinspector/testdata/MiSeq/samplesheet.csv" } } diff --git a/tests/MiSeq.main.nf.test.config b/tests/MiSeq.main.nf.test.config new file mode 100644 index 00000000..073a9774 --- /dev/null +++ b/tests/MiSeq.main.nf.test.config 
@@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/MiSeq/samplesheet.csv' +} diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index bc687abe..174e215d 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -8,9 +8,9 @@ nextflow_pipeline { test("NovaSeq6000 data test") { when { + config "./NovaSeq6000.main.nf.test.config" params { outdir = "$outputDir" - input = params.pipelines_testdata_base_path + "seqinspector/testdata/NovaSeq6000/samplesheet.csv" } } diff --git a/tests/NovaSeq6000.main.nf.test.config b/tests/NovaSeq6000.main.nf.test.config new file mode 100644 index 00000000..cad5edd9 --- /dev/null +++ b/tests/NovaSeq6000.main.nf.test.config @@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv' +} diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test index d1969f3e..39284786 100644 --- a/tests/PromethION.main.nf.test +++ b/tests/PromethION.main.nf.test @@ -8,9 +8,9 @@ nextflow_pipeline { test("PromethION data test") { when { + config "./PromethION.main.nf.test.config" params { outdir = "$outputDir" - input = params.pipelines_testdata_base_path + "seqinspector/testdata/PromethION/samplesheet.csv" } } diff --git a/tests/PromethION.main.nf.test.config b/tests/PromethION.main.nf.test.config new file mode 100644 index 00000000..e1498a49 --- /dev/null +++ b/tests/PromethION.main.nf.test.config @@ -0,0 +1,7 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/PromethION/samplesheet.csv' +} diff --git 
a/tests/nextflow.config b/tests/nextflow.config index 422545be..8d9ef461 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -13,4 +13,9 @@ params { max_memory = '3.GB' max_time = '2.h' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + + validationSchemaIgnoreParams = 'genomes,igenomes_base,pipelines_testdata_base_path' + + } From 3ff850395b52b9ee509901f203c301fd40aa9844 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 17 May 2024 13:28:56 +0200 Subject: [PATCH 033/172] update snapshots, add nf-test.log to gitignore --- .gitignore | 1 + tests/MiSeq.main.nf.test.snap | 12 ++++++++---- tests/NovaSeq6000.main.nf.test.snap | 18 +++++++++++------- tests/PromethION.main.nf.test.snap | 10 +++++++--- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 089a4079..72277655 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ testing/ testing* *.pyc .nf-test +.nf-test.log diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index e222b515..96896382 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -4,16 +4,20 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", 
"multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" ], - "timestamp": "2024-05-08T16:29:33.284003" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T12:59:21.531493" } } \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index a406b891..cbb75383 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -4,28 +4,32 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,9b4fd8a6d6e8a9acabecd592f633472e", "multiqc_general_stats.txt:md5,8237b88ceb018d3cb1edcea62d10f4a2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,9246a5b6b7b0410c79049fc3dbd08e92", "multiqc_general_stats.txt:md5,44328403f423c6f5ac9ee0a8a01e6725", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,84820276fae52d4d492831280ae6207c", "multiqc_general_stats.txt:md5,dd07799e5e4b9d389f9de49a852c3363", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", 
"multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,59ae05d89453da6f57010ffb6466f902", "multiqc_general_stats.txt:md5,e4629691992bfe639c01a84b90563334", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" ], - "timestamp": "2024-05-14T10:17:35.440827" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T13:02:20.874181" } } \ No newline at end of file diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index 24e52bd1..951c5550 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -4,12 +4,16 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" ], - "timestamp": "2024-05-08T17:17:23.151259" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T12:58:02.572837" } } \ No newline at end of file From 8ac4d76770515159a23f746d4d3fa8c4d3e06ce1 Mon Sep 17 00:00:00 2001 From: kedhammar 
Date: Fri, 17 May 2024 15:49:01 +0200 Subject: [PATCH 034/172] visualize example run dir corresponsing to samplesheet --- docs/usage.md | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 57314973..7a4ec735 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,7 +10,7 @@ ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. ```bash --input '[path to samplesheet file]' @@ -18,15 +18,25 @@ You will need to create a samplesheet with information about the samples you wou ### Full samplesheet +The following simple run dir structure... 
+ +``` +run_dir +├── sample1_lane1_group1_r1.fq.gz +├── sample2_lane1_group1_r1.fq.gz +├── sample3_lane2_group2_r1.fq.gz +└── sample4_lane2_group3_r1.fq.gz +``` + +...would be represented in the following samplesheet (shown as .tsv for readability) + ```csv title="samplesheet.csv" -sample,lane,group,fastq_1,fastq_2,rundir -CONTROL_REP1,1,,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX -CONTROL_REP2,1,,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX -CONTROL_REP3,1,,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX -TREATMENT_REP1,2,GROUP1,AEG588A4_S4_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX -TREATMENT_REP2,2,GROUP1,AEG588A5_S5_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX -TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L003_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX -TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L004_R1_001.fastq.gz,,200624_A00834_0183_BHMTFYDRXX +sample lane group fastq_1 fastq_2 rundir +sample1 1 group1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir +sample2 1 group1 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir +sample3 2 group2 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir +sample4 2 group3 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir + ``` | Column | Description | @@ -34,11 +44,11 @@ TREATMENT_REP3,2,GROUP2,AEG588A6_S6_L004_R1_001.fastq.gz,,200624_A00834_0183_BHM | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | | `lane` | Lane where the sample was processed on an Illumina instrument (optional). | | `group` | Group the sample belongs too, useful when several groups are pooled together (optional). 
| -| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional) . | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | +| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional) . | -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +Another [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. ## Running the pipeline From 84c9b3d95c693b722a3e61752ba498b05fc08e57 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 20 May 2024 12:41:50 +0200 Subject: [PATCH 035/172] naming fixes --- main.nf | 4 ++-- workflows/seqinspector.nf | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/main.nf b/main.nf index 9de4f94e..fb00bd5d 100644 --- a/main.nf +++ b/main.nf @@ -59,8 +59,8 @@ workflow NFCORE_SEQINSPECTOR { emit: global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html - lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html - group_report = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html + lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html + group_reports = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html rundir_report = SEQINSPECTOR.out.rundir_reports // channel: /path/to/multiqc_report.html } diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 0eb375dd..50fa481f 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -6,7 +6,7 @@ include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_GLOBAL } 
from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_RUNDIR } from '../modules/nf-core/multiqc/main' @@ -84,7 +84,7 @@ workflow SEQINSPECTOR { ) ) - MULTIQC ( + MULTIQC_GLOBAL ( ch_multiqc_files .map { meta, file -> file } .mix(ch_multiqc_extra_files) @@ -97,9 +97,8 @@ workflow SEQINSPECTOR { // Generate reports by lane multiqc_extra_files_per_lane = ch_multiqc_files .filter { meta, sample -> meta.lane } - .map { meta, sample -> meta.lane } + .map { meta, sample -> [lane: meta.lane] } .unique() - .map { lane -> [lane:lane] } .combine(ch_multiqc_extra_files) lane_mqc_files = ch_multiqc_files @@ -209,7 +208,7 @@ workflow SEQINSPECTOR { ) emit: - global_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + global_report = MULTIQC_GLOBAL.out.report.toList() // channel: /path/to/multiqc_report.html lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] rundir_reports = MULTIQC_PER_RUNDIR.out.report.toList() // channel: [ /path/to/multiqc_report.html ] From aaf17b68f90dcaa123a9bc0c7807c7bba0dc2fbd Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 20 May 2024 12:44:01 +0200 Subject: [PATCH 036/172] nf-core sync --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/linting.yml | 19 ++++++++++--------- .github/workflows/linting_comment.yml | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 6de151f7..5d27c53a 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/seqi - [ ] If you've added a new tool - have you 
followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/seqinspector/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/seqinspector _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 1fcafe88..073e1876 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,12 +14,13 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - name: Set up Python 3.12 - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + - name: Set up Python 3.11 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: - python-version: "3.12" + python-version: 3.11 + cache: "pip" - name: Install pre-commit run: pip install pre-commit @@ -31,14 +32,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: - python-version: "3.12" + python-version: 
"3.11" architecture: "x64" - name: Install dependencies @@ -59,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 + uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 40acc23f..b706875f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 + uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 with: workflow: linting.yml workflow_conclusion: completed From e6dfea9ca1fbbb0019e438c6394d62daf1f2cda8 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 20 May 2024 12:44:54 +0200 Subject: [PATCH 037/172] nf-core fixes --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/linting.yml | 19 +++++++++---------- .github/workflows/linting_comment.yml | 2 +- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 5d27c53a..6de151f7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/seqi - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/seqinspector/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/seqinspector _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). 
+- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 073e1876..1fcafe88 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,13 +14,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - name: Set up Python 3.11 - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 - cache: "pip" + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit @@ -32,14 +31,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -60,7 +59,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 
b706875f..40acc23f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed From 02affeb602655023888b330b0402bcbe2a0e613a Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Thu, 30 May 2024 13:18:36 +0200 Subject: [PATCH 038/172] Improve publishDir logic --- conf/modules.config | 56 +++++++++++++++++++++++++---- tests/MiSeq.main.nf.test.snap | 12 +++---- tests/NovaSeq6000.main.nf.test.snap | 18 ++++------ tests/PromethION.main.nf.test.snap | 10 ++---- 4 files changed, 64 insertions(+), 32 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index da3d2ca2..f7e78457 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,11 +22,20 @@ process { ext.args = '--quiet' } - withName: 'MULTIQC' { + withName: 'MULTIQC_GLOBAL' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MULTIQC_PER_LANE' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/lanes" }, + mode: params.publish_dir_mode, saveAs: { filename -> switch (filename) { @@ -38,27 +47,62 @@ process { def new_filename = filename.replaceFirst( "(?.*)\\[LANE:${lane}\\]_(?multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') - "lanes/L${lane}/${new_filename}" + "L${lane}/${new_filename}" + break + default: + filename + } + } + ] + } + + withName: 'MULTIQC_PER_GROUP' { + ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/groups" }, + mode: params.publish_dir_mode, + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null break case ~/\[GROUP:.+\]_multiqc_(report\.html|plots|data)/: def group = (filename =~ /\[GROUP:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( "(?.*)\\[GROUP:${group}\\]_(?multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') - "groups/${group}/${new_filename}" + "${group}/${new_filename}" + break + default: + filename + } + } + ] + } + + withName: 'MULTIQC_PER_RUNDIR' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc/rundirss" }, + mode: params.publish_dir_mode, + saveAs: { + filename -> + switch (filename) { + case 'versions.yml': + null break case ~/\[RUNDIR:.+\]_multiqc_(report\.html|plots|data)/: def rundir = (filename =~ /\[RUNDIR:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( "(?.*)\\[RUNDIR:${rundir}\\]_(?multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') - "rundirs/${rundir}/${new_filename}" + "${rundir}/${new_filename}" break default: filename - } + } } ] } - } diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index 96896382..87c1a561 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -4,20 +4,16 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", 
"multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-17T12:59:21.531493" + "timestamp": "2024-05-30T13:14:20.263485" } } \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index cbb75383..22600251 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -4,32 +4,28 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,9b4fd8a6d6e8a9acabecd592f633472e", "multiqc_general_stats.txt:md5,8237b88ceb018d3cb1edcea62d10f4a2", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,9246a5b6b7b0410c79049fc3dbd08e92", "multiqc_general_stats.txt:md5,44328403f423c6f5ac9ee0a8a01e6725", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", 
"multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,84820276fae52d4d492831280ae6207c", "multiqc_general_stats.txt:md5,dd07799e5e4b9d389f9de49a852c3363", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,59ae05d89453da6f57010ffb6466f902", "multiqc_general_stats.txt:md5,e4629691992bfe639c01a84b90563334", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-17T13:02:20.874181" + "timestamp": "2024-05-30T13:13:49.062282" } } \ No newline at end of file diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index 951c5550..0ac213c1 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -4,16 +4,12 @@ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921", + "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,b1e01403f9bdaa81ebabd388d5f9a921" + 
"multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-17T12:58:02.572837" + "timestamp": "2024-05-30T13:14:40.99246" } } \ No newline at end of file From 8b83f13f95c26d7f336d53fc4f9571c14ab3b247 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 27 Aug 2024 13:17:02 +0200 Subject: [PATCH 039/172] Implement tagging system --- README.md | 4 +- assets/samplesheet.csv | 10 +- assets/schema_input.json | 16 +-- conf/modules.config | 64 +-------- docs/output.md | 45 +++--- docs/usage.md | 13 +- main.nf | 6 +- .../main.nf | 4 +- tests/MiSeq.main.nf.test | 18 +-- tests/MiSeq.main.nf.test.snap | 14 +- tests/NovaSeq6000.main.nf.test | 53 ++++--- tests/NovaSeq6000.main.nf.test.snap | 26 ++-- tests/PromethION.main.nf.test | 13 +- tests/PromethION.main.nf.test.snap | 10 +- workflows/seqinspector.nf | 131 ++++-------------- 15 files changed, 126 insertions(+), 301 deletions(-) diff --git a/README.md b/README.md index 7efdd3e9..31018e7b 100644 --- a/README.md +++ b/README.md @@ -44,8 +44,8 @@ First, prepare a samplesheet with your input data that looks as follows: `samplesheet.csv`: ```csv -sample,lane,group,fastq_1,fastq_2,rundir -CONTROL_REP1,1,GROUP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX +sample,fastq_1,fastq_2,rundir,tags +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,"patient1" ``` Each row represents a fastq file (single-end) or a pair of fastq files (paired end). 
diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index fef3b4e6..00019e58 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,7 @@ -sample,lane,group,fastq_1,fastq_2,rundir -SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir -SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir +sample,fastq_1,fastq_2,rundir,tags +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort1" +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort1" +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort2" +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient1" +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient2" +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient3" diff --git a/assets/schema_input.json b/assets/schema_input.json index c9800d5d..62922b79 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -13,18 +13,6 @@ "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["sample"] }, - "lane": { - "type": "integer", - "pattern": "^\\d+$", - "errorMessage": "Lane ID must be a number", - "meta": ["lane"] - }, - "group": { - "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Group ID cannot contain spaces", - "meta": ["group"] - }, "fastq_1": { "type": "string", "format": "file-path", @@ -45,6 +33,10 @@ "exists": true, "errorMessage": "Run directory must be a path", "meta": ["rundir"] + }, + "tags": 
{ + "type": "string", + "meta": ["tags"] } }, "required": ["sample", "fastq_1"], diff --git a/conf/modules.config b/conf/modules.config index f7e78457..c8838224 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -25,16 +25,16 @@ process { withName: 'MULTIQC_GLOBAL' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/multiqc" }, + path: { "${params.outdir}/multiqc/global_report" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'MULTIQC_PER_LANE' { + withName: 'MULTIQC_PER_TAG' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/multiqc/lanes" }, + path: { "${params.outdir}/multiqc/group_reports" }, mode: params.publish_dir_mode, saveAs: { filename -> @@ -42,62 +42,12 @@ process { case 'versions.yml': null break - case ~/\[LANE:\d+\]_multiqc_(report\.html|plots|data)/: - def lane = (filename =~ /\[LANE:(\d+)\]_multiqc_(report\.html|plots|data)/)[0][1] + case ~/\[TAG:.+\]_multiqc_(report\.html|plots|data)/: + def tag = (filename =~ /\[TAG:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] def new_filename = filename.replaceFirst( - "(?.*)\\[LANE:${lane}\\]_(?multiqc_(report\\.html|plots|data).*)", + "(?.*)\\[TAG:${tag}\\]_(?multiqc_(report\\.html|plots|data).*)", '${prefix}${suffix}') - "L${lane}/${new_filename}" - break - default: - filename - } - } - ] - } - - withName: 'MULTIQC_PER_GROUP' { - ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } - publishDir = [ - path: { "${params.outdir}/multiqc/groups" }, - mode: params.publish_dir_mode, - saveAs: { - filename -> - switch (filename) { - case 'versions.yml': - null - break - case ~/\[GROUP:.+\]_multiqc_(report\.html|plots|data)/: - def group = (filename =~ /\[GROUP:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] - def new_filename = filename.replaceFirst( - "(?.*)\\[GROUP:${group}\\]_(?multiqc_(report\\.html|plots|data).*)", - '${prefix}${suffix}') - "${group}/${new_filename}" - break - default: - filename - } - } - ] - } - - withName: 'MULTIQC_PER_RUNDIR' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } - publishDir = [ - path: { "${params.outdir}/multiqc/rundirss" }, - mode: params.publish_dir_mode, - saveAs: { - filename -> - switch (filename) { - case 'versions.yml': - null - break - case ~/\[RUNDIR:.+\]_multiqc_(report\.html|plots|data)/: - def rundir = (filename =~ /\[RUNDIR:(.+)\]_multiqc_(report\.html|plots|data)/)[0][1] - def new_filename = filename.replaceFirst( - "(?.*)\\[RUNDIR:${rundir}\\]_(?multiqc_(report\\.html|plots|data).*)", - '${prefix}${suffix}') - "${rundir}/${new_filename}" + "${tag}/${new_filename}" break default: filename diff --git a/docs/output.md b/docs/output.md index 7af7806c..15c29ce2 100644 --- a/docs/output.md +++ b/docs/output.md @@ -39,36 +39,29 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m ### MultiQC +nf-core/seqinspector will generate the following MultiQC reports: + +- one global report including all the samples listed in the samplesheet +- one group report per unique tag. These reports compile samples that share the same tag. +
Output files - `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. - - `lanes/` [1] - - `L1/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. - - `L2/` - - ... - - `groups/` [1] - - `GROUPNAME1/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. - - `GROUPNAME2/` - - ... - - `rundir/` [1] - - `RUNDIR1/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. - - `RUNDIR2/` - - ... - -[1] These files will only be generated if `lane`, `group` or `rundir` were specified for some samples. + - `global_report` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + - `group_reports` + - `tag1/` + - `multiqc_report.html` + - `multiqc_data/` + - `multiqc_plots/` + - `tag2/` + - `multiqc_report.html` + - `multiqc_data/` + - `multiqc_plots/` + - ...
diff --git a/docs/usage.md b/docs/usage.md index 7a4ec735..42d596bb 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -31,22 +31,21 @@ run_dir ...would be represented in the following samplesheet (shown as .tsv for readability) ```csv title="samplesheet.csv" -sample lane group fastq_1 fastq_2 rundir -sample1 1 group1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir -sample2 1 group1 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir -sample3 2 group2 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir -sample4 2 group3 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir +sample fastq_1 fastq_2 rundir tags +sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir "cohort1,patient1" +sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir "cohort1,patient2" +sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir "cohort1,patient3" +sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir "control" ``` | Column | Description | | --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `lane` | Lane where the sample was processed on an Illumina instrument (optional). | -| `group` | Group the sample belongs too, useful when several groups are pooled together (optional). | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). 
| | `rundir` | Path to the runfolder containing extra information about the sequencing run (optional) . | +| `tags` | Comma-separated list of tags to group samples in special reports. | Another [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. diff --git a/main.nf b/main.nf index fb00bd5d..a55408ef 100644 --- a/main.nf +++ b/main.nf @@ -58,10 +58,8 @@ workflow NFCORE_SEQINSPECTOR { ) emit: - global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html - lane_reports = SEQINSPECTOR.out.lane_reports // channel: /path/to/multiqc_report.html - group_reports = SEQINSPECTOR.out.group_reports // channel: /path/to/multiqc_report.html - rundir_report = SEQINSPECTOR.out.rundir_reports // channel: /path/to/multiqc_report.html + global_report = SEQINSPECTOR.out.global_report // channel: /path/to/multiqc_report.html + grouped_reports = SEQINSPECTOR.out.grouped_reports // channel: /path/to/multiqc_report.html } /* diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index ea9c9b8a..afcfc68b 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,8 +84,8 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def id_string = "${meta.sample}_${meta.group ?: "ungrouped"}_${meta.lane}" - def updated_meta = meta + [ id: id_string ] + def tags = meta.tags ? 
meta.tags.tokenize(",") : [] + def updated_meta = meta + [ id:meta.sample, tags:tags ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] } else { diff --git a/tests/MiSeq.main.nf.test b/tests/MiSeq.main.nf.test index bfabd954..8fbff4a3 100644 --- a/tests/MiSeq.main.nf.test +++ b/tests/MiSeq.main.nf.test @@ -18,20 +18,10 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot( - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/groups/P001/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_software_versions.txt"), ).match() } ) diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index 87c1a561..26f0c2a8 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -2,18 +2,10 @@ "MiSeq data test": { "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - 
"multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", - "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", - "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,692b8aed0614ed1655f2c1cbea1ba312", - "multiqc_general_stats.txt:md5,630167d67d3f92408cd1a04422c7196f", + "multiqc_fastqc.txt:md5,e46e7baa8f57d4cf54d973925b5eadf9", + "multiqc_general_stats.txt:md5,a5e626a2e1a3c986092e4f89091cc41c", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "timestamp": "2024-05-30T13:14:20.263485" + "timestamp": "2024-08-26T17:55:16.152573" } } \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index 174e215d..f93891bf 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -18,35 +18,30 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot( - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/lanes/L1/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/groups/S1/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_citations.txt"), - 
path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/groups/S2/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/groups/S3/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/groups/S4/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_fastqc.txt"), + 
path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_software_versions.txt"), ).match() }, ) diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index 22600251..f5c4776b 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -2,30 +2,26 @@ "NovaSeq6000 data test": { "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", - "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", + "multiqc_fastqc.txt:md5,e8ed6dca928396b8873d24e60ea1a133", + "multiqc_general_stats.txt:md5,fd9d46c5b441908cd07e5373d116db17", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,9b4fd8a6d6e8a9acabecd592f633472e", - "multiqc_general_stats.txt:md5,8237b88ceb018d3cb1edcea62d10f4a2", + "multiqc_fastqc.txt:md5,ff9b31c6024f11a8135456e7ea01fc8f", + "multiqc_general_stats.txt:md5,f36bd6e27e92c25be076efea411d3a8e", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", 
"multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,9246a5b6b7b0410c79049fc3dbd08e92", - "multiqc_general_stats.txt:md5,44328403f423c6f5ac9ee0a8a01e6725", + "multiqc_fastqc.txt:md5,62d51280dcd7634f6bed95ffe0d8dab8", + "multiqc_general_stats.txt:md5,2012002b6a057be981a97fcc96142a6c", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,84820276fae52d4d492831280ae6207c", - "multiqc_general_stats.txt:md5,dd07799e5e4b9d389f9de49a852c3363", + "multiqc_fastqc.txt:md5,63749e803a2d5fc7ecc7cd93fa68df1f", + "multiqc_general_stats.txt:md5,656931993032400dea3d441b8b61b4d2", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,59ae05d89453da6f57010ffb6466f902", - "multiqc_general_stats.txt:md5,e4629691992bfe639c01a84b90563334", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,5ba3f4f4ec6026a5f5d55418384dcd3e", - "multiqc_general_stats.txt:md5,123cd6b64c9c15607405bcdd45a843d4", + "multiqc_fastqc.txt:md5,91cc62e1b4059bdbe4b88affa43378af", + "multiqc_general_stats.txt:md5,6e500f82550e00b07c3e7aa1d46ab9e9", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "timestamp": "2024-05-30T13:13:49.062282" + "timestamp": "2024-08-26T18:03:33.089142" } } \ No newline at end of file diff --git a/tests/PromethION.main.nf.test b/tests/PromethION.main.nf.test index 39284786..8fec8b33 100644 --- a/tests/PromethION.main.nf.test +++ b/tests/PromethION.main.nf.test @@ -18,15 +18,10 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot( - path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_citations.txt"), - 
path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/groups/r10p41_e8p2_human_runs_jkw/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_software_versions.txt"), ).match() }, ) diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index 0ac213c1..e3b34f23 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -2,14 +2,10 @@ "PromethION data test": { "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", - "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,333360ff12007d64f2bf7673b0658bed", - "multiqc_general_stats.txt:md5,b1999255f9a502618d59be2f2e93bad2", + "multiqc_fastqc.txt:md5,cecee3cb343c75c80180d3169c6f3ea1", + "multiqc_general_stats.txt:md5,e63c25089c4fc10618414ba2254d18c7", "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" ], - "timestamp": "2024-05-30T13:14:40.99246" + "timestamp": "2024-08-26T17:55:38.755385" } } \ No newline at end of file diff --git a/workflows/seqinspector.nf 
b/workflows/seqinspector.nf index 50fa481f..e37df509 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -7,9 +7,7 @@ include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_PER_LANE } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_PER_GROUP } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_PER_RUNDIR } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -94,124 +92,51 @@ workflow SEQINSPECTOR { ch_multiqc_logo.toList() ) - // Generate reports by lane - multiqc_extra_files_per_lane = ch_multiqc_files - .filter { meta, sample -> meta.lane } - .map { meta, sample -> [lane: meta.lane] } + ch_tags = ch_multiqc_files + .map { meta, sample -> meta.tags } + .flatten() .unique() - .combine(ch_multiqc_extra_files) - lane_mqc_files = ch_multiqc_files - .filter { meta, sample -> meta.lane } - .mix(multiqc_extra_files_per_lane) - .map { meta, sample -> [ "[LANE:${meta.lane}]", meta, sample ] } - .groupTuple() - .tap { mqc_by_lane } - .collectFile{ - lane, meta, samples -> [ - "${lane}_multiqc_extra_config.yml", - """ - |output_fn_name: \"${lane}_multiqc_report.html\" - |data_dir_name: \"${lane}_multiqc_data\" - |plots_dir_name: \"${lane}_multiqc_plots\" - """.stripMargin() - ] - } - .map { file -> [ (file =~ /(\[LANE:.+\])/)[0][1], file ] } - .join(mqc_by_lane) - .multiMap { lane, config, meta , samples_per_lane -> - samples_per_lane: samples_per_lane - config: config - } - - MULTIQC_PER_LANE( - lane_mqc_files.samples_per_lane, - ch_multiqc_config.toList(), - lane_mqc_files.config, - ch_multiqc_logo.toList() - ) - - // Generate reports by group - multiqc_extra_files_per_group = ch_multiqc_files - 
.filter { meta, sample -> meta.group } - .map { meta, sample -> meta.group } - .unique() - .map { group -> [group:group] } - .combine(ch_multiqc_extra_files) - - group_mqc_files = ch_multiqc_files - .filter { meta, sample -> meta.group } - .mix(multiqc_extra_files_per_group) - .map { meta, sample -> [ "[GROUP:${meta.group}]", meta, sample ] } - .groupTuple() - .tap { mqc_by_group } - .collectFile{ - group, meta, samples -> [ - "${group}_multiqc_extra_config.yml", - """ - |output_fn_name: \"${group}_multiqc_report.html\" - |data_dir_name: \"${group}_multiqc_data\" - |plots_dir_name: \"${group}_multiqc_plots\" - """.stripMargin() - ] - } - .map { file -> [ (file =~ /(\[GROUP:.+\])/)[0][1], file ] } - .join(mqc_by_group) - .multiMap { group, config, meta , samples_per_group -> - samples_per_group: samples_per_group - config: config - } - - MULTIQC_PER_GROUP( - group_mqc_files.samples_per_group, - ch_multiqc_config.toList(), - group_mqc_files.config, - ch_multiqc_logo.toList() - ) - - // Generate reports by rundir - multiqc_extra_files_per_rundir = ch_multiqc_files - .filter { meta, sample -> meta.rundir } - .map { meta, sample -> meta.rundir } - .unique() - .map { rundir -> [rundir:rundir] } + multiqc_extra_files_per_tag = ch_tags .combine(ch_multiqc_extra_files) - rundir_mqc_files = ch_multiqc_files - .filter { meta, sample -> meta.rundir } - .mix(multiqc_extra_files_per_rundir) - .map { meta, sample -> [ "[RUNDIR:${meta.rundir.name}]", meta, sample ] } + // Group samples by tag + tagged_mqc_files = ch_tags + .combine(ch_multiqc_files) + .filter { sample_tag, meta, sample -> sample_tag in meta.tags } + .map { sample_tag, meta, sample -> [sample_tag, sample] } + .mix(multiqc_extra_files_per_tag) .groupTuple() - .tap { mqc_by_rundir } - .collectFile{ - rundir, meta, samples -> [ - "${rundir}_multiqc_extra_config.yml", + .tap { mqc_by_tag } + .collectFile { + sample_tag, samples -> + def prefix_tag = "[TAG:${sample_tag}]" + [ + "${prefix_tag}_multiqc_extra_config.yml", 
""" - |output_fn_name: \"${rundir}_multiqc_report.html\" - |data_dir_name: \"${rundir}_multiqc_data\" - |plots_dir_name: \"${rundir}_multiqc_plots\" + |output_fn_name: \"${prefix_tag}_multiqc_report.html\" + |data_dir_name: \"${prefix_tag}_multiqc_data\" + |plots_dir_name: \"${prefix_tag}_multiqc_plots\" """.stripMargin() ] } - .map { file -> [ (file =~ /(\[RUNDIR:.+\])/)[0][1], file ] } - .join(mqc_by_rundir) - .multiMap { rundir, config, meta , samples_per_rundir -> - samples_per_rundir: samples_per_rundir + .map { file -> [ (file =~ /\[TAG:(.+)\]/)[0][1], file ] } + .join(mqc_by_tag) + .multiMap { sample_tag, config, samples -> + samples_per_tag: samples config: config } - MULTIQC_PER_RUNDIR( - rundir_mqc_files.samples_per_rundir, + MULTIQC_PER_TAG( + tagged_mqc_files.samples_per_tag, ch_multiqc_config.toList(), - rundir_mqc_files.config, + tagged_mqc_files.config, ch_multiqc_logo.toList() ) emit: global_report = MULTIQC_GLOBAL.out.report.toList() // channel: /path/to/multiqc_report.html - lane_reports = MULTIQC_PER_LANE.out.report.toList() // channel: [ /path/to/multiqc_report.html ] - group_reports = MULTIQC_PER_GROUP.out.report.toList() // channel: [ /path/to/multiqc_report.html ] - rundir_reports = MULTIQC_PER_RUNDIR.out.report.toList() // channel: [ /path/to/multiqc_report.html ] + grouped_reports = MULTIQC_PER_TAG.out.report.toList() // channel: [ /path/to/multiqc_report.html ] versions = ch_versions // channel: [ path(versions.yml) ] } From dabccc2e556be1860eb990d1914208a8e6901cef Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 27 Aug 2024 15:08:20 +0200 Subject: [PATCH 040/172] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6430addb..3aa5bacc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c ### `Added` +- [#20](https://github.com/nf-core/seqinspector/pull/20) Use tags to generate 
group reports - [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane. ### `Fixed` From 8bba88869a4f67143639c61c3972599192140db8 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Tue, 27 Aug 2024 15:52:46 +0200 Subject: [PATCH 041/172] Try adding regex assertion and error for samplesheet tags --- assets/schema_input.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/assets/schema_input.json b/assets/schema_input.json index 62922b79..bde27fa7 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -36,6 +36,8 @@ }, "tags": { "type": "string", + "pattern": "^\\\"([A-Za-z0-9_-]+,\\ ?)*([A-Za-z0-9_-]+)\\\"$", + "errorMessage": "Tags must consist of numbers, letters, underscores or dashes and must be provided as a comma-separated list flanked by a pair of double-quotes, e.g. \"patient_01, lane1, pos-CTRL_2\"." "meta": ["tags"] } }, From 6987e93e0e10b39917d56600410db8a61a8f6ec9 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar <89784800+kedhammar@users.noreply.github.com> Date: Tue, 27 Aug 2024 15:55:26 +0200 Subject: [PATCH 042/172] Add missing comma --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index bde27fa7..d4a6dc70 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -37,7 +37,7 @@ "tags": { "type": "string", "pattern": "^\\\"([A-Za-z0-9_-]+,\\ ?)*([A-Za-z0-9_-]+)\\\"$", - "errorMessage": "Tags must consist of numbers, letters, underscores or dashes and must be provided as a comma-separated list flanked by a pair of double-quotes, e.g. \"patient_01, lane1, pos-CTRL_2\"." + "errorMessage": "Tags must consist of numbers, letters, underscores or dashes and must be provided as a comma-separated list flanked by a pair of double-quotes, e.g. 
\"patient_01, lane1, pos-CTRL_2\".", "meta": ["tags"] } }, From 363e782a7a68d2364fd73176eb50d24b72c1c02a Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 3 Sep 2024 10:35:51 +0200 Subject: [PATCH 043/172] Change example tags to more neutral ones --- README.md | 2 +- docs/usage.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 31018e7b..679b6f1c 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ First, prepare a samplesheet with your input data that looks as follows: ```csv sample,fastq_1,fastq_2,rundir,tags -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,"patient1" +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,"group1" ``` Each row represents a fastq file (single-end) or a pair of fastq files (paired end). diff --git a/docs/usage.md b/docs/usage.md index 42d596bb..621c7c0a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -32,9 +32,9 @@ run_dir ```csv title="samplesheet.csv" sample fastq_1 fastq_2 rundir tags -sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir "cohort1,patient1" -sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir "cohort1,patient2" -sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir "cohort1,patient3" +sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir "project1,group1" +sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir "project1,group1" +sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir "project1,group2" sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir "control" ``` From c1a21de429b24fde2f0e5ad8f313f7a7d1b468ba Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 3 Sep 2024 12:36:42 +0200 Subject: [PATCH 044/172] Check for tag collisions --- .../utils_nfcore_seqinspector_pipeline/main.nf | 14 ++++++++++++++ 1 file 
changed, 14 insertions(+) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index afcfc68b..bb875af6 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -103,6 +103,20 @@ workflow PIPELINE_INITIALISATION { // } .set { ch_samplesheet } + ch_samplesheet + .map { + meta, fastqs -> meta.tags + } + .flatten() + .unique() + .map { tag_name -> [tag_name.toLowerCase(), tag_name] } + .groupTuple() + .map { + tag_lowercase, tags -> + assert tags.size() == 1 : + "Tag name collision: " + tags.join(", ") + } + emit: samplesheet = ch_samplesheet versions = ch_versions From 8a43efa8ae3f78dd846f440b42934e50bba0ab91 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 3 Sep 2024 12:58:50 +0200 Subject: [PATCH 045/172] Use columns to separate tags --- README.md | 2 +- assets/samplesheet.csv | 12 ++++---- assets/schema_input.json | 4 +-- docs/usage.md | 10 +++---- .../main.nf | 2 +- tests/NovaSeq6000.main.nf.test | 28 +++++++++---------- 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 679b6f1c..d7cd233f 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ First, prepare a samplesheet with your input data that looks as follows: ```csv sample,fastq_1,fastq_2,rundir,tags -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,"group1" +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,group1 ``` Each row represents a fastq file (single-end) or a pair of fastq files (paired end). 
diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 00019e58..ba2542dd 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,7 +1,7 @@ sample,fastq_1,fastq_2,rundir,tags -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort1" -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort1" -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort2" -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient1" -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient2" -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient3" +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane2 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group1 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group2 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group3 diff --git a/assets/schema_input.json b/assets/schema_input.json index d4a6dc70..3569ae2f 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -36,8 +36,8 @@ }, "tags": { "type": "string", - 
"pattern": "^\\\"([A-Za-z0-9_-]+,\\ ?)*([A-Za-z0-9_-]+)\\\"$", - "errorMessage": "Tags must consist of numbers, letters, underscores or dashes and must be provided as a comma-separated list flanked by a pair of double-quotes, e.g. \"patient_01, lane1, pos-CTRL_2\".", + "pattern": "^([A-Za-z0-9_-]+:)*([A-Za-z0-9_-]+)$", + "errorMessage": "Tags must consist of numbers, letters, underscores or dashes and must be provided as a column-separated list, e.g. group_01:lane1:pos-CTRL_2.", "meta": ["tags"] } }, diff --git a/docs/usage.md b/docs/usage.md index 621c7c0a..38a039c4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -32,10 +32,10 @@ run_dir ```csv title="samplesheet.csv" sample fastq_1 fastq_2 rundir tags -sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir "project1,group1" -sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir "project1,group1" -sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir "project1,group2" -sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir "control" +sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir project1:group1 +sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir project1:group1 +sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir project1:group2 +sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir control ``` @@ -45,7 +45,7 @@ sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir "c | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | | `rundir` | Path to the runfolder containing extra information about the sequencing run (optional) . | -| `tags` | Comma-separated list of tags to group samples in special reports. 
| +| `tags` | Column-separated list of tags to group samples in special reports. | Another [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index bb875af6..eec4d09c 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def tags = meta.tags ? meta.tags.tokenize(",") : [] + def tags = meta.tags ? meta.tags.tokenize(":") : [] def updated_meta = meta + [ id:meta.sample, tags:tags ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] diff --git a/tests/NovaSeq6000.main.nf.test b/tests/NovaSeq6000.main.nf.test index f93891bf..d050399e 100644 --- a/tests/NovaSeq6000.main.nf.test +++ b/tests/NovaSeq6000.main.nf.test @@ -23,20 +23,20 @@ nextflow_pipeline { path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/global_report/multiqc_data/multiqc_software_versions.txt"), - path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/group_reports/cohort1/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_fastqc.txt"), - 
path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/group_reports/patient1/multiqc_data/multiqc_software_versions.txt"), - - path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_citations.txt"), - path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_fastqc.txt"), - path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/group_reports/patient2/multiqc_data/multiqc_software_versions.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_software_versions.txt"), path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_citations.txt"), path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_fastqc.txt"), From 8779909fe25a35950a999847d0b052b1f188de9c Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 3 Sep 2024 13:55:23 +0200 Subject: [PATCH 046/172] switch to colon-separated tags --- README.md | 2 +- assets/samplesheet.csv | 12 
++++++------ assets/schema_input.json | 4 ++-- docs/usage.md | 12 ++++++------ .../local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 31018e7b..acba2cec 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ First, prepare a samplesheet with your input data that looks as follows: ```csv sample,fastq_1,fastq_2,rundir,tags -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,"patient1" +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,lane1:project5:group2 ``` Each row represents a fastq file (single-end) or a pair of fastq files (paired end). diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 00019e58..de5f1a68 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,7 +1,7 @@ sample,fastq_1,fastq_2,rundir,tags -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort1" -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort1" -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,"paired_sample,cohort2" -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient1" -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient2" -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,"patient3" +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:cohort1 
+SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:cohort1 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:cohort2 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,patient1 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,patient2 +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,patient3 diff --git a/assets/schema_input.json b/assets/schema_input.json index d4a6dc70..18941557 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -36,8 +36,8 @@ }, "tags": { "type": "string", - "pattern": "^\\\"([A-Za-z0-9_-]+,\\ ?)*([A-Za-z0-9_-]+)\\\"$", - "errorMessage": "Tags must consist of numbers, letters, underscores or dashes and must be provided as a comma-separated list flanked by a pair of double-quotes, e.g. 
\"patient_01, lane1, pos-CTRL_2\".", + "pattern": "^([a-z0-9_-]+:)*([a-z0-9_-]+)$", + "errorMessage": "Tags must be separated by colons and only consist of lowercase letters, numbers, underscores and hyphens.", "meta": ["tags"] } }, diff --git a/docs/usage.md b/docs/usage.md index 42d596bb..e2da41de 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -32,10 +32,10 @@ run_dir ```csv title="samplesheet.csv" sample fastq_1 fastq_2 rundir tags -sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir "cohort1,patient1" -sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir "cohort1,patient2" -sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir "cohort1,patient3" -sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir "control" +sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir cohort1:patient1 +sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir cohort1:patient2 +sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir cohort1:patient3 +sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir control ``` @@ -44,8 +44,8 @@ sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir "c | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz" (optional). | -| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional) . | -| `tags` | Comma-separated list of tags to group samples in special reports. 
| +| `rundir` | Path to the runfolder containing extra information about the sequencing run (optional). | +| `tags` | Colon-separated list of tags to group samples in special reports. | Another [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index afcfc68b..0ebfdd0a 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def tags = meta.tags ? meta.tags.tokenize(",") : [] + def tags = meta.tags ? meta.tags.tokenize(":") : [] def updated_meta = meta + [ id:meta.sample, tags:tags ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] From f496b85d07540807a452a094fb9cbc52013cc9f1 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 3 Sep 2024 13:55:30 +0200 Subject: [PATCH 047/172] use testdata with tags --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 658e75f3..c458ad0f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,7 +22,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'seqinspector/testdata/MiSeq/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv' // Genome references genome = 'R64-1-1' From 38e39a9828646f984e7902bd119b802e288624c9 Mon Sep 17 00:00:00 2001 From: 
Adrien Coulier Date: Tue, 3 Sep 2024 14:10:16 +0200 Subject: [PATCH 048/172] Make ids unique --- .../local/utils_nfcore_seqinspector_pipeline/main.nf | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index eec4d09c..632acb38 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -87,9 +87,17 @@ workflow PIPELINE_INITIALISATION { def tags = meta.tags ? meta.tags.tokenize(":") : [] def updated_meta = meta + [ id:meta.sample, tags:tags ] if (!fastq_2) { - return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] + return [ + updated_meta.id + fastq_1.toString().replaceAll('/', '_'), + updated_meta + [ single_end:true ], + [ fastq_1 ] + ] } else { - return [ updated_meta.id, updated_meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + return [ + updated_meta.id + fastq_1.toString().replaceAll('/', '_') + '_' + fastq_2.toString().replaceAll('/', '_'), + updated_meta + [ single_end:false ], + [ fastq_1, fastq_2 ] + ] } } .groupTuple() From c96a41c0c6f5c4d3b7f8ff0d71242fd876b04325 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Tue, 3 Sep 2024 12:26:11 +0000 Subject: [PATCH 049/172] bump multiqc to prevent gitpod crashing, populate the new process inputs w. 
empty channels --- modules.json | 2 +- modules/nf-core/multiqc/environment.yml | 2 +- modules/nf-core/multiqc/main.nf | 14 ++++++++++--- modules/nf-core/multiqc/meta.yml | 13 ++++++++++++ modules/nf-core/multiqc/tests/main.nf.test | 8 ++++++++ .../nf-core/multiqc/tests/main.nf.test.snap | 20 +++++++++---------- modules/nf-core/multiqc/tests/nextflow.config | 5 +++++ workflows/seqinspector.nf | 8 ++++++-- 8 files changed, 55 insertions(+), 17 deletions(-) create mode 100644 modules/nf-core/multiqc/tests/nextflow.config diff --git a/modules.json b/modules.json index 87fe816c..70f3486c 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", + "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", "installed_by": ["modules"] } } diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index ca39fb67..a31464c9 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.21 + - bioconda::multiqc=1.24.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 47ac352f..ceaec139 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,14 +3,16 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : - 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.24.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.24.1--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" path(multiqc_config) path(extra_multiqc_config) path(multiqc_logo) + path(replace_names) + path(sample_names) output: path "*multiqc_report.html", emit: report @@ -23,16 +25,22 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' - def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ $args \\ $config \\ + $prefix \\ $extra_config \\ $logo \\ + $replace \\ + $samples \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 45a9bc35..382c08cb 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -29,6 +29,19 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" + - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. 
+ pattern: "*.{tsv}" output: - report: type: file diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index f1c4242e..33316a7d 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -8,6 +8,8 @@ nextflow_process { tag "modules_nfcore" tag "multiqc" + config "./nextflow.config" + test("sarscov2 single-end [fastqc]") { when { @@ -17,6 +19,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -41,6 +45,8 @@ nextflow_process { input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -66,6 +72,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index bfebd802..83fa080c 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:48:55.657331" + "timestamp": "2024-07-10T12:41:34.562023" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:49:49.071937" + "timestamp": "2024-07-10T11:27:11.933869532" }, "multiqc_versions_config": { "content": [ [ - 
"versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:49:25.457567" + "timestamp": "2024-07-10T11:26:56.709849369" } -} \ No newline at end of file +} diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 00000000..c537a6a3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = null + } +} diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index e37df509..1ba00c62 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -89,7 +89,9 @@ workflow SEQINSPECTOR { .collect(), ch_multiqc_config.toList(), Channel.empty().toList(), - ch_multiqc_logo.toList() + ch_multiqc_logo.toList(), + Channel.empty().toList(), + Channel.empty().toList() ) ch_tags = ch_multiqc_files @@ -131,7 +133,9 @@ workflow SEQINSPECTOR { tagged_mqc_files.samples_per_tag, ch_multiqc_config.toList(), tagged_mqc_files.config, - ch_multiqc_logo.toList() + ch_multiqc_logo.toList(), + Channel.empty().toList(), + Channel.empty().toList() ) emit: From 2e269ecc236a66866f8413a1129656ed9c1d816f Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 3 Sep 2024 16:36:59 +0200 Subject: [PATCH 050/172] Allow for uppercase letters --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 18941557..f2440839 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -36,7 +36,7 @@ }, "tags": { "type": "string", - "pattern": "^([a-z0-9_-]+:)*([a-z0-9_-]+)$", + "pattern": "^([A-Za-z0-9_-]+:)*([A-Za-z0-9_-]+)$", "errorMessage": "Tags must be separated by colons and only consist of lowercase letters, numbers, underscores and hyphens.", "meta": ["tags"] } From 
c97a597e8165081565d5acb1afdfd61e83f6309c Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 10 Sep 2024 08:54:03 +0200 Subject: [PATCH 051/172] Use samplesheet row in id Co-authored-by: Matthias Zepper --- .../local/utils_nfcore_seqinspector_pipeline/main.nf | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index 632acb38..3c63037b 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -82,19 +82,21 @@ workflow PIPELINE_INITIALISATION { // Channel .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json + .toList() + .flatMap { it.withIndex().collect { entry, idx -> entry + "${idx+1}" } } .map { - meta, fastq_1, fastq_2 -> + meta, fastq_1, fastq_2, idx -> def tags = meta.tags ? 
meta.tags.tokenize(":") : [] - def updated_meta = meta + [ id:meta.sample, tags:tags ] + def updated_meta = meta + [ id:"${meta.sample}_${idx}", tags:tags ] if (!fastq_2) { return [ - updated_meta.id + fastq_1.toString().replaceAll('/', '_'), + updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] } else { return [ - updated_meta.id + fastq_1.toString().replaceAll('/', '_') + '_' + fastq_2.toString().replaceAll('/', '_'), + updated_meta.id, updated_meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] From 1f33928fdc2f9350fbb4ea86e253f5f7cc9b6670 Mon Sep 17 00:00:00 2001 From: Adrien Coulier Date: Tue, 10 Sep 2024 09:03:50 +0200 Subject: [PATCH 052/172] Update snapshots --- tests/MiSeq.main.nf.test.snap | 12 ++++++---- tests/NovaSeq6000.main.nf.test.snap | 36 ++++++++++++++++------------- tests/PromethION.main.nf.test.snap | 12 ++++++---- 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index 26f0c2a8..9dacaf8a 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -2,10 +2,14 @@ "MiSeq data test": { "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,e46e7baa8f57d4cf54d973925b5eadf9", - "multiqc_general_stats.txt:md5,a5e626a2e1a3c986092e4f89091cc41c", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_fastqc.txt:md5,7b1b7fd457b60404768045b148d4c0a8", + "multiqc_general_stats.txt:md5,962713a1473a318f2cb29bb5290c4c8e", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5" ], - "timestamp": "2024-08-26T17:55:16.152573" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-10T08:57:05.870194" } } \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index f5c4776b..3f27e5a2 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ 
-2,26 +2,30 @@ "NovaSeq6000 data test": { "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,e8ed6dca928396b8873d24e60ea1a133", - "multiqc_general_stats.txt:md5,fd9d46c5b441908cd07e5373d116db17", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_fastqc.txt:md5,3730f9046b20ac5c17a86db0a33f8d5d", + "multiqc_general_stats.txt:md5,d521de54d1e659bf7892105f7d23d4db", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,ff9b31c6024f11a8135456e7ea01fc8f", - "multiqc_general_stats.txt:md5,f36bd6e27e92c25be076efea411d3a8e", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_fastqc.txt:md5,8284e25ccc21041cf3b5a32eb6a51e78", + "multiqc_general_stats.txt:md5,d52544eb1a505c889a2f9117cf94a5fa", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,62d51280dcd7634f6bed95ffe0d8dab8", - "multiqc_general_stats.txt:md5,2012002b6a057be981a97fcc96142a6c", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_fastqc.txt:md5,f38ffdc112c73af3a41ed15848a3761f", + "multiqc_general_stats.txt:md5,5b1190093085ef073d4bd5818c9cde79", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,63749e803a2d5fc7ecc7cd93fa68df1f", - "multiqc_general_stats.txt:md5,656931993032400dea3d441b8b61b4d2", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95", + "multiqc_fastqc.txt:md5,7ff71ceb8ecdf086331047f8860c3347", + "multiqc_general_stats.txt:md5,79c1090dd8a97912893f8491641b9dc9", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - 
"multiqc_fastqc.txt:md5,91cc62e1b4059bdbe4b88affa43378af", - "multiqc_general_stats.txt:md5,6e500f82550e00b07c3e7aa1d46ab9e9", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_fastqc.txt:md5,519ff344a896ac369bba4d5c5b8be7b5", + "multiqc_general_stats.txt:md5,41611bd5ab9e79425c466bf976b03bdc", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5" ], - "timestamp": "2024-08-26T18:03:33.089142" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-10T08:58:26.732622" } } \ No newline at end of file diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index e3b34f23..fb8cda25 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -2,10 +2,14 @@ "PromethION data test": { "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,cecee3cb343c75c80180d3169c6f3ea1", - "multiqc_general_stats.txt:md5,e63c25089c4fc10618414ba2254d18c7", - "multiqc_software_versions.txt:md5,7452f1f7aae2a8a4066c2ef6cd5ceb95" + "multiqc_fastqc.txt:md5,35984961f25a0d4e7352cab4d5650178", + "multiqc_general_stats.txt:md5,1465b0b1959e3864b28ecc2340df351b", + "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5" ], - "timestamp": "2024-08-26T17:55:38.755385" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-10T08:58:57.180636" } } \ No newline at end of file From 7ce086a6fddbe96de440a234128576bfc4d6bff5 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 28 Oct 2024 11:12:03 +0000 Subject: [PATCH 053/172] nf-core tools 3.0.2 modules update --- modules.json | 4 +- modules/nf-core/fastqc/environment.yml | 2 - modules/nf-core/fastqc/main.nf | 5 +- modules/nf-core/fastqc/meta.yml | 57 +-- modules/nf-core/fastqc/tests/main.nf.test | 225 ++++++++--- .../nf-core/fastqc/tests/main.nf.test.snap | 370 ++++++++++++++++-- modules/nf-core/multiqc/environment.yml | 4 
+- modules/nf-core/multiqc/main.nf | 6 +- modules/nf-core/multiqc/meta.yml | 91 +++-- .../nf-core/multiqc/tests/main.nf.test.snap | 26 +- 10 files changed, 603 insertions(+), 187 deletions(-) diff --git a/modules.json b/modules.json index 70f3486c..a84d111b 100644 --- a/modules.json +++ b/modules.json @@ -7,12 +7,12 @@ "nf-core": { "fastqc": { "branch": "master", - "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", + "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] } } diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index 1787b38a..691d4c76 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,7 +1,5 @@ -name: fastqc channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index d79f1c86..d8989f48 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -26,7 +26,10 @@ process FASTQC { def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') - def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) + // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 + // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus // FastQC memory value allowed range (100 - 10000) def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index ee5507e0..4827da7a 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -16,35 +16,44 @@ tools: homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ licence: ["GPL-2.0-only"] + identifier: biotools:fastqc input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report + pattern: "*_{fastqc.html}" - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.zip": + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 70edae4d..e9d79a07 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -23,17 +23,14 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
Mon 2 Oct 2023
test.gz
- // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_single") } + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -54,16 +51,14 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -83,13 +78,11 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } + { assert process.success }, + { assert process.out.html[0][1] ==~ 
".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -109,13 +102,11 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -138,22 +129,20 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, - { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, - { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, - - { assert 
snapshot(process.out.versions).match("fastqc_versions_multiple") } + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -173,21 +162,18 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } ) } } test("sarscov2 single-end [fastq] - stub") { - options "-stub" - + options "-stub" when { process { """ @@ -201,12 +187,123 @@ nextflow_process { then { assertAll ( - { assert process.success }, - { assert 
snapshot(process.out.html.collect { file(it[1]).getName() } + - process.out.zip.collect { file(it[1]).getName() } + - process.out.versions ).match("fastqc_stub") } + { assert process.success }, + { assert snapshot(process.out).match() } ) } } + test("sarscov2 paired-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 interleaved [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 multiple [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 custom_prefix - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 86f7c311..d5db3092 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,88 +1,392 @@ { - "fastqc_versions_interleaved": { + "sarscov2 custom_prefix": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:40:07.293713" + "timestamp": "2024-07-22T11:02:16.374038" }, - "fastqc_stub": { + "sarscov2 single-end [fastq] - stub": { "content": [ - [ - "test.html", - "test.zip", - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:24.993809" + }, + "sarscov2 custom_prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:31:01.425198" + "timestamp": "2024-07-22T11:03:10.93942" }, - "fastqc_versions_multiple": { + "sarscov2 interleaved [fastq]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:40:55.797907" + "timestamp": "2024-07-22T11:01:42.355718" }, - "fastqc_versions_bam": { + "sarscov2 paired-end [bam]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:40:26.795862" + "timestamp": "2024-07-22T11:01:53.276274" }, - "fastqc_versions_single": { + "sarscov2 multiple 
[fastq]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:39:27.043675" + "timestamp": "2024-07-22T11:02:05.527626" }, - "fastqc_versions_paired": { + "sarscov2 paired-end [fastq]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:31.188871" + }, + "sarscov2 paired-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:34.273566" + }, + "sarscov2 multiple [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": 
false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:39:47.584191" + "timestamp": "2024-07-22T11:03:02.304411" }, - "fastqc_versions_custom_prefix": { + "sarscov2 single-end [fastq]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:19.095607" + }, + "sarscov2 interleaved [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:44.640184" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:41:14.576531" + "timestamp": "2024-07-22T11:02:53.550742" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index a31464c9..6f5b867b 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,7 +1,5 @@ -name: multiqc channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::multiqc=1.24.1 + - bioconda::multiqc=1.25.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index ceaec139..cc0643e1 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.24.1--pyhdfd78af_0' : - 'biocontainers/multiqc:1.24.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -52,7 +52,7 @@ process MULTIQC { stub: """ mkdir multiqc_data - touch multiqc_plots + mkdir multiqc_plots touch multiqc_report.html cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 382c08cb..b16c1879 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,6 @@ name: multiqc -description: Aggregate results from bioinformatics analyses across many samples into a single report +description: Aggregate results from bioinformatics analyses across many samples into + a single report keywords: - QC - bioinformatics tools @@ -12,53 +13,59 @@ tools: homepage: https://multiqc.info/ documentation: 
https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. - pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" - - replace_names: - type: file - description: | - Optional two-column sample renaming file. First column a set of - patterns, second column a set of corresponding replacements. Passed via - MultiQC's `--replace-names` option. - pattern: "*.{tsv}" - - sample_names: - type: file - description: | - Optional TSV file with headers, passed to the MultiQC --sample_names - argument. - pattern: "*.{tsv}" + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. 
+ pattern: "*.{tsv}" output: - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" - data: - type: directory - description: MultiQC data dir - pattern: "multiqc_data" + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_data" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@abhi18av" - "@bunop" diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 83fa080c..2fcbb5ff 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-10T12:41:34.562023" + "timestamp": "2024-10-02T17:51:46.317523" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-10T11:27:11.933869532" + "timestamp": "2024-10-02T17:52:20.680978" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" + 
"versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-10T11:26:56.709849369" + "timestamp": "2024-10-02T17:52:09.185842" } -} +} \ No newline at end of file From a5951302f2b783a4ffe9fd1d3b11474fe48d3db6 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 28 Oct 2024 13:30:37 +0000 Subject: [PATCH 054/172] merge fix --- .../local/utils_nfcore_seqinspector_pipeline/main.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index c2270583..a84e064f 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -74,6 +74,7 @@ workflow PIPELINE_INITIALISATION { Channel .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + .toList() .flatMap { it.withIndex().collect { entry, idx -> entry + "${idx+1}" } } .map { meta, fastq_1, fastq_2, idx -> @@ -94,8 +95,8 @@ workflow PIPELINE_INITIALISATION { } } .groupTuple() - .map { samplesheet -> - validateInputSamplesheet(samplesheet) + .map { + validateInputSamplesheet(it) // Applies additional group validation checks that schema_input.json cannot do. 
} .transpose() // Replace the map below // .map { From 036408867cb7b852cf1e35089d9bd4edab29bb89 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 28 Oct 2024 13:30:48 +0000 Subject: [PATCH 055/172] update snapshots --- tests/MiSeq.main.nf.test.snap | 8 ++++---- tests/NovaSeq6000.main.nf.test.snap | 24 ++++++++++++------------ tests/PromethION.main.nf.test.snap | 10 +++++----- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index 9dacaf8a..4613d525 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -3,13 +3,13 @@ "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,7b1b7fd457b60404768045b148d4c0a8", - "multiqc_general_stats.txt:md5,962713a1473a318f2cb29bb5290c4c8e", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5" + "multiqc_general_stats.txt:md5,5b28a83b14cb2fe88d084d08900ebdbf", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3" ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.0" }, - "timestamp": "2024-09-10T08:57:05.870194" + "timestamp": "2024-10-28T13:18:10.3675973" } } \ No newline at end of file diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index 3f27e5a2..ee3c22b7 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -3,29 +3,29 @@ "content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,3730f9046b20ac5c17a86db0a33f8d5d", - "multiqc_general_stats.txt:md5,d521de54d1e659bf7892105f7d23d4db", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", + "multiqc_general_stats.txt:md5,25abe0f6a35eb4a3b056fc3cf5c13732", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", 
"multiqc_fastqc.txt:md5,8284e25ccc21041cf3b5a32eb6a51e78", - "multiqc_general_stats.txt:md5,d52544eb1a505c889a2f9117cf94a5fa", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", + "multiqc_general_stats.txt:md5,90ee35137492b80aab36ef67f72d8921", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,f38ffdc112c73af3a41ed15848a3761f", - "multiqc_general_stats.txt:md5,5b1190093085ef073d4bd5818c9cde79", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", + "multiqc_general_stats.txt:md5,d62a2fc39e674d98783d408791803148", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,7ff71ceb8ecdf086331047f8860c3347", - "multiqc_general_stats.txt:md5,79c1090dd8a97912893f8491641b9dc9", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5", + "multiqc_general_stats.txt:md5,2f09b8f199ac40cf67ba50843cebd29c", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "multiqc_fastqc.txt:md5,519ff344a896ac369bba4d5c5b8be7b5", - "multiqc_general_stats.txt:md5,41611bd5ab9e79425c466bf976b03bdc", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5" + "multiqc_general_stats.txt:md5,6a1c16f068d7ba3a9225a17eb570ed9a", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3" ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.0" }, - "timestamp": "2024-09-10T08:58:26.732622" + "timestamp": "2024-10-28T13:19:13.226135825" } } \ No newline at end of file diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index fb8cda25..026a8cd2 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -2,14 +2,14 @@ "PromethION data test": { 
"content": [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_fastqc.txt:md5,35984961f25a0d4e7352cab4d5650178", - "multiqc_general_stats.txt:md5,1465b0b1959e3864b28ecc2340df351b", - "multiqc_software_versions.txt:md5,49e3596d49ee49d967d3b6c363b486d5" + "multiqc_fastqc.txt:md5,1a4b472e13cadc770832b0e20d1de7b0", + "multiqc_general_stats.txt:md5,409cefc7f17f95d176ced6032bf8fb32", + "multiqc_software_versions.txt:md5,a3698a2d32e8695c38d50e3d17de5fe3" ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.10.0" }, - "timestamp": "2024-09-10T08:58:57.180636" + "timestamp": "2024-10-28T13:19:57.261730412" } } \ No newline at end of file From 545af046c6660824322f982f4ac322776c25284c Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 28 Oct 2024 13:38:38 +0000 Subject: [PATCH 056/172] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3aa5bacc..b0b12de1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c - [#20](https://github.com/nf-core/seqinspector/pull/20) Use tags to generate group reports - [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane. +- [#49](https://github.com/nf-core/seqinspector/pull/49) Merge with template 3.0.2. 
### `Fixed` From 88bb65d602ab1434a82b0b9406556cab674194fe Mon Sep 17 00:00:00 2001 From: ngarcia Date: Mon, 28 Oct 2024 13:16:54 +0000 Subject: [PATCH 057/172] add subsampling --- modules.json | 5 + modules/nf-core/seqtk/sample/environment.yml | 5 + modules/nf-core/seqtk/sample/main.nf | 58 +++++++++++ modules/nf-core/seqtk/sample/meta.yml | 52 ++++++++++ .../nf-core/seqtk/sample/tests/main.nf.test | 80 ++++++++++++++++ .../seqtk/sample/tests/main.nf.test.snap | 95 +++++++++++++++++++ .../seqtk/sample/tests/standard.config | 6 ++ modules/nf-core/seqtk/sample/tests/tags.yml | 2 + nextflow.config | 2 +- nextflow_schema.json | 7 ++ workflows/seqinspector.nf | 17 +++- 11 files changed, 327 insertions(+), 2 deletions(-) create mode 100644 modules/nf-core/seqtk/sample/environment.yml create mode 100644 modules/nf-core/seqtk/sample/main.nf create mode 100644 modules/nf-core/seqtk/sample/meta.yml create mode 100644 modules/nf-core/seqtk/sample/tests/main.nf.test create mode 100644 modules/nf-core/seqtk/sample/tests/main.nf.test.snap create mode 100644 modules/nf-core/seqtk/sample/tests/standard.config create mode 100644 modules/nf-core/seqtk/sample/tests/tags.yml diff --git a/modules.json b/modules.json index 8e632d50..f01f2a64 100644 --- a/modules.json +++ b/modules.json @@ -14,6 +14,11 @@ "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] + }, + "seqtk/sample": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/seqtk/sample/environment.yml b/modules/nf-core/seqtk/sample/environment.yml new file mode 100644 index 00000000..693aa5c1 --- /dev/null +++ b/modules/nf-core/seqtk/sample/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/sample/main.nf b/modules/nf-core/seqtk/sample/main.nf new file mode 100644 index 00000000..ea9b839e --- 
/dev/null +++ b/modules/nf-core/seqtk/sample/main.nf @@ -0,0 +1,58 @@ +process SEQTK_SAMPLE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : + 'biocontainers/seqtk:1.4--he4a0461_1' }" + + input: + tuple val(meta), path(reads), val(sample_size) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (!(args ==~ /.*-s[0-9]+.*/)) { + args += " -s100" + } + if ( !sample_size ) { + error "SEQTK/SAMPLE must have a sample_size value included" + } + """ + printf "%s\\n" $reads | while read f; + do + seqtk \\ + sample \\ + $args \\ + \$f \\ + $sample_size \\ + | gzip --no-name > ${prefix}_\$(basename \$f) + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + echo "" | gzip > ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + +} diff --git a/modules/nf-core/seqtk/sample/meta.yml b/modules/nf-core/seqtk/sample/meta.yml new file mode 100644 index 00000000..42f67d8f --- /dev/null +++ b/modules/nf-core/seqtk/sample/meta.yml @@ -0,0 +1,52 @@ +name: seqtk_sample +description: Subsample reads from FASTQ files +keywords: + - sample + - fastx + - reads +tools: + - seqtk: + description: Seqtk is a fast and lightweight tool for processing sequences in + the FASTA or FASTQ format. Seqtk sample command subsamples sequences. 
+ homepage: https://github.com/lh3/seqtk + documentation: https://docs.csc.fi/apps/seqtk/ + tool_dev_url: https://github.com/lh3/seqtk + licence: ["MIT"] + identifier: biotools:seqtk +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: List of input FastQ files + pattern: "*.{fastq.gz}" + - sample_size: + type: integer + description: Number of reads to sample. +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: Subsampled FastQ files + pattern: "*.{fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kaurravneet4123" + - "@sidorov-si" + - "@adamrtalbot" +maintainers: + - "@kaurravneet4123" + - "@sidorov-si" + - "@adamrtalbot" diff --git a/modules/nf-core/seqtk/sample/tests/main.nf.test b/modules/nf-core/seqtk/sample/tests/main.nf.test new file mode 100644 index 00000000..c121c9d9 --- /dev/null +++ b/modules/nf-core/seqtk/sample/tests/main.nf.test @@ -0,0 +1,80 @@ +nextflow_process { + + name "Test Process SEQTK_SAMPLE" + script "modules/nf-core/seqtk/sample/main.nf" + process "SEQTK_SAMPLE" + config "./standard.config" + + tag "modules" + tag "modules_nfcore" + tag "seqtk" + tag "seqtk/sample" + + test("sarscov2_sample_singleend_fqgz") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + 50 + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2_sample_pairedend_fqgz") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + 50 + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2_sample_singlend_fqgz_stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + 50 + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/seqtk/sample/tests/main.nf.test.snap b/modules/nf-core/seqtk/sample/tests/main.nf.test.snap new file mode 100644 index 00000000..a9fec3c4 --- /dev/null +++ b/modules/nf-core/seqtk/sample/tests/main.nf.test.snap @@ -0,0 +1,95 @@ +{ + "sarscov2_sample_singlend_fqgz_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sampled.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,0529f2d163df9e2cd2ae8254dfb63806" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sampled.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,0529f2d163df9e2cd2ae8254dfb63806" + ] + } + ], + "timestamp": "2024-02-22T15:58:45.902956" + }, + "sarscov2_sample_pairedend_fqgz": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sampled_test_1.fastq.gz:md5,e5f44fafd7351c5abb9925a075132941" + ] + ], + "1": [ + "versions.yml:md5,0529f2d163df9e2cd2ae8254dfb63806" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sampled_test_1.fastq.gz:md5,e5f44fafd7351c5abb9925a075132941" + ] + ], + "versions": [ + "versions.yml:md5,0529f2d163df9e2cd2ae8254dfb63806" + ] + } + ], + "timestamp": "2024-02-22T15:58:37.679954" + }, + "sarscov2_sample_singleend_fqgz": { + "content": [ + { + "0": [ + 
[ + { + "id": "test", + "single_end": true + }, + "test.sampled_test_1.fastq.gz:md5,e5f44fafd7351c5abb9925a075132941" + ] + ], + "1": [ + "versions.yml:md5,0529f2d163df9e2cd2ae8254dfb63806" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sampled_test_1.fastq.gz:md5,e5f44fafd7351c5abb9925a075132941" + ] + ], + "versions": [ + "versions.yml:md5,0529f2d163df9e2cd2ae8254dfb63806" + ] + } + ], + "timestamp": "2024-02-22T15:58:29.474491" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/sample/tests/standard.config b/modules/nf-core/seqtk/sample/tests/standard.config new file mode 100644 index 00000000..b2dd4b9f --- /dev/null +++ b/modules/nf-core/seqtk/sample/tests/standard.config @@ -0,0 +1,6 @@ +process { + withName: SEQTK_SAMPLE { + ext.args = '-s100' + ext.prefix = { "${meta.id}.sampled" } + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/sample/tests/tags.yml b/modules/nf-core/seqtk/sample/tests/tags.yml new file mode 100644 index 00000000..e5d113b8 --- /dev/null +++ b/modules/nf-core/seqtk/sample/tests/tags.yml @@ -0,0 +1,2 @@ +seqtk/sample: + - "modules/nf-core/seqtk/sample/**" diff --git a/nextflow.config b/nextflow.config index 50c1ecbb..10bcb897 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,7 +12,7 @@ params { // TODO nf-core: Specify your pipeline's command line flags // Input options input = null - + sample_size = null // References genome = null fasta = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 88fd607b..f87e525f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -23,8 +23,15 @@ "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/seqinspector/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, + "sample_size": { + "type": "integer", + "default": null, + "description": "Subset this number of reads.", + "help_text": "Samples will be subsetted to this number of reads. If null, no subsampling will be performed." + }, "outdir": { "type": "string", + "default": null, "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index ea628117..70b89679 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -3,6 +3,8 @@ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +include { SEQTK_SAMPLE } from '../modules/nf-core/seqtk/sample/main' include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' @@ -30,11 +32,24 @@ workflow SEQINSPECTOR { ch_multiqc_extra_files = Channel.empty() ch_multiqc_reports = Channel.empty() + // + // MODULE: Run Seqkit sample to perform subsampling + // + if (params.sample_size) { + ch_sample_sized = SEQTK_SAMPLE(ch_samplesheet.map { + meta, reads -> [meta, reads, params.sample_size] + }).reads + ch_versions = ch_versions.mix(SEQTK_SAMPLE.out.versions.first()) + } else { + // No do subsample + ch_sample_sized = ch_samplesheet + } + // // MODULE: Run FastQC // FASTQC ( - ch_samplesheet + ch_sample_sized ) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) From dbd64bfa4f311a9001f48a89df56a84581193d20 Mon Sep 17 00:00:00 2001 From: ngarcia Date: Mon, 28 Oct 2024 14:03:06 +0000 Subject: [PATCH 058/172] add subsampling nf-test --- tests/NovaSeq6000.main_subsample.nf.test | 50 +++++++++++++++++++ 
.../NovaSeq6000.main_subsample.nf.test.config | 8 +++ 2 files changed, 58 insertions(+) create mode 100644 tests/NovaSeq6000.main_subsample.nf.test create mode 100644 tests/NovaSeq6000.main_subsample.nf.test.config diff --git a/tests/NovaSeq6000.main_subsample.nf.test b/tests/NovaSeq6000.main_subsample.nf.test new file mode 100644 index 00000000..0919bc23 --- /dev/null +++ b/tests/NovaSeq6000.main_subsample.nf.test @@ -0,0 +1,50 @@ +nextflow_pipeline { + + name "Test Workflow main.nf on NovaSeq6000 data" + script "../main.nf" + tag "seqinspector" + tag "PIPELINE" + + test("NovaSeq6000 data test") { + + when { + config "./NovaSeq6000.main_subsample.nf.test.config" + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/global_report/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/lane1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/group1/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_fastqc.txt"), + 
path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/group2/multiqc_data/multiqc_software_versions.txt"), + + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/group_reports/test/multiqc_data/multiqc_software_versions.txt"), + ).match() + }, + ) + } + } +} diff --git a/tests/NovaSeq6000.main_subsample.nf.test.config b/tests/NovaSeq6000.main_subsample.nf.test.config new file mode 100644 index 00000000..acda74dd --- /dev/null +++ b/tests/NovaSeq6000.main_subsample.nf.test.config @@ -0,0 +1,8 @@ +// Load the basic test config +includeConfig 'nextflow.config' + +// Load the correct samplesheet for that test +params { + input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv' + sample_size = 90 +} From 44fb55455961c1d1cc56f04341a7d2a382dfbc42 Mon Sep 17 00:00:00 2001 From: ngarcia Date: Mon, 28 Oct 2024 14:09:57 +0000 Subject: [PATCH 059/172] add snapshots --- tests/NovaSeq6000.main_subsample.nf.test.snap | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/NovaSeq6000.main_subsample.nf.test.snap diff --git a/tests/NovaSeq6000.main_subsample.nf.test.snap b/tests/NovaSeq6000.main_subsample.nf.test.snap new file mode 100644 index 00000000..d1dbe4a7 --- /dev/null +++ b/tests/NovaSeq6000.main_subsample.nf.test.snap @@ -0,0 +1,31 @@ +{ + "NovaSeq6000 data test": { + "content": [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,aba942d1e6996b579f19798e5673f514", + "multiqc_general_stats.txt:md5,c4f40f2313aadc38619e7487226e8d93", + "multiqc_software_versions.txt:md5,b7d1ca14785a9361f0a39ce1b6a02686", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + 
"multiqc_fastqc.txt:md5,aa1b8d6adae86005ea7a8b2e901099b8", + "multiqc_general_stats.txt:md5,d5d73d2888cd9895e5f116e5b869e73c", + "multiqc_software_versions.txt:md5,b7d1ca14785a9361f0a39ce1b6a02686", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,ff996e1d3dc4a46e0c9535e54d51ccab", + "multiqc_general_stats.txt:md5,d01ec30a262b69bc5749b0ed108a950a", + "multiqc_software_versions.txt:md5,b7d1ca14785a9361f0a39ce1b6a02686", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,3df36ecfe76b25b0c22dcda84bce2b3b", + "multiqc_general_stats.txt:md5,4dffc0d1169c49adde819d4467ffb775", + "multiqc_software_versions.txt:md5,b7d1ca14785a9361f0a39ce1b6a02686", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_fastqc.txt:md5,ce61b4ce4b1d76ec3f20de3bf0c9ec7f", + "multiqc_general_stats.txt:md5,05f8dfeea9fca7f4c16ba9d553af4c69", + "multiqc_software_versions.txt:md5,b7d1ca14785a9361f0a39ce1b6a02686" + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-28T14:05:44.868455454" + } +} \ No newline at end of file From d781db1c98312bafc9419955aa43a26faa9dd076 Mon Sep 17 00:00:00 2001 From: ngarcia Date: Mon, 28 Oct 2024 14:20:03 +0000 Subject: [PATCH 060/172] better name --- tests/NovaSeq6000.main_subsample.nf.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/NovaSeq6000.main_subsample.nf.test b/tests/NovaSeq6000.main_subsample.nf.test index 0919bc23..72851f91 100644 --- a/tests/NovaSeq6000.main_subsample.nf.test +++ b/tests/NovaSeq6000.main_subsample.nf.test @@ -1,11 +1,11 @@ nextflow_pipeline { - name "Test Workflow main.nf on NovaSeq6000 data" + name "Test Workflow main.nf on NovaSeq6000 data sample size 90" script "../main.nf" tag "seqinspector" tag "PIPELINE" - test("NovaSeq6000 data test") { + test("NovaSeq6000 data test sample size") { when { config "./NovaSeq6000.main_subsample.nf.test.config" From 
ab30119cb07a2df13f5c6606747485796116434c Mon Sep 17 00:00:00 2001 From: ngarcia Date: Mon, 28 Oct 2024 14:52:09 +0000 Subject: [PATCH 061/172] update docs --- docs/output.md | 13 +++++++++++++ docs/usage.md | 5 +++++ nextflow.config | 2 +- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index e14c3ad6..a8257513 100644 --- a/docs/output.md +++ b/docs/output.md @@ -10,10 +10,23 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: +- [Seqtk](#seqtk) - Subsample a specific number of reads per sample - [FastQC](#fastqc) - Raw read QC - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +### Seqtk + +
+Output files + +- `seqtk/` + - `*.fastq.gz`: FastQ files after being subsampled to the `sample_size` value. + +
+ +[Seqtk](https://github.com/lh3/seqtk) subsamples reads by number. For further reading and documentation see the [Seqtk repository](https://github.com/lh3/seqtk). + ### FastQC
 diff --git a/docs/usage.md b/docs/usage.md index d75e0fbd..c11b270f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -93,6 +93,11 @@ genome: 'GRCh37' You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). +Optionally, the `--sample_size` parameter lets you randomly subsample each sample's FastQ files down to the given number of reads before analysis (the default, `0`, disables subsampling). +```bash +nextflow run nf-core/seqinspector --input ./samplesheet.csv --outdir ./results --sample_size 90 -profile docker +``` + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: diff --git a/nextflow.config b/nextflow.config index 10bcb897..38eb3127 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,7 +12,7 @@ params { // TODO nf-core: Specify your pipeline's command line flags // Input options input = null - sample_size = null + sample_size = 0 // References genome = null fasta = null From b8befe4b35f94132418c81640af710ab4af486fb Mon Sep 17 00:00:00 2001 From: ngarcia Date: Mon, 28 Oct 2024 14:52:21 +0000 Subject: [PATCH 062/172] change default to 0 --- nextflow_schema.json | 4 ++-- workflows/seqinspector.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index f87e525f..edb18ea0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -25,9 +25,9 @@ }, "sample_size": { "type": "integer", - "default": null, + "default": 0, "description": "Subset this number of reads.", - "help_text": "Samples will be subsetted to this number of reads. 
If 0 (default), no subsampling will be performed." }, "outdir": { "type": "string", diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 70b89679..91a699da 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -35,7 +35,7 @@ workflow SEQINSPECTOR { // // MODULE: Run Seqkit sample to perform subsampling // - if (params.sample_size) { + if (params.sample_size > 0 ) { ch_sample_sized = SEQTK_SAMPLE(ch_samplesheet.map { meta, reads -> [meta, reads, params.sample_size] }).reads From b2e7be55427d0bc063373d45e505d0c6b5509ed2 Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 28 Oct 2024 14:56:37 +0000 Subject: [PATCH 063/172] Fran's draft --- modules.json | 5 ++ .../fastqscreen/fastqscreen/environment.yml | 9 ++ .../nf-core/fastqscreen/fastqscreen/main.nf | 54 ++++++++++++ .../nf-core/fastqscreen/fastqscreen/meta.yml | 44 ++++++++++ .../fastqscreen/tests/main.nf.test | 87 +++++++++++++++++++ .../fastqscreen/tests/main.nf.test.snap | 81 +++++++++++++++++ .../fastqscreen/fastqscreen/tests/tags.yml | 2 + nextflow.config | 1 + nextflow_schema.json | 5 ++ workflows/seqinspector.nf | 12 +++ 10 files changed, 300 insertions(+) create mode 100644 modules/nf-core/fastqscreen/fastqscreen/environment.yml create mode 100644 modules/nf-core/fastqscreen/fastqscreen/main.nf create mode 100644 modules/nf-core/fastqscreen/fastqscreen/meta.yml create mode 100644 modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test create mode 100644 modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap create mode 100644 modules/nf-core/fastqscreen/fastqscreen/tests/tags.yml diff --git a/modules.json b/modules.json index 8e632d50..cb726e0d 100644 --- a/modules.json +++ b/modules.json @@ -10,6 +10,11 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "fastqscreen/fastqscreen": { + "branch": "master", + "git_sha": "e1316cdcbef318b9cdfd35586423f8337c3d45f0", + "installed_by": ["modules"] + }, "multiqc": { 
"branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", diff --git a/modules/nf-core/fastqscreen/fastqscreen/environment.yml b/modules/nf-core/fastqscreen/fastqscreen/environment.yml new file mode 100644 index 00000000..5097f091 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "fastqscreen_fastqscreen" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::fastq-screen=0.15.3" diff --git a/modules/nf-core/fastqscreen/fastqscreen/main.nf b/modules/nf-core/fastqscreen/fastqscreen/main.nf new file mode 100644 index 00000000..8686f200 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/main.nf @@ -0,0 +1,54 @@ +process FASTQSCREEN_FASTQSCREEN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastq-screen:0.15.3--pl5321hdfd78af_0': + 'biocontainers/fastq-screen:0.15.3--pl5321hdfd78af_0'}" + + input: + tuple val(meta), path(reads) // .fastq files + path database + + output: + tuple val(meta), path("*.txt") , emit: txt + tuple val(meta), path("*.png") , emit: png + tuple val(meta), path("*.html"), emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + + """ + fastq_screen --threads ${task.cpus} \\ + --aligner bowtie2 \\ + --conf ${database}/fastq_screen.conf \\ + $reads \\ + $args \\ + --outdir . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqscreen: \$(echo \$(fastq_screen --version 2>&1) | sed 's/^.*FastQ Screen v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch test_1_screen.html + touch test_1_screen.png + touch test_1_screen.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqscreen: \$(echo \$(fastq_screen --version 2>&1) | sed 's/^.*FastQ Screen v//; s/ .*\$//') + END_VERSIONS + """ + +} diff --git a/modules/nf-core/fastqscreen/fastqscreen/meta.yml b/modules/nf-core/fastqscreen/fastqscreen/meta.yml new file mode 100644 index 00000000..623dacf7 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/meta.yml @@ -0,0 +1,44 @@ +name: fastqscreen_fastqscreen +description: Align reads to multiple reference genomes using fastq-screen +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - "fastqscreen": + description: "FastQ Screen allows you to screen a library of sequences in FastQ format against a set of sequence databases so you can see if the composition of the library matches with what you expect." + homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/" + documentation: "https://stevenwingett.github.io/FastQ-Screen/" + tool_dev_url: "https://github.com/StevenWingett/FastQ-Screen/archive/refs/tags/v0.15.3.zip" + doi: "10.5281/zenodo.5838377" + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - database: + type: directory + description: fastq screen database folder containing config file and index folders + pattern: "FastQ_Screen_Genomes" +output: + - fastq_screen: + type: directory + description: Output fastq_screen file containing alignment statistics + pattern: "*.{_fq_screen}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@snesic" + - "@JPejovicApis" diff --git a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test new file mode 100644 index 00000000..6d858a4d --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process FASTQSCREEN_FASTQSCREEN" + script "../main.nf" + process "FASTQSCREEN_FASTQSCREEN" + + tag "modules" + tag "modules_nfcore" + tag "bowtie2/build" + tag "fastqscreen" + tag "fastqscreen/buildfromindex" + tag "fastqscreen/fastqscreen" + + setup { + + run("BOWTIE2_BUILD") { + script "../../../bowtie2/build/main.nf" + process { + """ + input[0] = Channel.from([ + [[id: "sarscov2"], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)], + [[id: "human"] , file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)] + ]) + """ + } + } + + run("FASTQSCREEN_BUILDFROMINDEX") { + script "../../../fastqscreen/buildfromindex/main.nf" + process { + """ + input[0] = BOWTIE2_BUILD.out.index.map{meta, index -> meta.id}.collect() + input[1] = BOWTIE2_BUILD.out.index.map{meta, index -> index}.collect() + """ + } + } + } + + test("sarscov2 - human") { + + when { + process { + """ + input[0] = [[ id:'test', single_end:true ], + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = FASTQSCREEN_BUILDFROMINDEX.out.database + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.version).match("version") }, + { assert file(process.out.txt.get(0).get(1)).exists() }, + { assert file(process.out.png.get(0).get(1)).exists() }, + { assert file(process.out.html.get(0).get(1)).exists() } + ) + } + + } + + test("sarscov2 - human - stub") { + + options "-stub" + when { + process { + """ + input[0] = [[ id:'test', single_end:true ], + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = FASTQSCREEN_BUILDFROMINDEX.out.database + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap new file mode 100644 index 00000000..b2450191 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap @@ -0,0 +1,81 @@ +{ + "version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-30T14:22:56.541922683" + }, + "sarscov2 - human - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1_screen.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1_screen.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1_screen.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,8ac0239b5103352958d9a9e562b23103" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1_screen.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "png": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1_screen.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test_1_screen.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + 
"versions.yml:md5,8ac0239b5103352958d9a9e562b23103" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-30T14:23:12.70922619" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqscreen/fastqscreen/tests/tags.yml b/modules/nf-core/fastqscreen/fastqscreen/tests/tags.yml new file mode 100644 index 00000000..b03bfb45 --- /dev/null +++ b/modules/nf-core/fastqscreen/fastqscreen/tests/tags.yml @@ -0,0 +1,2 @@ +fastqscreen/fastqscreen: + - "modules/nf-core/fastqscreen/fastqscreen/**" diff --git a/nextflow.config b/nextflow.config index 50c1ecbb..210f03ef 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,7 @@ params { genome = null fasta = null igenomes_base = 's3://ngi-igenomes/igenomes/' + config_fastq_screen = "${projectDir}/modules/nf-core/fastqscreen/references" igenomes_ignore = false // MultiQC options diff --git a/nextflow_schema.json b/nextflow_schema.json index 88fd607b..f18a5f19 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -65,6 +65,11 @@ "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", "fa_icon": "far fa-file-code" }, + "config_fastq_screen": { + "type": "string", + "description": "path to directory with fastq_screen config (fastq_screen.conf)", + "fa_icon": "fas fa-braille" + }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index ea628117..6ec9ca9f 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -4,6 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ include { FASTQC } from '../modules/nf-core/fastqc/main' +include { FASTQSCREEN_FASTQSCREEN } from '../modules/nf-core/fastqscreen/fastqscreen/main' include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' @@ -39,6 +40,17 @@ workflow SEQINSPECTOR { ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + // + // MODULE: Run FastQ Screen + // + + FASTQSCREEN_FASTQSCREEN ( + ch_samplesheet, + Channel.fromPath(params.config_fastq_screen) + ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQSCREEN_FASTQSCREEN.out.txt) + ch_versions = ch_versions.mix(FASTQSCREEN_FASTQSCREEN.out.versions.first()) + // // Collate and save software versions // From d833d913c5f0da94eeee1c9eaf44fd14c5fdb2fa Mon Sep 17 00:00:00 2001 From: Alfred Kedhammar Date: Mon, 28 Oct 2024 15:02:08 +0000 Subject: [PATCH 064/172] nf-core modules update --> fastqscreen --- modules.json | 2 +- .../fastqscreen/fastqscreen/environment.yml | 3 +- .../nf-core/fastqscreen/fastqscreen/main.nf | 10 +-- .../nf-core/fastqscreen/fastqscreen/meta.yml | 78 +++++++++++++------ .../fastqscreen/tests/main.nf.test | 32 +++++++- .../fastqscreen/tests/main.nf.test.snap | 65 ++++++++++++++-- .../fastqscreen/tests/nextflow.config | 5 ++ 7 files 
changed, 157 insertions(+), 38 deletions(-) create mode 100644 modules/nf-core/fastqscreen/fastqscreen/tests/nextflow.config diff --git a/modules.json b/modules.json index cb726e0d..f8b48550 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "fastqscreen/fastqscreen": { "branch": "master", - "git_sha": "e1316cdcbef318b9cdfd35586423f8337c3d45f0", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "multiqc": { diff --git a/modules/nf-core/fastqscreen/fastqscreen/environment.yml b/modules/nf-core/fastqscreen/fastqscreen/environment.yml index 5097f091..c63c61e2 100644 --- a/modules/nf-core/fastqscreen/fastqscreen/environment.yml +++ b/modules/nf-core/fastqscreen/fastqscreen/environment.yml @@ -1,9 +1,8 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "fastqscreen_fastqscreen" channels: - conda-forge - bioconda - - defaults dependencies: - "bioconda::fastq-screen=0.15.3" + - bioconda::perl-gdgraph=1.54 diff --git a/modules/nf-core/fastqscreen/fastqscreen/main.nf b/modules/nf-core/fastqscreen/fastqscreen/main.nf index 8686f200..88c4e5c5 100644 --- a/modules/nf-core/fastqscreen/fastqscreen/main.nf +++ b/modules/nf-core/fastqscreen/fastqscreen/main.nf @@ -12,10 +12,11 @@ process FASTQSCREEN_FASTQSCREEN { path database output: - tuple val(meta), path("*.txt") , emit: txt - tuple val(meta), path("*.png") , emit: png - tuple val(meta), path("*.html"), emit: html - path "versions.yml" , emit: versions + tuple val(meta), path("*.txt") , emit: txt + tuple val(meta), path("*.png") , emit: png , optional: true + tuple val(meta), path("*.html") , emit: html + tuple val(meta), path("*.fastq.gz"), emit: fastq, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -30,7 +31,6 @@ process FASTQSCREEN_FASTQSCREEN { --conf ${database}/fastq_screen.conf \\ $reads \\ $args \\ - --outdir . 
cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/fastqscreen/fastqscreen/meta.yml b/modules/nf-core/fastqscreen/fastqscreen/meta.yml index 623dacf7..39c86b4f 100644 --- a/modules/nf-core/fastqscreen/fastqscreen/meta.yml +++ b/modules/nf-core/fastqscreen/fastqscreen/meta.yml @@ -9,36 +9,70 @@ keywords: - reference tools: - "fastqscreen": - description: "FastQ Screen allows you to screen a library of sequences in FastQ format against a set of sequence databases so you can see if the composition of the library matches with what you expect." + description: "FastQ Screen allows you to screen a library of sequences in FastQ + format against a set of sequence databases so you can see if the composition + of the library matches with what you expect." homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/" documentation: "https://stevenwingett.github.io/FastQ-Screen/" tool_dev_url: "https://github.com/StevenWingett/FastQ-Screen/archive/refs/tags/v0.15.3.zip" doi: "10.5281/zenodo.5838377" licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - database: - type: directory - description: fastq screen database folder containing config file and index folders - pattern: "FastQ_Screen_Genomes" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - - database: + type: directory + description: fastq screen database folder containing config file and index folders + pattern: "FastQ_Screen_Genomes" output: - - fastq_screen: - type: directory - description: Output fastq_screen file containing alignment statistics - pattern: "*.{_fq_screen}" + - txt: + - meta: + type: map + description: Groovy Map containing sample information + - "*.txt": + type: file + description: TXT file containing alignment statistics + pattern: "*.txt" + - png: + - meta: + type: map + description: Groovy Map containing sample information + - "*.png": + type: file + description: PNG file with graphical representation of alignments + pattern: "*.png" + - html: + - meta: + type: map + description: Groovy Map containing sample information + - "*.html": + type: file + description: HTML file containing mapping results as a table and graphical representation + pattern: "*.html" + - fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: FastQ file containing reads that did not align to any database (optional) + pattern: "*.fastq.gz" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@snesic" - "@JPejovicApis" diff --git a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test index 6d858a4d..71230a22 100644 --- a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test +++ b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test @@ -10,6 +10,8 @@ nextflow_process { tag "fastqscreen" tag "fastqscreen/buildfromindex" tag "fastqscreen/fastqscreen" + tag "buildfromindex" + tag "modules_fastqscreen" setup { @@ -52,7 +54,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.version).match("version") }, + { assert snapshot(process.out.version).match() }, { assert file(process.out.txt.get(0).get(1)).exists() }, { assert file(process.out.png.get(0).get(1)).exists() }, { assert file(process.out.html.get(0).get(1)).exists() } @@ -61,6 +63,34 @@ nextflow_process { } + test("sarscov2 - human - tags") { + config './nextflow.config' + when { + process { + """ + input[0] = [[ id:'test', single_end:false ], + [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] + input[1] = FASTQSCREEN_BUILDFROMINDEX.out.database + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.version, + process.out.txt, + process.out.fastq, + path(process.out.html.get(0).get(1)).readLines()[0..10], + path(process.out.png.get(0).get(1)).exists() + ).match() } + ) + } + + } + test("sarscov2 - human - stub") { options "-stub" diff --git 
a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap index b2450191..2afffdea 100644 --- a/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap +++ b/modules/nf-core/fastqscreen/fastqscreen/tests/main.nf.test.snap @@ -1,11 +1,56 @@ { - "version": { + "sarscov2 - human": { "content": null, "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-04-30T14:22:56.541922683" + "timestamp": "2024-08-31T05:42:29.972454812" + }, + "sarscov2 - human - tags": { + "content": [ + null, + [ + [ + { + "id": "test", + "single_end": false + }, + "test_1_screen.txt:md5,b0b0ea58bc26ebaa4d573a85e7898f25" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.tagged.fastq.gz:md5,f742b162c43ce28f80b89608d5c47f3d", + "test_1.tagged_filter.fastq.gz:md5,28527a76bb0bb3fce0ee76afe01e90aa" + ] + ] + ], + [ + "", + "", + "", + "", + "", + "", + "\t", + "\tFastQ Screen Processing Report - test_1.fastq.gz", + "\t