From 8e19d59bef4cb30b7bad387b5ccc89c413eaa475 Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 10:56:51 +0200 Subject: [PATCH 01/17] [MODIF] update modules --- modules.json | 30 +-- modules/nf-core/bowtie2/align/main.nf | 28 ++- modules/nf-core/bowtie2/align/meta.yml | 43 +++- .../bowtie2/align/tests/cram_crai.config | 5 + .../nf-core/bowtie2/align/tests/main.nf.test | 162 ++++++++---- .../bowtie2/align/tests/main.nf.test.snap | 216 +++++++++------- modules/nf-core/bwa/index/environment.yml | 2 +- modules/nf-core/bwa/index/main.nf | 4 +- modules/nf-core/bwa/index/meta.yml | 3 +- modules/nf-core/bwa/index/tests/main.nf.test | 4 +- .../nf-core/bwa/index/tests/main.nf.test.snap | 10 +- modules/nf-core/bwa/mem/environment.yml | 5 +- modules/nf-core/bwa/mem/main.nf | 35 ++- modules/nf-core/bwa/mem/meta.yml | 20 +- modules/nf-core/bwa/mem/tests/main.nf.test | 205 ++++++++++++++-- .../nf-core/bwa/mem/tests/main.nf.test.snap | 230 ++++++++++-------- .../custom/getchromsizes/environment.yml | 3 +- modules/nf-core/custom/getchromsizes/main.nf | 7 +- .../custom/getchromsizes/tests/main.nf.test | 51 +++- .../getchromsizes/tests/main.nf.test.snap | 140 ++++++++++- modules/nf-core/multiqc/environment.yml | 2 +- modules/nf-core/multiqc/main.nf | 10 +- modules/nf-core/multiqc/meta.yml | 13 + modules/nf-core/multiqc/tests/main.nf.test | 6 + .../nf-core/multiqc/tests/main.nf.test.snap | 18 +- modules/nf-core/pairtools/dedup/main.nf | 2 +- modules/nf-core/pairtools/dedup/meta.yml | 3 + modules/nf-core/pairtools/merge/main.nf | 2 +- modules/nf-core/pairtools/parse/main.nf | 2 +- modules/nf-core/pairtools/parse/meta.yml | 2 + modules/nf-core/pairtools/restrict/main.nf | 2 +- modules/nf-core/pairtools/restrict/meta.yml | 4 +- modules/nf-core/pairtools/select/main.nf | 2 +- modules/nf-core/pairtools/select/meta.yml | 2 + modules/nf-core/pairtools/sort/main.nf | 2 +- modules/nf-core/pairtools/stats/main.nf | 2 +- .../nf-core/samtools/flagstat/environment.yml | 3 +- 
modules/nf-core/samtools/flagstat/main.nf | 4 +- .../samtools/flagstat/tests/main.nf.test | 12 +- .../samtools/flagstat/tests/main.nf.test.snap | 20 +- .../nf-core/samtools/index/environment.yml | 3 +- modules/nf-core/samtools/index/main.nf | 4 +- .../nf-core/samtools/index/tests/main.nf.test | 36 +-- .../samtools/index/tests/main.nf.test.snap | 54 +++- modules/nf-core/samtools/sort/environment.yml | 3 +- modules/nf-core/samtools/sort/main.nf | 32 ++- modules/nf-core/samtools/sort/meta.yml | 32 ++- .../nf-core/samtools/sort/tests/main.nf.test | 81 ++++-- .../samtools/sort/tests/main.nf.test.snap | 140 ++++++++--- .../samtools/sort/tests/nextflow.config | 3 +- 50 files changed, 1245 insertions(+), 459 deletions(-) create mode 100644 modules/nf-core/bowtie2/align/tests/cram_crai.config diff --git a/modules.json b/modules.json index 5cc2f42..ac76f9f 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "bowtie2/align": { "branch": "master", - "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde", + "git_sha": "e4bad511789f16d0df39ee306b2cd50418365048", "installed_by": ["modules"] }, "bowtie2/build": { @@ -17,12 +17,12 @@ }, "bwa/index": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "e0ff65e1fb313677de09f5f477ae3da30ce19b7b", "installed_by": ["modules"] }, "bwa/mem": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "e0ff65e1fb313677de09f5f477ae3da30ce19b7b", "installed_by": ["modules"] }, "calder2": { @@ -57,7 +57,7 @@ }, "custom/getchromsizes": { "branch": "master", - "git_sha": "1b0ffa4e5aed5b7e3cd4311af31bd3b2c8345051", + "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", "installed_by": ["modules"] }, "fastqc": { @@ -67,7 +67,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", + "git_sha": "b80f5fd12ff7c43938f424dd76392a2704fa2396", "installed_by": ["modules"] }, "pairix": { @@ -77,52 +77,52 @@ }, 
"pairtools/dedup": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", "installed_by": ["modules"] }, "pairtools/merge": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", "installed_by": ["modules"] }, "pairtools/parse": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", "installed_by": ["modules"] }, "pairtools/restrict": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", "installed_by": ["modules"] }, "pairtools/select": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", "installed_by": ["modules"] }, "pairtools/sort": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", "installed_by": ["modules"] }, "pairtools/stats": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "4d3743f71f43cc40505dee2bc0747dca2df5f69a", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", "installed_by": ["modules"] } } diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf index 8c405ee..809525a 100644 --- 
a/modules/nf-core/bowtie2/align/main.nf +++ b/modules/nf-core/bowtie2/align/main.nf @@ -1,6 +1,6 @@ process BOWTIE2_ALIGN { tag "$meta.id" - label "process_high" + label 'process_high' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -10,13 +10,18 @@ process BOWTIE2_ALIGN { input: tuple val(meta) , path(reads) tuple val(meta2), path(index) + tuple val(meta3), path(fasta) val save_unaligned val sort_bam output: - tuple val(meta), path("*.{bam,sam}"), emit: aligned + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram , optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("*.crai") , emit: crai , optional:true tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("*fastq.gz") , emit: fastq, optional:true + tuple val(meta), path("*fastq.gz") , emit: fastq , optional:true path "versions.yml" , emit: versions when: @@ -39,7 +44,10 @@ process BOWTIE2_ALIGN { def samtools_command = sort_bam ? 'sort' : 'view' def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ - def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" """ INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` @@ -53,7 +61,7 @@ process BOWTIE2_ALIGN { $unaligned \\ $args \\ 2> >(tee ${prefix}.bowtie2.log >&2) \\ - | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.${extension} - + | samtools $samtools_command $args2 --threads $task.cpus ${reference} -o ${prefix}.${extension} - if [ -f ${prefix}.unmapped.fastq.1.gz ]; then mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz @@ -82,9 +90,19 @@ process BOWTIE2_ALIGN { } else { create_unmapped = save_unaligned ? "touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" } + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } """ touch ${prefix}.${extension} + ${create_index} touch ${prefix}.bowtie2.log ${create_unmapped} diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml index e66811d..38610e0 100644 --- a/modules/nf-core/bowtie2/align/meta.yml +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -36,6 +36,15 @@ input: type: file description: Bowtie2 genome index files pattern: "*.ebwt" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - fasta: + type: file + description: Bowtie2 genome fasta file + pattern: "*.fasta" - save_unaligned: type: boolean description: | @@ -46,22 +55,38 @@ input: description: use samtools sort (true) or samtools view (false) pattern: "true or false" output: - - aligned: + - sam: type: file - description: Output BAM/SAM file containing read alignments - pattern: "*.{bam,sam}" - - versions: + description: Output SAM file containing read alignments + pattern: "*.sam" + - bam: type: file - description: File containing software versions - pattern: "versions.yml" - - fastq: + description: Output BAM file containing read alignments + pattern: "*.bam" + - cram: type: file - description: Unaligned FastQ files - pattern: "*.fastq.gz" + description: Output CRAM file containing read alignments + pattern: "*.cram" + - csi: + type: file + description: Output SAM/BAM index for large inputs + pattern: "*.csi" + - crai: + type: file + description: Output CRAM index + pattern: "*.crai" - log: type: file description: Aligment log pattern: "*.log" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/bowtie2/align/tests/cram_crai.config b/modules/nf-core/bowtie2/align/tests/cram_crai.config new file mode 100644 index 0000000..03f1d5e --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/cram_crai.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_ALIGN { + ext.args2 = '--output-fmt cram --write-index' + } +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test b/modules/nf-core/bowtie2/align/tests/main.nf.test index a478d17..03aeaf9 100644 --- a/modules/nf-core/bowtie2/align/tests/main.nf.test +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -6,9 +6,10 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "bowtie2" + tag 
"bowtie2/build" tag "bowtie2/align" - test("sarscov2 - fastq, index, false, false - bam") { + test("sarscov2 - fastq, index, fasta, false, false - bam") { setup { run("BOWTIE2_BUILD") { @@ -32,8 +33,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -42,7 +44,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -52,7 +54,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, false, false - sam") { + test("sarscov2 - fastq, index, fasta, false, false - sam") { config "./sam.config" setup { @@ -77,8 +79,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -87,7 +90,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).readLines()[0..4], + file(process.out.sam[0][1]).readLines()[0..4], process.out.log, process.out.fastq, process.out.versions @@ -97,7 +100,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, false, false - sam2") { + test("sarscov2 - fastq, index, fasta, false, false - sam2") { config "./sam2.config" setup { @@ -122,8 +125,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = 
BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -132,7 +136,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).readLines()[0..4], + file(process.out.sam[0][1]).readLines()[0..4], process.out.log, process.out.fastq, process.out.versions @@ -142,7 +146,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, false, true - bam") { + test("sarscov2 - fastq, index, fasta, false, true - bam") { setup { run("BOWTIE2_BUILD") { @@ -166,8 +170,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = true //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -176,7 +181,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -186,7 +191,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, false, false - bam") { + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - bam") { setup { run("BOWTIE2_BUILD") { @@ -213,8 +218,9 @@ nextflow_process { ] ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -223,7 +229,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - 
file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -233,7 +239,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, false, true - bam") { + test("sarscov2 - [fastq1, fastq2], index, fasta, false, true - bam") { setup { run("BOWTIE2_BUILD") { @@ -260,8 +266,9 @@ nextflow_process { ] ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = true //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -270,7 +277,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -280,7 +287,7 @@ nextflow_process { } - test("sarscov2 - fastq, large_index, false, false - bam") { + test("sarscov2 - fastq, large_index, fasta, false, false - bam") { config "./large_index.config" setup { @@ -305,8 +312,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -315,7 +323,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -325,7 +333,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], large_index, false, false - bam") { + test("sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam") { config "./large_index.config" setup { @@ -353,8 +361,9 @@ 
nextflow_process { ] ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -363,7 +372,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -373,7 +382,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, true, false - bam") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true, false - bam") { setup { run("BOWTIE2_BUILD") { @@ -400,8 +409,9 @@ nextflow_process { ] ] input[1] = BOWTIE2_BUILD.out.index - input[2] = true //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -410,7 +420,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -420,7 +430,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, true, false - bam") { + test("sarscov2 - fastq, index, fasta, true, false - bam") { setup { run("BOWTIE2_BUILD") { @@ -444,8 +454,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = true //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -454,7 +465,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - 
file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -465,7 +476,54 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, false, false - stub") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true, true - cram") { + + config "./cram_crai.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - stub") { options "-stub" setup { @@ -493,8 +551,9 @@ nextflow_process { ] ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -503,7 +562,8 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, file(process.out.log[0][1]).name, process.out.fastq, process.out.versions @@ -513,7 +573,7 @@ nextflow_process { } - 
test("sarscov2 - fastq, index, true, false - stub") { + test("sarscov2 - fastq, index, fasta, true, false - stub") { options "-stub" setup { @@ -538,8 +598,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = true //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -548,14 +609,15 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, file(process.out.log[0][1]).name, - file(process.out.fastq[0][1]).name, + process.out.fastq, process.out.versions ).match() } ) } } - + } diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap index 883dc7e..028e7da 100644 --- a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -1,34 +1,38 @@ { - "sarscov2 - fastq, index, false, false - sam2": { + "sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam": { "content": [ - [ - "ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA versions.yml "${task.process}": @@ -42,9 +53,19 @@ process BWA_MEM { """ stub: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 'sort' : 'view' + def extension = args2.contains("--output-fmt sam") ? "sam" : + args2.contains("--output-fmt cram") ? "cram": + sort_bam && args2.contains("-O cram")? "cram": + !sort_bam && args2.contains("-C") ? 
"cram": + "bam" """ - touch ${prefix}.bam + touch ${prefix}.${extension} + touch ${prefix}.csi + touch ${prefix}.crai cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bwa/mem/meta.yml b/modules/nf-core/bwa/mem/meta.yml index 440fb1f..b126dd8 100644 --- a/modules/nf-core/bwa/mem/meta.yml +++ b/modules/nf-core/bwa/mem/meta.yml @@ -14,7 +14,7 @@ tools: BWA is a software package for mapping DNA sequences against a large reference genome, such as the human genome. homepage: http://bio-bwa.sourceforge.net/ - documentation: http://www.htslib.org/doc/samtools.html + documentation: https://bio-bwa.sourceforge.net/bwa.shtml arxiv: arXiv:1303.3997 licence: ["GPL-3.0-or-later"] input: @@ -37,6 +37,10 @@ input: type: file description: BWA genome index files pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fasta,fa}" - sort_bam: type: boolean description: use samtools sort (true) or samtools view (false) @@ -46,6 +50,18 @@ output: type: file description: Output BAM file containing read alignments pattern: "*.{bam}" + - cram: + type: file + description: Output CRAM file containing read alignments + pattern: "*.{cram}" + - csi: + type: file + description: Optional index file for BAM file + pattern: "*.{csi}" + - crai: + type: file + description: Optional index file for CRAM file + pattern: "*.{crai}" - versions: type: file description: File containing software versions @@ -53,6 +69,8 @@ output: authors: - "@drpatelh" - "@jeremy1805" + - "@matthdsm" maintainers: - "@drpatelh" - "@jeremy1805" + - "@matthdsm" diff --git a/modules/nf-core/bwa/mem/tests/main.nf.test b/modules/nf-core/bwa/mem/tests/main.nf.test index b199bb7..463b76f 100644 --- a/modules/nf-core/bwa/mem/tests/main.nf.test +++ b/modules/nf-core/bwa/mem/tests/main.nf.test @@ -5,6 +5,7 @@ nextflow_process { tag "modules" tag "bwa" tag "bwa/mem" + tag "bwa/index" script "../main.nf" 
process "BWA_MEM" @@ -17,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -30,11 +31,12 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index - input[2] = false + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false """ } } @@ -42,7 +44,14 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } ) } @@ -57,7 +66,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -70,11 +79,12 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index - input[2] = true + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = true """ } } @@ -82,7 +92,14 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert 
snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } ) } @@ -97,7 +114,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -110,12 +127,13 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index - input[2] = false + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false """ } } @@ -123,7 +141,14 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } ) } @@ -138,7 +163,56 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } + ) + } + + } + + test("Paired-End - no fasta") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -151,12 +225,60 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[:],[]] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } + ) + } + + } + + test("Single-end - stub") { + options "-stub" + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index - input[2] = true + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false """ } } @@ -164,9 +286,56 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } ) } + } + + test("Paired-end - stub") { + options "-stub" + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } } } diff --git a/modules/nf-core/bwa/mem/tests/main.nf.test.snap b/modules/nf-core/bwa/mem/tests/main.nf.test.snap index bfb55fc..038ee7b 100644 --- a/modules/nf-core/bwa/mem/tests/main.nf.test.snap +++ b/modules/nf-core/bwa/mem/tests/main.nf.test.snap @@ -1,126 +1,140 @@ { "Single-End": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - 
"test.bam:md5,df203d7c7e8fef351408a909570c7952" - ] - ], - "1": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" - ], - "bam": [ - [ - { - "id": "test", - "single_end": true - }, - "test.bam:md5,df203d7c7e8fef351408a909570c7952" - ] - ], - "versions": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" - ] - } + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" ], - "timestamp": "2023-12-04T11:01:22.483594641" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:44:32.953673185" }, "Single-End Sort": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test.bam:md5,8a52bd78fdcecb994c1f63897d5b431c" - ] - ], - "1": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" - ], - "bam": [ - [ - { - "id": "test", - "single_end": true - }, - "test.bam:md5,8a52bd78fdcecb994c1f63897d5b431c" - ] - ], - "versions": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" - ] - } + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" ], - "timestamp": "2023-12-04T11:01:30.180783483" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:44:45.27066093" }, "Paired-End": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,9815aef9ec763a60c53c3879be2d73ae" - ] - ], - "1": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,9815aef9ec763a60c53c3879be2d73ae" - ] - ], - "versions": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" - ] - } + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" ], - "timestamp": "2023-12-04T11:01:38.761983007" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:44:57.706852274" }, "Paired-End Sort": { 
"content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,0f0cda73704c4f7ba08af482edcbbe88" - ] - ], - "1": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,0f0cda73704c4f7ba08af482edcbbe88" - ] - ], - "versions": [ - "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" - ] - } + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" ], - "timestamp": "2023-12-04T11:01:46.284587802" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:45:10.376505036" + }, + "Single-end - stub": { + "content": [ + "test.bam", + "test.csi", + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:46:07.182072398" + }, + "Paired-End - no fasta": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:45:53.813076501" + }, + "Paired-end - stub": { + "content": [ + "test.bam", + "test.csi", + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:46:18.412916364" } } \ No newline at end of file diff --git a/modules/nf-core/custom/getchromsizes/environment.yml b/modules/nf-core/custom/getchromsizes/environment.yml index 2a01695..2ecd012 100644 --- a/modules/nf-core/custom/getchromsizes/environment.yml +++ b/modules/nf-core/custom/getchromsizes/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.16.1 + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf 
index e8084ea..3edf7c2 100644 --- a/modules/nf-core/custom/getchromsizes/main.nf +++ b/modules/nf-core/custom/getchromsizes/main.nf @@ -4,8 +4,8 @@ process CUSTOM_GETCHROMSIZES { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(fasta) @@ -35,6 +35,9 @@ process CUSTOM_GETCHROMSIZES { """ touch ${fasta}.fai touch ${fasta}.sizes + if [[ "${fasta.extension}" == "gz" ]]; then + touch ${fasta}.gzi + fi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/custom/getchromsizes/tests/main.nf.test b/modules/nf-core/custom/getchromsizes/tests/main.nf.test index 9f6b564..2f741a4 100644 --- a/modules/nf-core/custom/getchromsizes/tests/main.nf.test +++ b/modules/nf-core/custom/getchromsizes/tests/main.nf.test @@ -12,9 +12,6 @@ nextflow_process { test("test_custom_getchromsizes") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -31,15 +28,11 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } test("test_custom_getchromsizes_bgzip") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -56,7 +49,51 @@ nextflow_process { { assert snapshot(process.out).match() } ) } + } + + test("test_custom_getchromsizes - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } } + test("test_custom_getchromsizes_bgzip - stub") { + 
+ options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } } diff --git a/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap b/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap index 2e560bd..c37b284 100644 --- a/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap +++ b/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap @@ -1,4 +1,69 @@ { + "test_custom_getchromsizes_bgzip - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:38:36.927106" + }, "test_custom_getchromsizes": { "content": [ { @@ -22,7 +87,7 @@ ], "3": [ - "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8" + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" ], "fai": [ [ @@ -44,11 +109,15 @@ ] ], "versions": [ - "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8" + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" ] } ], - 
"timestamp": "2024-01-17T17:48:35.562918" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-20T13:22:34.14237" }, "test_custom_getchromsizes_bgzip": { "content": [ @@ -78,7 +147,7 @@ ] ], "3": [ - "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8" + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" ], "fai": [ [ @@ -105,10 +174,69 @@ ] ], "versions": [ - "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8" + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-20T13:23:06.241379" + }, + "test_custom_getchromsizes - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,0d5a7c33bddcb1edad6bf0705b258e6f" ] } ], - "timestamp": "2024-01-17T17:49:02.562311" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:24:05.697845" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index ca39fb6..2121492 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.21 + - bioconda::multiqc=1.23 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 47ac352..459dfea 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ 
-3,14 +3,16 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : - 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.23--pyhdfd78af_0' : + 'biocontainers/multiqc:1.23--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" path(multiqc_config) path(extra_multiqc_config) path(multiqc_logo) + path(replace_names) + path(sample_names) output: path "*multiqc_report.html", emit: report @@ -26,6 +28,8 @@ process MULTIQC { def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ @@ -33,6 +37,8 @@ process MULTIQC { $config \\ $extra_config \\ $logo \\ + $replace \\ + $samples \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 45a9bc3..382c08c 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -29,6 +29,19 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" + - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. 
+ pattern: "*.{tsv}" output: - report: type: file diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index f1c4242..6aa27f4 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -17,6 +17,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -41,6 +43,8 @@ nextflow_process { input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -66,6 +70,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index bfebd80..45e95e5 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,87904cd321df21fac35d18f0fc01bb19" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:48:55.657331" + "timestamp": "2024-07-10T12:41:34.562023" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,87904cd321df21fac35d18f0fc01bb19" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:49:49.071937" + "timestamp": "2024-07-10T11:27:11.933869532" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,87904cd321df21fac35d18f0fc01bb19" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - 
"timestamp": "2024-02-29T08:49:25.457567" + "timestamp": "2024-07-10T11:26:56.709849369" } } \ No newline at end of file diff --git a/modules/nf-core/pairtools/dedup/main.nf b/modules/nf-core/pairtools/dedup/main.nf index 90d9f81..b5a3f75 100644 --- a/modules/nf-core/pairtools/dedup/main.nf +++ b/modules/nf-core/pairtools/dedup/main.nf @@ -32,7 +32,7 @@ process PAIRTOOLS_DEDUP { cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/dedup/meta.yml b/modules/nf-core/pairtools/dedup/meta.yml index 54f06ea..3c67e3f 100644 --- a/modules/nf-core/pairtools/dedup/meta.yml +++ b/modules/nf-core/pairtools/dedup/meta.yml @@ -2,6 +2,9 @@ name: pairtools_dedup description: Find and remove PCR/optical duplicates keywords: - dedup + - deduplication + - PCR/optical duplicates + - pairs tools: - pairtools: description: CLI tools to process mapped Hi-C data diff --git a/modules/nf-core/pairtools/merge/main.nf b/modules/nf-core/pairtools/merge/main.nf index ab468fa..4893347 100644 --- a/modules/nf-core/pairtools/merge/main.nf +++ b/modules/nf-core/pairtools/merge/main.nf @@ -31,7 +31,7 @@ process PAIRTOOLS_MERGE { cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools, version //') + pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/parse/main.nf b/modules/nf-core/pairtools/parse/main.nf index 10fa42b..b939492 100644 --- a/modules/nf-core/pairtools/parse/main.nf +++ b/modules/nf-core/pairtools/parse/main.nf @@ -35,7 +35,7 @@ process PAIRTOOLS_PARSE { cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + 
pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/parse/meta.yml b/modules/nf-core/pairtools/parse/meta.yml index 396fe79..e7e448e 100644 --- a/modules/nf-core/pairtools/parse/meta.yml +++ b/modules/nf-core/pairtools/parse/meta.yml @@ -1,7 +1,9 @@ name: pairtools_parse description: Find ligation junctions in .sam, make .pairs keywords: + - ligation junctions - parse + - pairtools tools: - pairtools: description: CLI tools to process mapped Hi-C data diff --git a/modules/nf-core/pairtools/restrict/main.nf b/modules/nf-core/pairtools/restrict/main.nf index 4982274..dd57ceb 100644 --- a/modules/nf-core/pairtools/restrict/main.nf +++ b/modules/nf-core/pairtools/restrict/main.nf @@ -33,7 +33,7 @@ process PAIRTOOLS_RESTRICT { cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/restrict/meta.yml b/modules/nf-core/pairtools/restrict/meta.yml index 8773353..12f6b98 100644 --- a/modules/nf-core/pairtools/restrict/meta.yml +++ b/modules/nf-core/pairtools/restrict/meta.yml @@ -1,7 +1,9 @@ name: pairtools_restrict description: Assign restriction fragments to pairs keywords: - - sort + - pairs + - pairstools + - restriction fragments tools: - pairtools: description: CLI tools to process mapped Hi-C data diff --git a/modules/nf-core/pairtools/select/main.nf b/modules/nf-core/pairtools/select/main.nf index e8cd8a5..69e8d47 100644 --- a/modules/nf-core/pairtools/select/main.nf +++ b/modules/nf-core/pairtools/select/main.nf @@ -32,7 +32,7 @@ process PAIRTOOLS_SELECT { cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + pairtools: \$(pairtools --version | tr '\\n' 
',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/select/meta.yml b/modules/nf-core/pairtools/select/meta.yml index 47f4ce4..e9aef9a 100644 --- a/modules/nf-core/pairtools/select/meta.yml +++ b/modules/nf-core/pairtools/select/meta.yml @@ -2,6 +2,8 @@ name: pairtools_select description: Select pairs according to given condition by options.args keywords: - select + - pairs + - filter tools: - pairtools: description: CLI tools to process mapped Hi-C data diff --git a/modules/nf-core/pairtools/sort/main.nf b/modules/nf-core/pairtools/sort/main.nf index 674e120..653a8d3 100644 --- a/modules/nf-core/pairtools/sort/main.nf +++ b/modules/nf-core/pairtools/sort/main.nf @@ -34,7 +34,7 @@ process PAIRTOOLS_SORT { cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/pairtools/stats/main.nf b/modules/nf-core/pairtools/stats/main.nf index 66e48c6..b07d061 100644 --- a/modules/nf-core/pairtools/stats/main.nf +++ b/modules/nf-core/pairtools/stats/main.nf @@ -31,7 +31,7 @@ process PAIRTOOLS_STATS { cat <<-END_VERSIONS > versions.yml "${task.process}": - pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools, version //') + pairtools: \$(pairtools --version | tr '\\n' ',' | sed 's/.*pairtools.*version //' | sed 's/,\$/\\n/') END_VERSIONS """ } diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml index 5efae05..68b8155 100644 --- a/modules/nf-core/samtools/flagstat/environment.yml +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.18 + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/flagstat/main.nf 
b/modules/nf-core/samtools/flagstat/main.nf index f1893d7..754d84b 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_FLAGSTAT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'biocontainers/samtools:1.18--h50ea8bc_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test index c8dd8dc..24c3c04 100644 --- a/modules/nf-core/samtools/flagstat/tests/main.nf.test +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -16,11 +16,11 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) - ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) """ } } @@ -28,8 +28,8 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.flagstat).match() }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.flagstat).match("flagstat") }, + { assert snapshot(process.out.versions).match("versions") } ) } } diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap 
b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap index 880019f..e9f85ef 100644 --- a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "BAM": { + "flagstat": { "content": [ [ [ @@ -11,6 +11,22 @@ ] ] ], - "timestamp": "2023-11-14T15:49:22.577133" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:31:37.783927" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,f606681ef971cbb548a4d9e3fbabdbc2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:41:52.516253882" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index 296ed99..260d516 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.18 + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 8ad18fd..b523c21 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'biocontainers/samtools:1.18--h50ea8bc_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test index c76a916..bb7756d 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_process { tag "samtools" tag "samtools/index" - test("sarscov2 [BAI]") { + test("bai") { when { params { @@ -16,10 +16,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) """ } } @@ -28,12 +28,12 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot(process.out.bai).match("bai") }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("bai_versions") } ) } } - test("homo_sapiens [CRAI]") { + test("crai") { when { params { @@ -41,10 +41,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) """ } } @@ -53,12 +53,12 @@ nextflow_process { assertAll ( { assert process.success }, { assert 
snapshot(process.out.crai).match("crai") }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("crai_versions") } ) } } - test("homo_sapiens [CSI]") { + test("csi") { config "./csi.nextflow.config" @@ -68,10 +68,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) """ } } @@ -80,7 +80,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert path(process.out.csi.get(0).get(1)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("csi_versions") } ) } } diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap index b3baee7..52756e8 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -1,28 +1,74 @@ { + "crai_versions": { + "content": [ + [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:04.203740976" + }, + "csi_versions": { + "content": [ + [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:09.57475878" + }, "crai": { "content": [ [ [ { - "id": "test" + "id": "test", + "single_end": false }, "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" ] ] ], - "timestamp": "2023-11-15T15:17:37.30801" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T18:41:38.446424" }, "bai": { "content": [ [ [ { - "id": "test" + "id": "test", + "single_end": false }, "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" ] ] ], - "timestamp": "2023-11-15T15:17:30.869234" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:40:46.579747" + }, + "bai_versions": { + "content": [ + [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:41:57.929287369" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml index cd50868..36a12ea 100644 --- a/modules/nf-core/samtools/sort/environment.yml +++ b/modules/nf-core/samtools/sort/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.18 + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index 4a666d4..596c6f7 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -4,15 +4,18 @@ process SAMTOOLS_SORT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'biocontainers/samtools:1.18--h50ea8bc_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: - tuple val(meta), path(bam) + tuple val(meta) , path(bam) + tuple val(meta2), path(fasta) output: - tuple val(meta), path("*.bam"), emit: bam - tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true path "versions.yml" , emit: versions when: @@ -21,14 +24,24 @@ process SAMTOOLS_SORT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + def reference = fasta ? "--reference ${fasta}" : "" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + samtools cat \\ + --threads $task.cpus \\ + ${bam} \\ + | \\ samtools sort \\ $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam + -T ${prefix} \\ + --threads $task.cpus \\ + ${reference} \\ + -o ${prefix}.${extension} \\ + - cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -40,6 +53,7 @@ process SAMTOOLS_SORT { def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.bam + touch ${prefix}.bam.csi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml index 2200de7..341a7d0 100644 --- a/modules/nf-core/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -23,8 +23,18 @@ input: e.g. 
[ id:'test', single_end:false ] - bam: type: file - description: BAM/CRAM/SAM file + description: BAM/CRAM/SAM file(s) pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true output: - meta: type: map @@ -33,19 +43,29 @@ output: e.g. [ id:'test', single_end:false ] - bam: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - versions: + description: Sorted BAM file + pattern: "*.{bam}" + - cram: type: file - description: File containing software versions - pattern: "versions.yml" + description: Sorted CRAM file + pattern: "*.{cram}" + - crai: + type: file + description: CRAM index file (optional) + pattern: "*.crai" - csi: type: file description: BAM index file (optional) pattern: "*.csi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@ewels" + - "@matthdsm" maintainers: - "@drpatelh" - "@ewels" + - "@matthdsm" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test index abb8097..fb38ed9 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -8,22 +8,21 @@ nextflow_process { tag "samtools" tag "samtools/sort" - test("test_samtools_sort") { + test("bam") { config "./nextflow.config" when { - params { - outdir = "$outputDir" - } process { """ - input[0] = [ - [ id:'test', single_end:false ], - [ - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) """ } } @@ -31,16 +30,50 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match("test_bam") + } ) } + } + + test("cram") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } } + ).match("test_cram") + } + ) + } } - test("test_samtools_sort_stub") { + test("bam_stub") { config "./nextflow.config" - options "-stub-run" + options "-stub" when { params { @@ -48,12 +81,14 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test', single_end:false ], - [ - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) """ } } @@ -61,13 +96,9 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.versions - ).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } ) } - } - } diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap index ff72225..5a27de1 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -1,48 +1,114 @@ { - "test_samtools_sort": { + "cram": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam:md5,ea6a0fef94eb534e901f107a05a33a06" - ] - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,33b6a403dc19a0d28e4219ccab0a1d80" - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.sorted.bam:md5,ea6a0fef94eb534e901f107a05a33a06" - ] - ], - "csi": [ - - ], - "versions": [ - "versions.yml:md5,33b6a403dc19a0d28e4219ccab0a1d80" + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T08:13:54.512837189" + }, + "bam_stub_bam": { + "content": [ + "test.sorted.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T07:29:00.761845507" + }, + "test_cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,22b2093be34a7637f5fbc84272b89d06" ] - } + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ] ], - "timestamp": "2023-12-04T11:11:22.005628301" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T09:16:51.924951855" }, - "test_samtools_sort_stub": { + "test_bam": { "content": [ - "test.sorted.bam", [ - "versions.yml:md5,33b6a403dc19a0d28e4219ccab0a1d80" + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T08:28:12.15952312" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T07:29:00.765038811" + }, + "bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400" + ] ] ], - "timestamp": "2023-12-04T17:47:22.314445935" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-05-31T08:13:48.538030517" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config index d0f3508..f642771 100644 --- a/modules/nf-core/samtools/sort/tests/nextflow.config +++ 
b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -1,7 +1,8 @@ process { withName: SAMTOOLS_SORT { - ext.prefix = { "${meta.id}.sorted" } + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index" } } From c7d999ce07174734b5a20c8a0de5db7653ed41ec Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 11:39:10 +0200 Subject: [PATCH 02/17] [MODIF] versions --- modules/local/hicpro/dnase_mapping_stats.nf | 6 ++++++ subworkflows/local/hicpro_mapping.nf | 1 + subworkflows/local/pairtools.nf | 13 +++++++++++++ .../local/utils_nfcore_hic_pipeline/main.nf | 14 +++++++++++--- 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/modules/local/hicpro/dnase_mapping_stats.nf b/modules/local/hicpro/dnase_mapping_stats.nf index 8e85113..3b47154 100644 --- a/modules/local/hicpro/dnase_mapping_stats.nf +++ b/modules/local/hicpro/dnase_mapping_stats.nf @@ -14,6 +14,7 @@ process MAPPING_STATS_DNASE { output: tuple val(meta), path(bam), emit:bam tuple val(meta), path("${prefix}.mapstat"), emit:stats + path("versions.yml"), emit: versions script: prefix = meta.id + "_" + meta.chunk + "_" + meta.mates @@ -27,5 +28,10 @@ process MAPPING_STATS_DNASE { echo -n "global_${tag}\t" >> ${prefix}.mapstat samtools view -c -F 4 ${bam} >> ${prefix}.mapstat echo -n "local_${tag}\t0" >> ${prefix}.mapstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS """ } diff --git a/subworkflows/local/hicpro_mapping.nf b/subworkflows/local/hicpro_mapping.nf index 15166e9..633707f 100644 --- a/subworkflows/local/hicpro_mapping.nf +++ b/subworkflows/local/hicpro_mapping.nf @@ -85,6 +85,7 @@ workflow HICPRO_MAPPING { MAPPING_STATS_DNASE( BOWTIE2_ALIGN.out.aligned ) + ch_versions = ch_versions.mix(MAPPING_STATS_DNASE.out.versions) ch_mapping_stats = MAPPING_STATS_DNASE.out.stats ch_bams = BOWTIE2_ALIGN.out.aligned diff --git a/subworkflows/local/pairtools.nf 
b/subworkflows/local/pairtools.nf index 7eec885..5df52d9 100644 --- a/subworkflows/local/pairtools.nf +++ b/subworkflows/local/pairtools.nf @@ -39,21 +39,25 @@ workflow PAIRTOOLS { index.collect(), Channel.value([]) ) + ch_versions = ch_versions.mix(BWA_MEM.out.versions) PAIRTOOLS_PARSE( BWA_MEM.out.bam, chrsize.collect() ) + ch_versions = ch_versions.mix(PAIRTOOLS_PARSE.out.versions) PAIRTOOLS_RESTRICT( PAIRTOOLS_PARSE.out.pairsam, frag.map{it->it[1]}.collect() ) + ch_versions = ch_versions.mix(PAIRTOOLS_RESTRICT.out.versions) ch_pairsam = params.dnase ? PAIRTOOLS_PARSE.out.pairsam : PAIRTOOLS_RESTRICT.out.restrict PAIRTOOLS_SORT( ch_pairsam ) + ch_versions = ch_versions.mix(PAIRTOOLS_SORT.out.versions) ch_valid_pairs = PAIRTOOLS_SORT.out.sorted .map{ meta, pairs -> @@ -70,41 +74,50 @@ workflow PAIRTOOLS { PAIRTOOLS_MERGE( ch_valid_pairs.multiple ) + ch_versions = ch_versions.mix(PAIRTOOLS_MERGE.out.versions) // Separate pairs/bam files PAIRTOOLS_SPLIT( PAIRTOOLS_MERGE.out.pairs.mix(ch_valid_pairs.single) ) + ch_versions = ch_versions.mix(PAIRTOOLS_SPLIT.out.versions) // Manage BAM files SAMTOOLS_SORT( PAIRTOOLS_SPLIT.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) SAMTOOLS_INDEX( SAMTOOLS_SORT.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) SAMTOOLS_FLAGSTAT( SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai) ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) PAIRTOOLS_DEDUP( PAIRTOOLS_SPLIT.out.pairs ) + ch_versions = ch_versions.mix(PAIRTOOLS_DEDUP.out.versions) ch_pairselect = params.keep_dups ? 
PAIRTOOLS_SPLIT.out.pairs : PAIRTOOLS_DEDUP.out.pairs PAIRTOOLS_SELECT( ch_pairselect ) + ch_versions = ch_versions.mix(PAIRTOOLS_SELECT.out.versions) PAIRTOOLS_STATS( PAIRTOOLS_SELECT.out.selected ) + ch_versions = ch_versions.mix(PAIRTOOLS_STATS.out.versions) PAIRIX( PAIRTOOLS_SELECT.out.selected ) + ch_versions = ch_versions.mix(PAIRIX.out.versions) emit: versions = ch_versions diff --git a/subworkflows/local/utils_nfcore_hic_pipeline/main.nf b/subworkflows/local/utils_nfcore_hic_pipeline/main.nf index 978f66b..94f1c28 100644 --- a/subworkflows/local/utils_nfcore_hic_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_hic_pipeline/main.nf @@ -212,12 +212,16 @@ def genomeExistsError() { // Generate methods description for MultiQC // def toolCitationText() { - // TODO nf-core: Optionally add in-text citation tools to this list. // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ "Tools used in the workflow included:", "FastQC (Andrews 2010),", + "Bowtie2 (Langmead 2012),", + "BWA-MEM (Li 2013),", + "HiC-Pro (Servant 2015),", + "Pairtools (Open2C 2023),", + "Cooltools (Open2C 2024),", "MultiQC (Ewels et al. 2016)", "." ].join(' ').trim() @@ -226,12 +230,16 @@ def toolCitationText() { } def toolBibliographyText() { - // TODO nf-core: Optionally add bibliographic entries to this list. // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + "
  • Langmead, B., Salzberg, S. (2012) Fast gapped-read alignment with Bowtie 2. Nat Methods 9, 357–359. https://doi.org/10.1038/nmeth.1923
  • ", + "
  • Li, H. (2013) Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv:1303.3997v2
  • ", + "
  • Servant, N., Varoquaux, N., Lajoie, B.R., Viara, E., Chen, CJ., Vert, JP., Heard E., Dekker J., Barillot, E. (2015) HiC-Pro: an optimized and flexible pipeline for Hi-C data processing. Genome Biol 16, 259. https://doi.org/10.1186/s13059-015-0831-x
  • ", + "
  • Open2C, Abdennur, N., Fudenberg, G., Flyamer, IM., Galitsyna, AA., Goloborodko, A., Imakaev, M., Venev, SV. (2023). Pairtools: from sequencing data to chromosome contacts. PloS Comput Biol. 20(5):e1012164. doi: 10.1371/journal.pcbi.1012164
  • ", + "
  • Open2C, Abdennur, N., Abraham, S., Fudenberg, G., Flyamer, IM., Galitsyna, AA., Goloborodko, A., Imakaev, M., Oksuz, BA., & Venev, SV. (2024). Cooltools: Enabling High-Resolution Hi-C Analysis in Python. PLoS Comput Biol. 6;20(5):e1012067. doi: 10.1371/journal.pcbi.1012067
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " ].join(' ').trim() return reference_text From 65f6ee1de3c71a4dac17081f17b63113909fb16e Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 11:39:31 +0200 Subject: [PATCH 03/17] [MODIF] add CALDER2 compartment caller --- CHANGELOG.md | 2 ++ nextflow.config | 1 + nextflow_schema.json | 5 +++++ subworkflows/local/compartments.nf | 28 ++++++++++++++++++++-------- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e3c8302..9544318 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` + Add CALDER2 compartments calling (see '--compartments_caller' option) + - Add new '--balancing_opts' to update `cooler balance` arguments - New subworkflow based on `pairtools` to detect valid pairs. The user diff --git a/nextflow.config b/nextflow.config index 66dbaf2..8e105f1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -90,6 +90,7 @@ params { res_dist_decay = '250000' tads_caller = 'insulation' res_tads = '40000' + compartments_caller = 'cooltools' res_compartments = '250000' // Workflow diff --git a/nextflow_schema.json b/nextflow_schema.json index d4f08cf..4e20ba7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -314,6 +314,11 @@ "default": "insulation", "description": "Define methods for TADs calling" }, + "compartments_caller": { + "type": "string", + "default": "cooltools", + "description": "Define methods for compartments calling" + }, "res_tads": { "type": "string", "pattern": "^(\\d+)(,\\d+)*$", diff --git a/subworkflows/local/compartments.nf b/subworkflows/local/compartments.nf index 7497e36..0ab2298 100644 --- a/subworkflows/local/compartments.nf +++ b/subworkflows/local/compartments.nf @@ -1,5 +1,5 @@ include { COOLTOOLS_EIGSCIS } from '../../modules/local/cooltools/eigscis' - +include { CALDER2 } from '../../modules/nfcore/calder2/main' workflow COMPARTMENTS { take: @@ -10,14 +10,26 
@@ workflow COMPARTMENTS { main: ch_versions = Channel.empty() - COOLTOOLS_EIGSCIS( - cool, - fasta.map{it -> it[1]}.collect(), - chrsize.map{it -> it[1]}.collect() - ) - ch_versions = ch_versions.mix(COOLTOOLS_EIGSCIS.out.versions) + if (params.compartments_caller =~ 'cooltools'){ + COOLTOOLS_EIGSCIS( + cool, + fasta.map{it -> it[1]}.collect(), + chrsize.map{it -> it[1]}.collect() + ) + ch_versions = ch_versions.mix(COOLTOOLS_EIGSCIS.out.versions) + ch_comp = COOLTOOLS_EIGSCIS.out.results + } + + if (params.compartments_caller =~ 'calder2'){ + CALDER2( + cool, + Channel.value([]) + ) + ch_versions = ch_versions.mix(CALDER2.out.versions + ch_comp = CALDER2.out.output_folder + } emit: versions = ch_versions - compartments = COOLTOOLS_EIGSCIS.out.results + compartments = ch_comp } From 0e19e4f31c0953ce064cfd685ad116fead4f3fba Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 11:54:40 +0200 Subject: [PATCH 04/17] [MODIF] lint --- modules/local/hicpro/dnase_mapping_stats.nf | 2 +- subworkflows/local/compartments.nf | 2 +- .../local/utils_nfcore_hic_pipeline/main.nf | 22 +++++++++---------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/modules/local/hicpro/dnase_mapping_stats.nf b/modules/local/hicpro/dnase_mapping_stats.nf index 3b47154..7b5abb3 100644 --- a/modules/local/hicpro/dnase_mapping_stats.nf +++ b/modules/local/hicpro/dnase_mapping_stats.nf @@ -28,7 +28,7 @@ process MAPPING_STATS_DNASE { echo -n "global_${tag}\t" >> ${prefix}.mapstat samtools view -c -F 4 ${bam} >> ${prefix}.mapstat echo -n "local_${tag}\t0" >> ${prefix}.mapstat - + cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/subworkflows/local/compartments.nf b/subworkflows/local/compartments.nf index 0ab2298..3dcd53e 100644 --- a/subworkflows/local/compartments.nf +++ b/subworkflows/local/compartments.nf @@ -17,7 +17,7 @@ workflow COMPARTMENTS { chrsize.map{it 
-> it[1]}.collect() ) ch_versions = ch_versions.mix(COOLTOOLS_EIGSCIS.out.versions) - ch_comp = COOLTOOLS_EIGSCIS.out.results + ch_comp = COOLTOOLS_EIGSCIS.out.results } if (params.compartments_caller =~ 'calder2'){ diff --git a/subworkflows/local/utils_nfcore_hic_pipeline/main.nf b/subworkflows/local/utils_nfcore_hic_pipeline/main.nf index 94f1c28..2ba617d 100644 --- a/subworkflows/local/utils_nfcore_hic_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_hic_pipeline/main.nf @@ -217,11 +217,11 @@ def toolCitationText() { def citation_text = [ "Tools used in the workflow included:", "FastQC (Andrews 2010),", - "Bowtie2 (Langmead 2012),", - "BWA-MEM (Li 2013),", - "HiC-Pro (Servant 2015),", - "Pairtools (Open2C 2023),", - "Cooltools (Open2C 2024),", + "Bowtie2 (Langmead 2012),", + "BWA-MEM (Li 2013),", + "HiC-Pro (Servant 2015),", + "Pairtools (Open2C 2023),", + "Cooltools (Open2C 2024),", "MultiQC (Ewels et al. 2016)", "." ].join(' ').trim() @@ -234,12 +234,12 @@ def toolBibliographyText() { // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Langmead, B., Salzberg, S. (2012) Fast gapped-read alignment with Bowtie 2. Nat Methods 9, 357–359. https://doi.org/10.1038/nmeth.1923
  • ", - "
  • Li, H. (2013) Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv:1303.3997v2
  • ", - "
  • Servant, N., Varoquaux, N., Lajoie, B.R., Viara, E., Chen, CJ., Vert, JP., Heard E., Dekker J., Barillot, E. (2015) HiC-Pro: an optimized and flexible pipeline for Hi-C data processing. Genome Biol 16, 259. https://doi.org/10.1186/s13059-015-0831-x
  • ", - "
  • Open2C, Abdennur, N., Fudenberg, G., Flyamer, IM., Galitsyna, AA., Goloborodko, A., Imakaev, M., Venev, SV. (2023). Pairtools: from sequencing data to chromosome contacts. PloS Comput Biol. 20(5):e1012164. doi: 10.1371/journal.pcbi.1012164
  • ", - "
  • Open2C, Abdennur, N., Abraham, S., Fudenberg, G., Flyamer, IM., Galitsyna, AA., Goloborodko, A., Imakaev, M., Oksuz, BA., & Venev, SV. (2024). Cooltools: Enabling High-Resolution Hi-C Analysis in Python. PLoS Comput Biol. 6;20(5):e1012067. doi: 10.1371/journal.pcbi.1012067
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + "
  • Langmead, B., Salzberg, S. (2012) Fast gapped-read alignment with Bowtie 2. Nat Methods 9, 357–359. https://doi.org/10.1038/nmeth.1923
  • ", + "
  • Li, H. (2013) Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv:1303.3997v2
  • ", + "
  • Servant, N., Varoquaux, N., Lajoie, B.R., Viara, E., Chen, CJ., Vert, JP., Heard E., Dekker J., Barillot, E. (2015) HiC-Pro: an optimized and flexible pipeline for Hi-C data processing. Genome Biol 16, 259. https://doi.org/10.1186/s13059-015-0831-x
  • ", + "
  • Open2C, Abdennur, N., Fudenberg, G., Flyamer, IM., Galitsyna, AA., Goloborodko, A., Imakaev, M., Venev, SV. (2023). Pairtools: from sequencing data to chromosome contacts. PloS Comput Biol. 20(5):e1012164. doi: 10.1371/journal.pcbi.1012164
  • ", + "
  • Open2C, Abdennur, N., Abraham, S., Fudenberg, G., Flyamer, IM., Galitsyna, AA., Goloborodko, A., Imakaev, M., Oksuz, BA., & Venev, SV. (2024). Cooltools: Enabling High-Resolution Hi-C Analysis in Python. PLoS Comput Biol. 6;20(5):e1012067. doi: 10.1371/journal.pcbi.1012067
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " ].join(' ').trim() return reference_text From 9b3194c55f06e8ca8097e9f69024e25dfcf09331 Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 11:59:44 +0200 Subject: [PATCH 05/17] [MODIF] lint --- subworkflows/local/compartments.nf | 5 +++-- subworkflows/local/hicpro_mapping.nf | 2 +- subworkflows/local/utils_nfcore_hic_pipeline/main.nf | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/compartments.nf b/subworkflows/local/compartments.nf index 3dcd53e..0d45f61 100644 --- a/subworkflows/local/compartments.nf +++ b/subworkflows/local/compartments.nf @@ -1,5 +1,6 @@ include { COOLTOOLS_EIGSCIS } from '../../modules/local/cooltools/eigscis' include { CALDER2 } from '../../modules/nfcore/calder2/main' + workflow COMPARTMENTS { take: @@ -17,7 +18,7 @@ workflow COMPARTMENTS { chrsize.map{it -> it[1]}.collect() ) ch_versions = ch_versions.mix(COOLTOOLS_EIGSCIS.out.versions) - ch_comp = COOLTOOLS_EIGSCIS.out.results + ch_comp = COOLTOOLS_EIGSCIS.out.results } if (params.compartments_caller =~ 'calder2'){ @@ -25,7 +26,7 @@ workflow COMPARTMENTS { cool, Channel.value([]) ) - ch_versions = ch_versions.mix(CALDER2.out.versions + ch_versions = ch_versions.mix(CALDER2.out.versions) ch_comp = CALDER2.out.output_folder } diff --git a/subworkflows/local/hicpro_mapping.nf b/subworkflows/local/hicpro_mapping.nf index 633707f..8967e3d 100644 --- a/subworkflows/local/hicpro_mapping.nf +++ b/subworkflows/local/hicpro_mapping.nf @@ -85,7 +85,7 @@ workflow HICPRO_MAPPING { MAPPING_STATS_DNASE( BOWTIE2_ALIGN.out.aligned ) - ch_versions = ch_versions.mix(MAPPING_STATS_DNASE.out.versions) + ch_versions = ch_versions.mix(MAPPING_STATS_DNASE.out.versions) ch_mapping_stats = MAPPING_STATS_DNASE.out.stats ch_bams = BOWTIE2_ALIGN.out.aligned diff --git a/subworkflows/local/utils_nfcore_hic_pipeline/main.nf b/subworkflows/local/utils_nfcore_hic_pipeline/main.nf index 2ba617d..fd92f40 100644 --- 
a/subworkflows/local/utils_nfcore_hic_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_hic_pipeline/main.nf @@ -239,7 +239,7 @@ def toolBibliographyText() { "
  • Servant, N., Varoquaux, N., Lajoie, B.R., Viara, E., Chen, CJ., Vert, JP., Heard E., Dekker J., Barillot, E. (2015) HiC-Pro: an optimized and flexible pipeline for Hi-C data processing. Genome Biol 16, 259. https://doi.org/10.1186/s13059-015-0831-x
  • ", "
  • Open2C, Abdennur, N., Fudenberg, G., Flyamer, IM., Galitsyna, AA., Goloborodko, A., Imakaev, M., Venev, SV. (2023). Pairtools: from sequencing data to chromosome contacts. PloS Comput Biol. 20(5):e1012164. doi: 10.1371/journal.pcbi.1012164
  • ", "
  • Open2C, Abdennur, N., Abraham, S., Fudenberg, G., Flyamer, IM., Galitsyna, AA., Goloborodko, A., Imakaev, M., Oksuz, BA., & Venev, SV. (2024). Cooltools: Enabling High-Resolution Hi-C Analysis in Python. PLoS Comput Biol. 6;20(5):e1012067. doi: 10.1371/journal.pcbi.1012067
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " ].join(' ').trim() return reference_text From 7ae946610907b723a4b1c10aae9dc090fd9925c1 Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 14:37:10 +0200 Subject: [PATCH 06/17] [MODIF] test --- modules/local/hicexplorer/hicFindTADs.nf | 1 + nextflow.config | 2 ++ subworkflows/local/compartments.nf | 4 ++-- subworkflows/local/hicpro.nf | 2 ++ subworkflows/local/hicpro_mapping.nf | 7 +++++-- subworkflows/local/pairtools.nf | 5 ++++- workflows/hic.nf | 10 ++++++++-- 7 files changed, 24 insertions(+), 7 deletions(-) diff --git a/modules/local/hicexplorer/hicFindTADs.nf b/modules/local/hicexplorer/hicFindTADs.nf index d86dc83..a241039 100644 --- a/modules/local/hicexplorer/hicFindTADs.nf +++ b/modules/local/hicexplorer/hicFindTADs.nf @@ -4,6 +4,7 @@ process HIC_FIND_TADS { label 'process_medium' + tag "meta.id" conda "bioconda::hicexplorer=3.7.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/nextflow.config b/nextflow.config index 8e105f1..11acba2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,6 +21,8 @@ params { save_reference = false // Mapping + bwt2_index = null + bwa_index = null split_fastq = false fastq_chunks_size = 20000000 save_interaction_bam = false diff --git a/subworkflows/local/compartments.nf b/subworkflows/local/compartments.nf index 0d45f61..1e8c222 100644 --- a/subworkflows/local/compartments.nf +++ b/subworkflows/local/compartments.nf @@ -1,5 +1,5 @@ include { COOLTOOLS_EIGSCIS } from '../../modules/local/cooltools/eigscis' -include { CALDER2 } from '../../modules/nfcore/calder2/main' +include { CALDER2 } from '../../modules/nf-core/calder2/main' workflow COMPARTMENTS { @@ -24,7 +24,7 @@ workflow COMPARTMENTS { if (params.compartments_caller =~ 'calder2'){ CALDER2( cool, - Channel.value([]) + Channel.value([]) ) ch_versions = ch_versions.mix(CALDER2.out.versions) ch_comp = CALDER2.out.output_folder diff --git a/subworkflows/local/hicpro.nf 
b/subworkflows/local/hicpro.nf index 0ee5fbc..db56954 100644 --- a/subworkflows/local/hicpro.nf +++ b/subworkflows/local/hicpro.nf @@ -24,6 +24,7 @@ workflow HICPRO { take: reads // [meta, read1, read2] + fasta // [meta, fasta] index // path fragments // path chrsize // path @@ -36,6 +37,7 @@ workflow HICPRO { // Fastq to paired-end bam HICPRO_MAPPING( reads, + fasta, index, ligation_site ) diff --git a/subworkflows/local/hicpro_mapping.nf b/subworkflows/local/hicpro_mapping.nf index 8967e3d..39d2cbe 100644 --- a/subworkflows/local/hicpro_mapping.nf +++ b/subworkflows/local/hicpro_mapping.nf @@ -15,6 +15,7 @@ workflow HICPRO_MAPPING { take: reads // [meta, read1, read2] + fasta // [meta, fasta] index // [meta, path] ligation_site // value @@ -40,6 +41,7 @@ workflow HICPRO_MAPPING { BOWTIE2_ALIGN( ch_reads, index.collect(), + fasta.collect(), true, false ) @@ -58,14 +60,15 @@ workflow HICPRO_MAPPING { BOWTIE2_ALIGN_TRIMMED( TRIM_READS.out.fastq, index.collect(), + fasta.collect(), false, false ) ch_versions = ch_versions.mix(BOWTIE2_ALIGN_TRIMMED.out.versions) // Merge the two mapping steps - ch_bowtie2_align = BOWTIE2_ALIGN.out.aligned - .combine(BOWTIE2_ALIGN_TRIMMED.out.aligned, by:[0]) + ch_bowtie2_align = BOWTIE2_ALIGN.out.bam + .combine(BOWTIE2_ALIGN_TRIMMED.out.bam, by:[0]) MERGE_BOWTIE2( ch_bowtie2_align diff --git a/subworkflows/local/pairtools.nf b/subworkflows/local/pairtools.nf index 5df52d9..6a4a6ff 100644 --- a/subworkflows/local/pairtools.nf +++ b/subworkflows/local/pairtools.nf @@ -27,6 +27,7 @@ workflow PAIRTOOLS { take: reads // [meta, read1, read2] + fasta // [meta, fasta] index // [meta2, path] frag // path chrsize // path @@ -37,6 +38,7 @@ workflow PAIRTOOLS { BWA_MEM( reads, index.collect(), + fasta.collect(), Channel.value([]) ) ch_versions = ch_versions.mix(BWA_MEM.out.versions) @@ -84,7 +86,8 @@ workflow PAIRTOOLS { // Manage BAM files SAMTOOLS_SORT( - PAIRTOOLS_SPLIT.out.bam + PAIRTOOLS_SPLIT.out.bam, + fasta ) ch_versions = 
ch_versions.mix(SAMTOOLS_SORT.out.versions) diff --git a/workflows/hic.nf b/workflows/hic.nf index bf6b1d0..35e9ef9 100644 --- a/workflows/hic.nf +++ b/workflows/hic.nf @@ -127,6 +127,7 @@ workflow HIC { if (params.processing == 'hicpro'){ HICPRO ( ch_samplesheet, + ch_fasta, PREPARE_GENOME.out.index, PREPARE_GENOME.out.res_frag, PREPARE_GENOME.out.chromosome_size, @@ -139,6 +140,7 @@ workflow HIC { }else if (params.processing == 'pairtools'){ PAIRTOOLS( ch_samplesheet, + ch_fasta, PREPARE_GENOME.out.index, PREPARE_GENOME.out.res_frag, PREPARE_GENOME.out.chromosome_size @@ -251,13 +253,17 @@ workflow HIC { ) ) - ch_multiqc_files = ch_multiqc_files.mix(HICPRO.out.mqc) + if (params.processing == 'hicpro'){ + ch_multiqc_files = ch_multiqc_files.mix(HICPRO.out.mqc) + } MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() + ch_multiqc_logo.toList(), + Channel.value([]), + Channel.value([]) ) emit: From 602c1c8e27aff1f4a95a0c72b8d2d2f9cfd5d3eb Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 15:17:53 +0200 Subject: [PATCH 07/17] [MODIF] new --no_digestion params replace --dnase --- CHANGELOG.md | 4 +++- conf/modules.config | 6 +++--- docs/output.md | 6 +++--- docs/usage.md | 14 +++++++------- nextflow.config | 4 ++-- nextflow_schema.json | 10 +++++----- subworkflows/local/hicpro.nf | 2 +- subworkflows/local/hicpro_mapping.nf | 2 +- subworkflows/local/pairtools.nf | 2 +- subworkflows/local/prepare_genome.nf | 4 ++-- workflows/hic.nf | 4 ++-- 11 files changed, 30 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9544318..f55e4a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` + Parameter `--dnase`is now replaced by `--no_digestion` + Add CALDER2 compartments calling (see '--compartments_caller' option) -- Add new '--balancing_opts' to update 
`cooler balance` arguments +- Add new '--balancing_opts' to update `cooler balance` arguments (#193) - New subworkflow based on `pairtools` to detect valid pairs. The user can now choose between `--processing hicpro` (default) or `--processing pairtools` diff --git a/conf/modules.config b/conf/modules.config index b6947bc..a166a0d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -64,7 +64,7 @@ process { ] ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" } ext.args = params.bwt2_opts_end2end ?: '' - ext.args2 = !params.dnase ? "-F 4" :"" + ext.args2 = !params.no_digestion ? "-F 4" :"" } withName: 'TRIM_READS' { @@ -217,7 +217,7 @@ process { enabled: false ] ext.prefix = { "${meta.id}_${meta.chunk}_restrict" } - ext.when = !params.dnase + ext.when = !params.no_digestion } withName: 'PAIRTOOLS_SORT' { @@ -287,7 +287,7 @@ process { params.min_mapq > 0 ? "(mapq1>${params.min_mapq} and mapq2>${params.min_mapq})" : '', params.min_cis_dist > 0 ? " and ((chrom1==chrom2 and abs(pos1-pos2) > ${params.min_cis_dist}) or chrom1!=chrom2)" : '', params.keep_multi ? " and ((pair_type.upper()=='UU') or (pair_type.upper()=='UR') or (pair_type.upper()=='RU') or (pair_type.upper()=='MM') or (pair_type.upper()=='MU'))" : " and ((pair_type.upper()=='UU') or (pair_type.upper()=='UR') or (pair_type.upper()=='RU'))", - params.dnase ? '' : " and ((chrom1==chrom2 and abs(int(rfrag1) - int(rfrag2)) > 1) or chrom1!=chrom2)", + params.no_digestion ? '' : " and ((chrom1==chrom2 and abs(int(rfrag1) - int(rfrag2)) > 1) or chrom1!=chrom2)", //params.min_insert_size > 0 ? " and ( (rfrag_end1 - r1pos) + (rfrag_end2 - r2pos)) > ${params.min_insert_size}" : '', //params.max_insert_size > 0 ? " and ( (rfrag_end1 - r1pos) + (rfrag_end2 - r2pos)) < ${params.max_insert_size}" : '', //params.min_restriction_fragment_size > 0 ? 
" -t ${params.min_restriction_fragment_size}" : '', diff --git a/docs/output.md b/docs/output.md index a107f13..88470cf 100644 --- a/docs/output.md +++ b/docs/output.md @@ -45,7 +45,7 @@ and aligned back on the genome. Aligned reads for both fragment mates are then paired in a single paired-end BAM file. Singletons and low quality mapped reads are filtered (`--min_mapq`). -Note that if the `--dnase` mode is activated, HiC-Pro will skip the second +Note that if the `--no_digestion` mode is activated, HiC-Pro will skip the second mapping step. **Output directory: `results/hicpro/mapping`** @@ -95,8 +95,8 @@ Duplicated valid pairs associated to PCR artefacts are discarded (see `--keep_dup` to not discard them). In case of Hi-C protocols that do not require a restriction enzyme such as -DNase Hi-C or micro Hi-C, the assignment to a restriction is not possible -(see `--dnase`). +DNase Hi-C or micro-C, the assignment to a restriction is not possible +(see `--no_digestion`). Short range interactions that are likely to be spurious ligation products can thus be discarded using the `--min_cis_dist` parameter. diff --git a/docs/usage.md b/docs/usage.md index d409a4b..f596f38 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -251,15 +251,15 @@ Note that by default, no filters are applied on DNA and restriction fragment siz nextflow run main.nf --input './*_R{1,2}.fastq.gz' --genome 'mm10' --digestion 'dnpii' ``` -### DNase Hi-C protocol +### DNase Hi-C / Micro-C protocol -Here is an command line example for DNase protocol. +Here is an command line example for DNase or Micro-C protocol. Alignment will be performed on the `mm10` genome with default paramters. Multi-hits will not be considered and duplicates will be removed. Contacts involving fragments separated by less than 1000bp will be discarded. 
```bash -nextflow run main.nf --input './*_R{1,2}.fastq.gz' --genome 'mm10' --dnase --min_cis 1000 +nextflow run main.nf --input './*_R{1,2}.fastq.gz' --genome 'mm10' --no_digestion --min_cis_dist 1000 ``` ## Inputs @@ -456,9 +456,9 @@ Default: 'AAGCTAGCTT' Exemple of the ARIMA kit: GATCGATC,GANTGATC,GANTANTC,GATCANTC -### DNAse Hi-C +### DNAse/Micro-C -#### `--dnase` +#### `--no_digestion` In DNAse Hi-C mode, all options related to digestion Hi-C (see previous section) are ignored. @@ -466,7 +466,7 @@ In this case, it is highly recommended to use the `--min_cis_dist` parameter to remove spurious ligation products. ```bash ---dnase +--no_digestion ``` ### HiC-pro processing @@ -510,7 +510,7 @@ Default: '0' - no filter #### `--min_cis_dist` Filter short range contact below the specified distance. -Mainly useful for DNase Hi-C. Default: '0' +Mainly useful for DNAse/Micro-C. Default: '0' ```bash --min_cis_dist '[numeric]' diff --git a/nextflow.config b/nextflow.config index 11acba2..547eddd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -70,8 +70,8 @@ params { max_insert_size = 0 save_pairs_intermediates = false - // Dnase Hi-C - dnase = false + // Dnase/Micro-C Hi-C + no_digestion = false min_cis_dist = 0 // Pairtools diff --git a/nextflow_schema.json b/nextflow_schema.json index 4e20ba7..284adf6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -142,18 +142,18 @@ } }, "no_digestion_hi_c": { - "title": "DNAse Hi-C", + "title": "Micro-C/DNAse Hi-C", "type": "object", - "description": "Parameters for protocols based on DNAse digestion", + "description": "Parameters for protocols which are not based on restriction enzymes", "default": "", "properties": { - "dnase": { + "no_digestion": { "type": "boolean", - "description": "For Hi-C protocols which are not based on enzyme digestion such as DNase Hi-C" + "description": "For Hi-C protocols which are not based on enzyme digestion such as DNAse Hi-C or Micro-C" }, "min_cis_dist": { "type": "integer", - 
"description": "Minimum distance between loci to consider. Useful for --dnase mode to remove spurious ligation products. Only values > 0 are considered" + "description": "Minimum distance between loci to consider. Useful for --no_digestion mode to remove spurious ligation products. Only values > 0 are considered" } } }, diff --git a/subworkflows/local/hicpro.nf b/subworkflows/local/hicpro.nf index db56954..ac13f4f 100644 --- a/subworkflows/local/hicpro.nf +++ b/subworkflows/local/hicpro.nf @@ -46,7 +46,7 @@ workflow HICPRO { //*************************************** // DIGESTION PROTOCOLS - if (!params.dnase){ + if (!params.no_digestion){ GET_VALID_INTERACTION ( HICPRO_MAPPING.out.bam, fragments.collect() diff --git a/subworkflows/local/hicpro_mapping.nf b/subworkflows/local/hicpro_mapping.nf index 39d2cbe..7278285 100644 --- a/subworkflows/local/hicpro_mapping.nf +++ b/subworkflows/local/hicpro_mapping.nf @@ -47,7 +47,7 @@ workflow HICPRO_MAPPING { ) ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions) - if (!params.dnase){ + if (!params.no_digestion){ // trim reads TRIM_READS( diff --git a/subworkflows/local/pairtools.nf b/subworkflows/local/pairtools.nf index 6a4a6ff..6a5c4ae 100644 --- a/subworkflows/local/pairtools.nf +++ b/subworkflows/local/pairtools.nf @@ -55,7 +55,7 @@ workflow PAIRTOOLS { ) ch_versions = ch_versions.mix(PAIRTOOLS_RESTRICT.out.versions) - ch_pairsam = params.dnase ? PAIRTOOLS_PARSE.out.pairsam : PAIRTOOLS_RESTRICT.out.restrict + ch_pairsam = params.no_digestion ? 
PAIRTOOLS_PARSE.out.pairsam : PAIRTOOLS_RESTRICT.out.restrict PAIRTOOLS_SORT( ch_pairsam ) diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 2eee2b4..7b7c974 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -66,14 +66,14 @@ workflow PREPARE_GENOME { //*************************************** // Restriction fragments - if(!params.restriction_fragments && !params.dnase){ + if(!params.restriction_fragments && !params.no_digestion){ GET_RESTRICTION_FRAGMENTS( fasta, restriction_site ) ch_resfrag = GET_RESTRICTION_FRAGMENTS.out.results ch_versions = ch_versions.mix(GET_RESTRICTION_FRAGMENTS.out.versions) - }else if (!params.dnase){ + }else if (!params.no_digestion){ Channel.fromPath( params.restriction_fragments, checkIfExists: true ) .map { it -> [[:], it] } .set {ch_resfrag} diff --git a/workflows/hic.nf b/workflows/hic.nf index 35e9ef9..21cdbdb 100644 --- a/workflows/hic.nf +++ b/workflows/hic.nf @@ -32,11 +32,11 @@ if (params.digestion){ }else if (params.restriction_site && params.ligation_site){ ch_restriction_site = Channel.value(params.restriction_site) ch_ligation_site = Channel.value(params.ligation_site) -}else if (params.dnase){ +}else if (params.no_digestion){ ch_restriction_site = Channel.empty() ch_ligation_site = Channel.empty() }else{ - exit 1, "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. For DNase Hi-C, please use '--dnase' option" + exit 1, "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. 
For DNase/Micro-C Hi-C, please use '--no_digestion' option" } //**************************************** From d7d970a183597676e134bf388c3e9ba6e8e1b58b Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 15:23:59 +0200 Subject: [PATCH 08/17] [lint] prettier --- subworkflows/local/compartments.nf | 2 +- subworkflows/local/hicpro.nf | 2 +- subworkflows/local/hicpro_mapping.nf | 4 ++-- subworkflows/local/pairtools.nf | 4 ++-- workflows/hic.nf | 8 ++++---- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/subworkflows/local/compartments.nf b/subworkflows/local/compartments.nf index 1e8c222..a02611b 100644 --- a/subworkflows/local/compartments.nf +++ b/subworkflows/local/compartments.nf @@ -24,7 +24,7 @@ workflow COMPARTMENTS { if (params.compartments_caller =~ 'calder2'){ CALDER2( cool, - Channel.value([]) + Channel.value([]) ) ch_versions = ch_versions.mix(CALDER2.out.versions) ch_comp = CALDER2.out.output_folder diff --git a/subworkflows/local/hicpro.nf b/subworkflows/local/hicpro.nf index ac13f4f..7730e21 100644 --- a/subworkflows/local/hicpro.nf +++ b/subworkflows/local/hicpro.nf @@ -37,7 +37,7 @@ workflow HICPRO { // Fastq to paired-end bam HICPRO_MAPPING( reads, - fasta, + fasta, index, ligation_site ) diff --git a/subworkflows/local/hicpro_mapping.nf b/subworkflows/local/hicpro_mapping.nf index 7278285..05ccd98 100644 --- a/subworkflows/local/hicpro_mapping.nf +++ b/subworkflows/local/hicpro_mapping.nf @@ -41,7 +41,7 @@ workflow HICPRO_MAPPING { BOWTIE2_ALIGN( ch_reads, index.collect(), - fasta.collect(), + fasta.collect(), true, false ) @@ -60,7 +60,7 @@ workflow HICPRO_MAPPING { BOWTIE2_ALIGN_TRIMMED( TRIM_READS.out.fastq, index.collect(), - fasta.collect(), + fasta.collect(), false, false ) diff --git a/subworkflows/local/pairtools.nf b/subworkflows/local/pairtools.nf index 6a5c4ae..00e3bb6 100644 --- a/subworkflows/local/pairtools.nf +++ b/subworkflows/local/pairtools.nf @@ -38,7 +38,7 @@ workflow PAIRTOOLS { BWA_MEM( reads, 
index.collect(), - fasta.collect(), + fasta.collect(), Channel.value([]) ) ch_versions = ch_versions.mix(BWA_MEM.out.versions) @@ -87,7 +87,7 @@ workflow PAIRTOOLS { // Manage BAM files SAMTOOLS_SORT( PAIRTOOLS_SPLIT.out.bam, - fasta + fasta ) ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) diff --git a/workflows/hic.nf b/workflows/hic.nf index 21cdbdb..75731b4 100644 --- a/workflows/hic.nf +++ b/workflows/hic.nf @@ -127,7 +127,7 @@ workflow HIC { if (params.processing == 'hicpro'){ HICPRO ( ch_samplesheet, - ch_fasta, + ch_fasta, PREPARE_GENOME.out.index, PREPARE_GENOME.out.res_frag, PREPARE_GENOME.out.chromosome_size, @@ -140,7 +140,7 @@ workflow HIC { }else if (params.processing == 'pairtools'){ PAIRTOOLS( ch_samplesheet, - ch_fasta, + ch_fasta, PREPARE_GENOME.out.index, PREPARE_GENOME.out.res_frag, PREPARE_GENOME.out.chromosome_size @@ -262,8 +262,8 @@ workflow HIC { ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), - Channel.value([]), - Channel.value([]) + Channel.value([]), + Channel.value([]) ) emit: From 8358dacd55d7de6e40b7074466d19661fbe56558 Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 15:44:05 +0200 Subject: [PATCH 09/17] [lint] prettier --- CHANGELOG.md | 4 ++-- nextflow_schema.json | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f55e4a6..031a952 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - Parameter `--dnase`is now replaced by `--no_digestion` +- Parameter `--dnase` is now replaced by `--no_digestion` - Add CALDER2 compartments calling (see '--compartments_caller' option) +- Add CALDER2 compartments calling (see '--compartments_caller' option) - Add new '--balancing_opts' to update `cooler balance` arguments (#193) diff --git a/nextflow_schema.json b/nextflow_schema.json index 284adf6..0981029 100644 --- 
a/nextflow_schema.json +++ b/nextflow_schema.json @@ -314,11 +314,11 @@ "default": "insulation", "description": "Define methods for TADs calling" }, - "compartments_caller": { - "type": "string", - "default": "cooltools", - "description": "Define methods for compartments calling" - }, + "compartments_caller": { + "type": "string", + "default": "cooltools", + "description": "Define methods for compartments calling" + }, "res_tads": { "type": "string", "pattern": "^(\\d+)(,\\d+)*$", From b1714a9fe6a848effea54759de2a01ed17964a73 Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 15:58:23 +0200 Subject: [PATCH 10/17] [MODIF] README --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7890672..4f620cb 100644 --- a/README.md +++ b/README.md @@ -35,12 +35,12 @@ On release, automated continuous integration tests run the pipeline on a full-si 2. Detection of valid interaction products 3. Duplicates removal 4. Generate raw and normalized contact maps ([`iced`](https://github.com/hiclib/iced)) - 2. Generate `pairs` files for downstream analysis - 3. [`Pairtools`](https://github.com/open2c/pairtools) + 5. Generate `pairs` files for downstream analysis + 2. [`Pairtools`](https://github.com/open2c/pairtools) 1. Mapping using [`BWA-mem`](https://github.com/lh3/bwa) - 4. Detection of valid interaction products with [`pairtools`](https://github.com/open2c/pairtools) - 5. Duplicates removal - 6. Generate `pairs` files for downstream analysis + 2. Detection of valid interaction products with [`pairtools`](https://github.com/open2c/pairtools) + 3. Duplicates removal + 4. Generate `pairs` files for downstream analysis 3. Create genome-wide contact maps at various resolutions ([`cooler`](https://github.com/open2c/cooler)) 4. Contact maps normalization using balancing algorithm ([`cooler`](https://github.com/open2c/cooler)) 5. 
Export to various contact maps formats ([`HiC-Pro`](https://github.com/nservant/HiC-Pro), [`cooler`](https://github.com/open2c/cooler)) From cb9239ddea040361c66fade1946a9642c87d670e Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 15:59:44 +0200 Subject: [PATCH 11/17] [MODIF] README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4f620cb..d23a1e0 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ On release, automated continuous integration tests run the pipeline on a full-si 4. Contact maps normalization using balancing algorithm ([`cooler`](https://github.com/open2c/cooler)) 5. Export to various contact maps formats ([`HiC-Pro`](https://github.com/nservant/HiC-Pro), [`cooler`](https://github.com/open2c/cooler)) 6. Quality controls ([`HiC-Pro`](https://github.com/nservant/HiC-Pro), [`HiCExplorer`](https://github.com/deeptools/HiCExplorer)) -7. Compartments calling ([`cooltools`](https://cooltools.readthedocs.io/en/latest/)) +7. Compartments calling ([`cooltools`](https://cooltools.readthedocs.io/en/latest/), [`Calder2`](https://github.com/CSOgroup/CALDER2)) 8. TADs calling ([`HiCExplorer`](https://github.com/deeptools/HiCExplorer), [`cooltools`](https://cooltools.readthedocs.io/en/latest/)) 9. Quality control report ([`MultiQC`](https://multiqc.info/)) From 91bdb76b49fa7802dcf88fdd45a52100df90e7b4 Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 16:47:12 +0200 Subject: [PATCH 12/17] [DOC] update --- docs/usage.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index f596f38..50fe02e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -623,7 +623,12 @@ Several resolutions can be specified (comma-separeted). Default: '250000' ### Compartment calling -Call open/close compartments for each chromosome, using the `cooltools` command. 
+Call open/close compartments for each chromosome, using the `cooltools` or `calder2` tools. + +#### `--compartments_caller` + +Compartments calling can be performed with either `cooltools` +or `calder2`. Multiple choices are possible (comma-separated). #### `--res_compartments` From 7c90645acff3831b68d615369d59bf7cccb50c28 Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 22:02:19 +0200 Subject: [PATCH 13/17] [MODIF] move prepare genome --- conf/igenomes.config | 41 +++++++++++++++-- main.nf | 27 +++++++++-- nextflow.config | 6 +-- subworkflows/local/prepare_genome.nf | 67 +++++++++++++++++++++------- workflows/hic.nf | 55 ++++++----------------- 5 files changed, 127 insertions(+), 69 deletions(-) diff --git a/conf/igenomes.config b/conf/igenomes.config index f4c32e3..2d00943 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -14,158 +14,191 @@ params { 'GRCh37' { fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" } 'GRCh38' { fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" } 'CHM13' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" - gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" - mito_name = "chrM" } 'GRCm38' { 
fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" } 'TAIR10' { fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" } 'EB2' { fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" } 'UMD3.1' { fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" } 'WBcel235' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" } 'CanFam3.1' { fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" } 'GRCz10' { fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = 
"${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" } 'BDGP6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" } 'EquCab2' { fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" } 'EB1' { fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" } 'Galgal4' { fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" } 'Gm01' { fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" } 'Mmul_1' { fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" } 
'IRGSP-1.0' { fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" } 'CHIMP2.1.4' { fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" } 'Rnor_6.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" } 'R64-1-1' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" } 'EF2' { fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" } 'Sbi1' { fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" } 'Sscrofa10.2' { fasta = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" } 'AGPv3' { fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" } 'hg38' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" } 'hg19' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" } 'mm10' { fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" } 'bosTau8' { fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" } 'ce10' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" } 'canFam3' { fasta = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" } 'danRer10' { fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" } 'dm6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" } 'equCab2' { fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" } 'galGal4' { fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" } 'panTro4' { fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" } 'rn6' { fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + bwa = 
"${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" } 'sacCer3' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" } 'susScr3' { fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" } } } diff --git a/main.nf b/main.nf index e959afd..5283fad 100644 --- a/main.nf +++ b/main.nf @@ -18,9 +18,9 @@ nextflow.enable.dsl = 2 */ include { HIC } from './workflows/hic' +include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_hic_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_hic_pipeline' - include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_hic_pipeline' /* @@ -31,7 +31,7 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_hic_ params.fasta = getGenomeAttribute('fasta') params.bwt2_index = getGenomeAttribute('bowtie2') -params.bwa_index = getGenomeAttribute('bwamem2') +params.bwa_index = getGenomeAttribute('bwa') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -49,16 +49,35 @@ workflow NFCORE_HIC { main: + ch_versions = Channel.empty() + + // + // SUBWORKFLOW: prepare genome annotation + // + PREPARE_GENOME( + params.fasta, + params.bwt2_index, + params.bwa_index + ) + ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) + // // WORKFLOW: Run pipeline // HIC ( - samplesheet + samplesheet, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.index, + 
PREPARE_GENOME.out.chromosome_size, + PREPARE_GENOME.out.res_frag, + PREPARE_GENOME.out.restriction_site, + PREPARE_GENOME.out.ligation_site ) + ch_versions = ch_versions.mix(HIC.out.versions) emit: multiqc_report = HIC.out.multiqc_report // channel: /path/to/multiqc_report.html - + versions = ch_versions } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/nextflow.config b/nextflow.config index 547eddd..7d4761b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,14 +15,14 @@ params { genome = null igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false - fasta = null + //fasta = null chromosome_size = null restriction_fragments = null save_reference = false // Mapping - bwt2_index = null - bwa_index = null + //bwt2_index = null + //bwa_index = null split_fastq = false fastq_chunks_size = 20000000 save_interaction_bam = false diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 7b7c974..af2b9d0 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -11,64 +11,94 @@ workflow PREPARE_GENOME { take: fasta - restriction_site + bwt2_index + bwa_index main: ch_versions = Channel.empty() - //*************************************** + // + // Fasta reference genome + // + def genomeName = params.genome ?: fasta.substring(fasta.lastIndexOf(File.separator)+1) + ch_fasta = Channel.fromPath( fasta ) + .ifEmpty { exit 1, "Genome index: Fasta file not found: ${fasta}" } + .map{it->[[id:genomeName],it]} + + // // Bowtie index + // if (params.processing == "hicpro"){ - if(!params.bwt2_index){ + if(!bwt2_index){ BOWTIE2_BUILD ( - fasta + ch_fasta ) ch_index = BOWTIE2_BUILD.out.index ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) }else{ - Channel.fromPath( params.bwt2_index , checkIfExists: true) + ch_index = Channel.fromPath( bwt2_index , checkIfExists: true) .map { it -> [[:], it]} .ifEmpty { exit 1, "Genome index: 
Provided index not found: ${params.bwt2_index}" } - .set { ch_index } } } - //*************************************** + // // Bwa-mem index + // if (params.processing == "pairtools"){ - if(!params.bwa_index){ + if(!bwa_index){ BWA_INDEX ( - fasta + ch_fasta ) ch_index = BWA_INDEX.out.index ch_versions = ch_versions.mix(BWA_INDEX.out.versions) }else{ - Channel.fromPath( params.bwa_index , checkIfExists: true) + ch_index = Channel.fromPath( bwa_index , checkIfExists: true) .map { it -> [[:], it]} .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwa_index}" } - .set { ch_index } } } - //*************************************** + // // Chromosome size + // if(!params.chromosome_size){ CUSTOM_GETCHROMSIZES( - fasta + ch_fasta ) ch_chromsize = CUSTOM_GETCHROMSIZES.out.sizes ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) }else{ - Channel.fromPath( params.chromosome_size , checkIfExists: true) + ch_chromsize = Channel.fromPath( params.chromosome_size , checkIfExists: true) .map { it -> [[:], it]} - .set {ch_chromsize} } - //*************************************** + + // + // Digestion parameters + // + if (params.digestion){ + restriction_site = params.digestion ? params.digest[ params.digestion ].restriction_site ?: false : false + ch_restriction_site = Channel.value(restriction_site) + ligation_site = params.digestion ? params.digest[ params.digestion ].ligation_site ?: false : false + ch_ligation_site = Channel.value(ligation_site) + }else if (params.restriction_site && params.ligation_site){ + ch_restriction_site = Channel.value(params.restriction_site) + ch_ligation_site = Channel.value(params.ligation_site) + }else if (params.no_digestion){ + ch_restriction_site = Channel.empty() + ch_ligation_site = Channel.empty() + }else{ + exit 1, "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. 
For DNase/Micro-C Hi-C, please use '--no_digestion' option" + } + + + // // Restriction fragments + // if(!params.restriction_fragments && !params.no_digestion){ GET_RESTRICTION_FRAGMENTS( - fasta, + ch_fasta, restriction_site ) ch_resfrag = GET_RESTRICTION_FRAGMENTS.out.results @@ -82,8 +112,11 @@ workflow PREPARE_GENOME { } emit: + fasta = ch_fasta index = ch_index chromosome_size = ch_chromsize res_frag = ch_resfrag + restriction_site = ch_restriction_site + ligation_site = ch_ligation_site versions = ch_versions } diff --git a/workflows/hic.nf b/workflows/hic.nf index 75731b4..7ec3e83 100644 --- a/workflows/hic.nf +++ b/workflows/hic.nf @@ -15,30 +15,12 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_hic_ include { HIC_PLOT_DIST_VS_COUNTS } from '../modules/local/hicexplorer/hicPlotDistVsCounts' // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' include { HICPRO } from '../subworkflows/local/hicpro' include { PAIRTOOLS } from '../subworkflows/local/pairtools' include { COOLER } from '../subworkflows/local/cooler' include { COMPARTMENTS } from '../subworkflows/local/compartments' include { TADS } from '../subworkflows/local/tads' -//***************************************** -// Digestion parameters -if (params.digestion){ - restriction_site = params.digestion ? params.digest[ params.digestion ].restriction_site ?: false : false - ch_restriction_site = Channel.value(restriction_site) - ligation_site = params.digestion ? 
params.digest[ params.digestion ].ligation_site ?: false : false - ch_ligation_site = Channel.value(ligation_site) -}else if (params.restriction_site && params.ligation_site){ - ch_restriction_site = Channel.value(params.restriction_site) - ch_ligation_site = Channel.value(params.ligation_site) -}else if (params.no_digestion){ - ch_restriction_site = Channel.empty() - ch_ligation_site = Channel.empty() -}else{ - exit 1, "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. For DNase/Micro-C Hi-C, please use '--no_digestion' option" -} - //**************************************** // Combine all maps resolution for downstream analysis @@ -81,12 +63,6 @@ if (params.res_compartments && !params.skip_compartments){ ch_map_res = ch_map_res.unique() -def genomeName = params.genome ?: params.fasta.substring(params.fasta.lastIndexOf(File.separator)+1) -Channel.fromPath( params.fasta ) - .ifEmpty { exit 1, "Genome index: Fasta file not found: ${params.fasta}" } - .map{it->[[id:genomeName],it]} - .set { ch_fasta } - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -97,6 +73,12 @@ workflow HIC { take: ch_samplesheet // channel: samplesheet read in from --input + ch_fasta + ch_index + ch_chromosome_size + ch_res_frag + ch_restriction_site + ch_ligation_site main: @@ -112,15 +94,6 @@ workflow HIC { ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - // - // SUBWORKFLOW: Prepare genome annotation - // - PREPARE_GENOME( - ch_fasta, - ch_restriction_site - ) - ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) - // // SUB-WORFLOW: HiC-Pro // @@ -128,9 +101,9 @@ workflow HIC { HICPRO ( ch_samplesheet, ch_fasta, - PREPARE_GENOME.out.index, - PREPARE_GENOME.out.res_frag, - PREPARE_GENOME.out.chromosome_size, + ch_index, + ch_res_frag, + ch_chromosome_size, 
ch_ligation_site, ch_map_res ) @@ -141,9 +114,9 @@ workflow HIC { PAIRTOOLS( ch_samplesheet, ch_fasta, - PREPARE_GENOME.out.index, - PREPARE_GENOME.out.res_frag, - PREPARE_GENOME.out.chromosome_size + ch_index, + ch_res_frag, + ch_chromosome_size ) ch_versions = ch_versions.mix(PAIRTOOLS.out.versions) ch_pairs = PAIRTOOLS.out.pairs @@ -155,7 +128,7 @@ workflow HIC { // COOLER ( ch_pairs, - PREPARE_GENOME.out.chromosome_size, + ch_chromosome_size, ch_map_res ) ch_versions = ch_versions.mix(COOLER.out.versions) @@ -189,7 +162,7 @@ workflow HIC { COMPARTMENTS ( ch_cool_compartments, ch_fasta, - PREPARE_GENOME.out.chromosome_size + ch_chromosome_size ) ch_versions = ch_versions.mix(COMPARTMENTS.out.versions) } From bf480c2252b09ee21dfbf296c4f35e37201cb84e Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 22:20:58 +0200 Subject: [PATCH 14/17] [MODIF] test --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 5283fad..556e259 100644 --- a/main.nf +++ b/main.nf @@ -56,7 +56,7 @@ workflow NFCORE_HIC { // PREPARE_GENOME( params.fasta, - params.bwt2_index, + params.bwt2_index, params.bwa_index ) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) From 6d04bab0179f8e04ed36b3adfaa1a95871ca6c30 Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 22:28:08 +0200 Subject: [PATCH 15/17] [MODIF] test --- modules/local/hicexplorer/hicFindTADs.nf | 2 +- modules/nf-core/calder2/main.nf | 2 +- subworkflows/local/compartments.nf | 2 +- subworkflows/local/pairtools.nf | 1 - 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/local/hicexplorer/hicFindTADs.nf b/modules/local/hicexplorer/hicFindTADs.nf index a241039..6f62a04 100644 --- a/modules/local/hicexplorer/hicFindTADs.nf +++ b/modules/local/hicexplorer/hicFindTADs.nf @@ -4,7 +4,7 @@ process HIC_FIND_TADS { label 'process_medium' - tag "meta.id" + tag "$meta.id" conda "bioconda::hicexplorer=3.7.2" container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/nf-core/calder2/main.nf b/modules/nf-core/calder2/main.nf index cb77dfe..8927211 100644 --- a/modules/nf-core/calder2/main.nf +++ b/modules/nf-core/calder2/main.nf @@ -1,5 +1,5 @@ process CALDER2 { - tag '$meta.id' + tag "$meta.id" label 'process_high' conda "${moduleDir}/environment.yml" diff --git a/subworkflows/local/compartments.nf b/subworkflows/local/compartments.nf index a02611b..4f965d3 100644 --- a/subworkflows/local/compartments.nf +++ b/subworkflows/local/compartments.nf @@ -23,7 +23,7 @@ workflow COMPARTMENTS { if (params.compartments_caller =~ 'calder2'){ CALDER2( - cool, + cool.map{meta, cool, res -> [meta, cool] }, Channel.value([]) ) ch_versions = ch_versions.mix(CALDER2.out.versions) diff --git a/subworkflows/local/pairtools.nf b/subworkflows/local/pairtools.nf index 00e3bb6..7f88993 100644 --- a/subworkflows/local/pairtools.nf +++ b/subworkflows/local/pairtools.nf @@ -67,7 +67,6 @@ workflow PAIRTOOLS { [ groupKey(newMeta, meta.part), pairs ] } .groupTuple() - .view() .branch { single: it[0].part <=1 multiple: it[0].part > 1 From b564e820eaa919fc44746217969e4c4ac9c9c2d3 Mon Sep 17 00:00:00 2001 From: Nicolas Servant Date: Thu, 18 Jul 2024 22:38:11 +0200 Subject: [PATCH 16/17] [MODIF] test --- modules/nf-core/calder2/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/calder2/main.nf b/modules/nf-core/calder2/main.nf index 8927211..cb77dfe 100644 --- a/modules/nf-core/calder2/main.nf +++ b/modules/nf-core/calder2/main.nf @@ -1,5 +1,5 @@ process CALDER2 { - tag "$meta.id" + tag '$meta.id' label 'process_high' conda "${moduleDir}/environment.yml" From 690cb2816c5a23b1059fed126533a6b862c96d38 Mon Sep 17 00:00:00 2001 From: nservant Date: Mon, 22 Jul 2024 16:23:22 +0200 Subject: [PATCH 17/17] [MODIF] add tmp dir --- conf/modules.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/modules.config 
b/conf/modules.config index 687fd4e..70e85c8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -229,6 +229,7 @@ process { enabled: false ] ext.prefix = { "${meta.id}_merged" } + ext.args = "--tmpdir ./" } withName: 'PAIRTOOLS_SPLIT' {