diff --git a/CHANGELOG.md b/CHANGELOG.md index 75b6e128..be1d0138 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v2.3.0 - 2024-01-25 - Gray Zinc Dalmatian +- [[#307]](https://github.com/nf-core/smrnaseq/pull/307) - Clean up config file and improve output folder structure - [[#299]](https://github.com/nf-core/smrnaseq/issues/299) - Bugfix for missing inputs in BAM stats (`genome_quant.r`) - [[#164]](https://github.com/nf-core/smrnaseq/pull/164) - UMI Handling Feature implemented in the pipeline - [[#302]](https://github.com/nf-core/smrnaseq/pull/302) - Merged in nf-core template v2.11.1 diff --git a/conf/modules.config b/conf/modules.config index 127e0a34..b57e4f92 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -10,17 +10,22 @@ ---------------------------------------------------------------------------------------- */ -// -// General configuration options -// + process { + + // + // General configuration options + // publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + // + // Input preparation + // withName: 'NFCORE_SMRNASEQ:SMRNASEQ:INPUT_CHECK:SAMPLESHEET_CHECK' { publishDir = [ path: { "${params.outdir}/pipeline_info" }, @@ -28,327 +33,342 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - - withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { + withName: 'CAT_FASTQ' { publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}/cat_fastq" }, mode: params.publish_dir_mode, - pattern: '*_versions.yml' + pattern: '*.fastq', + // enabled: params.save_merged_fastq //TODO ? implement save_merged_fastq param ] } -} - -// -// Genome preparation options -// -process { - withName: 'CAT_FASTQ' { + // + // FASTQ_FASTQC_UMITOOLS_FASTP + // + withName: '.*:FASTQC_UMITOOLS_FASTP:FASTP' { + ext.args = [ "", + params.trim_fastq ? "" : "--disable_adapter_trimming", + params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1. + params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed. + params.fastp_min_length > 0 ? "-l ${params.fastp_min_length}" : "", + params.fastp_max_length > 0 ? "--max_len1 ${params.fastp_max_length}" : "", + params.three_prime_adapter == null ? '' : "--adapter_sequence ${params.three_prime_adapter}" + ].join(" ").trim() publishDir = [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - pattern: '*.fastq', - // enabled: params.save_merged_fastq //TODO ? implement save_merged_fastq param + [ + path: { "${params.outdir}/fastp" }, + mode: params.publish_dir_mode, + pattern: "*.{json,html}" + ], + [ + path: { "${params.outdir}/fastp/log" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/fastp" }, + mode: params.publish_dir_mode, + pattern: "*.fail.fastq.gz", + enabled: params.save_trimmed_fail + ] ] } - withName: 'INDEX_GENOME' { + withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_RAW' { + ext.args = '--quiet' publishDir = [ - path: { "${params.outdir}/index" }, + path: { "${params.outdir}/fastqc/raw" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:FORMAT_.*' { + withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_TRIM' { + ext.args = '--quiet' publishDir = [ - path: { "${params.outdir}/genome" }, + path: { "${params.outdir}/fastqc/trimmed" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'EDGER_QC' { + withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:UMITOOLS_EXTRACT' { + ext.args = [ + params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', + params.umitools_bc_pattern ? "--bc-pattern='${params.umitools_bc_pattern}'" : '', + ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/edger" }, - mode: params.publish_dir_mode, - enabled: true + [ + path: { "${params.outdir}/umi_dedup/fastq_extracted_umi" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/umi_dedup/fastq_extracted_umi" }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + enabled: params.save_umi_intermeds + ] ] } -} - -// -// Read QC and trimming options -// -process { - withName: 'MIRTRACE_RUN' { + // + // Make bowtie index + // + withName: 'INDEX_GENOME' { publishDir = [ - path: { "${params.outdir}/mirtrace" }, + path: { "${params.outdir}/bowtie_index/genome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } -} -if (!(params.skip_fastqc)) { - process { - withName: '.*:FASTQC_UMITOOLS_FASTP:FASTQC_.*' { - ext.args = '--quiet' - } + // + // UMI deduplication + // + withName: '.*:DEDUPLICATE_UMIS:UMI_MAP_GENOME' { + publishDir = [ + path: { "${params.outdir}/umi_dedup/bam_mapped" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: ( + params.save_umi_intermeds + ) + ] } -} - -if (!params.skip_fastp) { - process { - withName: '.*:FASTQC_UMITOOLS_FASTP:FASTP' { - ext.args = [ "", - params.trim_fastq ? "" : "--disable_adapter_trimming", - params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1. - params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed. - params.fastp_min_length > 0 ? "-l ${params.fastp_min_length}" : "", - params.fastp_max_length > 0 ? "--max_len1 ${params.fastp_max_length}" : "", - params.three_prime_adapter == null ? '' : "--adapter_sequence ${params.three_prime_adapter}" - ].join(" ").trim() - publishDir = [ - [ - path: { "${params.outdir}/fastp" }, - mode: params.publish_dir_mode, - pattern: "*.{json,html}" - ], - [ - path: { "${params.outdir}/fastp/log" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ], - [ - path: { "${params.outdir}/fastp" }, - mode: params.publish_dir_mode, - pattern: "*.fail.fastq.gz", - enabled: params.save_trimmed_fail - ] - ] - } - + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/umi_dedup/bam_mapped_sorted" }, + mode: params.publish_dir_mode, + pattern: '*.{bam}', + enabled: ( + params.save_umi_intermeds + ) + ] } - - if (!params.skip_fastqc) { - process { - withName: '.*:.*:FASTQC_UMITOOLS_FASTP:FASTQC_RAW' { - ext.args = '--quiet' - publishDir = [ - path: { "${params.outdir}/fastqc/raw" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*:.*:FASTQC_UMITOOLS_FASTP:FASTQC_TRIM' { - ext.args = '--quiet' - publishDir = [ - path: { "${params.outdir}/fastqc/trim" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/umi_dedup/bam_mapped_sorted" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}', + enabled: ( + params.save_umi_intermeds + ) + ] } -} - -if (params.with_umi && !params.skip_umi_extract) { - process { - withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:UMITOOLS_EXTRACT' { - ext.args = [ - params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', - params.umitools_bc_pattern ? "--bc-pattern='${params.umitools_bc_pattern}'" : '', - ].join(' ').trim() - publishDir = [ - [ - path: { "${params.outdir}/umitools" }, - mode: params.publish_dir_mode, - pattern: "*.log" - ], - [ - path: { "${params.outdir}/umitools" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz", - enabled: params.save_umi_intermeds - ] - ] - } + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:.*' { + publishDir = [ + path: { "${params.outdir}/umi_dedup/bam_mapped_sorted" }, + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] + } + withName: '.*:DEDUPLICATE_UMIS:UMICOLLAPSE' { + ext.args = { meta.single_end ? "--algo ${params.umitools_method} --two-pass" : "--method ${params.umitools_method} --two-pass --paired --remove-unpaired --remove-chimeric" } + ext.prefix = { "${meta.id}.umi_dedup.sorted" } + publishDir = [ + path: { "${params.outdir}/umi_dedup/bam_deduplicated" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: ( + params.save_umi_intermeds + ) + ] + } + withName: '.*:DEDUPLICATE_UMIS:SAMTOOLS_BAM2FQ' { + publishDir = [ + path: { "${params.outdir}/umi_dedup/fastq_deduplicated" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: ( + params.save_umi_intermeds + ) + ] } -} - -// -// UMI tools deduplication -// - -if (params.with_umi) { - process { - withName: '.*:DEDUPLICATE_UMIS:UMICOLLAPSE' { - ext.args = { meta.single_end ? "--algo ${params.umitools_method} --two-pass" : "--method ${params.umitools_method} --two-pass --paired --remove-unpaired --remove-chimeric" } - ext.prefix = { "${meta.id}.umi_dedup.sorted" } - publishDir = [ - path: { "${params.outdir}/umi_dedup" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: ( - params.save_umi_intermeds - ) - ] - } - - withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { - ext.prefix = { "${meta.id}.sorted" } - publishDir = [ - path: { "${params.outdir}/umi_dedup" }, - mode: params.publish_dir_mode, - pattern: '*.{bam}', - enabled: ( - params.save_umi_intermeds - ) - ] - } - - withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { - ext.prefix = { "${meta.id}.sorted" } - publishDir = [ - path: { "${params.outdir}/umi_dedup" }, - mode: params.publish_dir_mode, - pattern: '*.{bai,csi}', - enabled: ( - params.save_umi_intermeds - ) - ] - } - withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:.*' { - publishDir = [ - path: { "${params.outdir}/umi_dedup/samtools_stats" }, - mode: params.publish_dir_mode, - pattern: '*.{stats,flagstat,idxstats}' - ] - } + // + // MIRTRACE QC + // + withName: 'MIRTRACE_RUN' { + publishDir = [ + //"mirtrace" already part of the published folder + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } -} -// -// Quantification -// + // + // CONTAMINANT_FILTER + // + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:CONTAMINANT_FILTER:.*' { + publishDir = [ + path: { "${params.outdir}/contaminant_filter/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } -process { + // + // MIRNA_QUANT + // + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:FORMAT_.*' { + publishDir = [ + path: { "${params.outdir}/mirna_quant/reference" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:PARSE_.*' { - publishDir = [ - path: { "${params.outdir}/genome" }, + publishDir = [ + path: { "${params.outdir}/mirna_quant/reference" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:BAM_STATS_.*:SAMTOOLS_SORT' { + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:INDEX_MATURE' { + publishDir = [ + path: { "${params.outdir}/bowtie_index/mirna_mature" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:INDEX_HAIRPIN' { + publishDir = [ + path: { "${params.outdir}/bowtie_index/mirna_hairpin" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:BOWTIE_MAP_MATURE' { + publishDir = [ + path: { "${params.outdir}/mirna_quant/bam/mature" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:BAM_STATS_MATURE:.*' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ - path: { "${params.outdir}/samtools" }, + path: { "${params.outdir}/mirna_quant/bam/mature" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:BAM_STATS_.*:BAM_STATS_SAMTOOLS:.*' { + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:BOWTIE_MAP_HAIRPIN' { + publishDir = [ + path: { "${params.outdir}/mirna_quant/bam/hairpin" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:BAM_STATS_HAIRPIN:.*' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ - path: { "${params.outdir}/samtools/samtools_stats" }, + path: { "${params.outdir}/mirna_quant/bam/hairpin" }, mode: params.publish_dir_mode, - pattern: "*.{stats,flagstat,idxstats}" + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:BOWTIE_MAP_.*' { + withName: 'EDGER_QC' { publishDir = [ - path: { "${params.outdir}/unmapped/fastq" }, + path: { "${params.outdir}/mirna_quant/edger_qc" }, mode: params.publish_dir_mode, - pattern: "unmapped/*.gz" + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: 'SEQCLUSTER_SEQUENCES' { publishDir = [ - path: { "${params.outdir}/seqcluster" }, - enabled: false + path: { "${params.outdir}/mirna_quant/seqcluster" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'MIRTOP_QUANT' { + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:BOWTIE_MAP_SEQCLUSTER' { publishDir = [ - path: { "${params.outdir}" }, + path: { "${params.outdir}/mirna_quant/bam/seqcluster" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:TABLE_MERGE' { + withName: 'MIRTOP_QUANT' { publishDir = [ - path: { "${params.outdir}/mirtop" }, + //mirtop already part of the output folder + path: { "${params.outdir}/mirna_quant/" }, mode: params.publish_dir_mode, - pattern: "*.tsv" + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'EDGER_QC' { + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:MIRNA_QUANT:TABLE_MERGE' { publishDir = [ - path: { "${params.outdir}/edger" }, + path: { "${params.outdir}/mirna_quant/mirtop" }, mode: params.publish_dir_mode, - enabled: true + pattern: "*.tsv" ] } -} -process { + // + // GENOME_QUANT + // withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ - path: { "${params.outdir}/samtools" }, + path: { "${params.outdir}/genome_quant/bam" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ - path: { "${params.outdir}/samtools/samtools_stats" }, + path: { "${params.outdir}/genome_quant/bam" }, mode: params.publish_dir_mode, pattern: "*.{stats,flagstat,idxstats}" ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BOWTIE_MAP_.*' { + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BOWTIE_MAP_GENOME' { publishDir = [ - path: { "${params.outdir}/unmapped/fastq" }, + path: { "${params.outdir}/genome_quant/bam" }, mode: params.publish_dir_mode, - pattern: "unmapped/*.gz" + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } -} -if (!params.skip_mirdeep) { - process { - withName: 'MIRDEEP2_MAPPER' { - publishDir = [ - path: { "${params.outdir}/mirdeep2/mapper" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: 'MIRDEEP2_RUN' { - publishDir = [ - path: { "${params.outdir}/mirdeep2/run" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } -} -if (!params.skip_multiqc) { - process { - withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' - publishDir = [ - path: { "${params.outdir}/multiqc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + // + // MIRDEEP + // + withName: 'MIRDEEP2_MAPPER' { + publishDir = [ + path: { "${params.outdir}/mirdeep2/mapper" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'MIRDEEP2_RUN' { + publishDir = [ + path: { "${params.outdir}/mirdeep2/run" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } + // + // reports + // + withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + } + withName: 'MULTIQC' { + ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } }