diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7d44b9fa4..829555e45 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -111,6 +111,7 @@ jobs: test_virus_identification, test_single_end, test_concoct, + test_longread, ] steps: - name: Free some space diff --git a/CHANGELOG.md b/CHANGELOG.md index ca448d2bd..6840f609a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,10 +30,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#707](https://github.com/nf-core/mag/pull/707) - Make Bin QC a subworkflow (added by @dialvarezs) - [#707](https://github.com/nf-core/mag/pull/707) - Added CheckM2 as an alternative bin completeness and QC tool (added by @dialvarezs) - [#708](https://github.com/nf-core/mag/pull/708) - Added `--exclude_unbins_from_postbinning` parameter to exclude unbinned contigs from post-binning processes, speeding up Prokka in some cases (added by @dialvarezs) +- [#718](https://github.com/nf-core/mag/pull/718) - Added metaMDBG and (meta)Flye as long read assemblers (suggested by ljmesi [and many others] added by @muabnezor) +- [#718](https://github.com/nf-core/mag/pull/718) - Added host removal for long reads using minimap2 as aligner (added by @muabnezor) - [#732](https://github.com/nf-core/mag/pull/732) - Added support for Prokka's compliance mode with `--prokka_with_compliance --prokka_compliance_centre ` (reported by @audy and @Thomieh73, added by @jfy133) ### `Changed` +- [#718](https://github.com/nf-core/mag/pull/718) - Longread only input is now an option (added by @muabnezor) - [#731](https://github.com/nf-core/mag/pull/731) - Updated to nf-core 3.1.0 `TEMPLATE` (by @jfy133) ### `Fixed` @@ -43,16 +46,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#716](https://github.com/nf-core/mag/pull/692) - Make short read processing a subworkflow (added by @muabnezor) - [#708](https://github.com/nf-core/mag/pull/708) - Fixed channel 
passed as GUNC input (added by @dialvarezs) - [#729](https://github.com/nf-core/mag/pull/729) - Fixed misspecified multi-FASTQ input for single-end data in MEGAHIT (reported by John Richards, fix by @jfy133) +- [#718](https://github.com/nf-core/mag/pull/718) - Refactored assembly steps into a subworkflow (added by @muabnezor) ### `Dependencies` -| Tool | Previous version | New version | -| ------- | ---------------- | ----------- | -| CheckM | 1.2.1 | 1.2.3 | -| CheckM2 | | 1.0.2 | -| chopper | | 0.9.0 | -| GUNC | 1.0.5 | 1.0.6 | -| nanoq | | 0.10.0 | +| Tool | Previous version | New version | +| -------- | ---------------- | ----------- | +| chopper | | 0.9.0 | +| nanoq | | 0.10.0 | +| flye | | 2.9.5 | +| metamdbg | | 1.1 | +| minimap2 | | 2.28 | +| CheckM | 1.2.1 | 1.2.3 | +| CheckM2 | | 1.0.2 | +| chopper | | 0.9.0 | +| GUNC | 1.0.5 | 1.0.6 | +| nanoq | | 0.10.0 | ### `Deprecated` diff --git a/CITATIONS.md b/CITATIONS.md index 2feb36934..1b0eb2fa3 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -66,6 +66,10 @@ - [Filtlong](https://github.com/rrwick/Filtlong) +- [Flye](https://www.nature.com/articles/s41592-020-00971-x) + + > Kolmogorov, M., Bickhart, D.M., Behsaz, B. et al. metaFlye: scalable long-read metagenome assembly using repeat graphs. Nat Methods 17, 1103–1110 (2020). doi: 10.1038/s41592-020-00971-x + - [Freebayes](https://arxiv.org/abs/1207.3907) > Garrison E, Marth G. Haplotype-based variant detection from short-read sequencing. arXiv preprint arXiv:1207.3907 [q-bio.GN] 2012 @@ -106,6 +110,14 @@ > Levy Karin, E., Mirdita, M. & Söding, J. MetaEuk—sensitive, high-throughput gene discovery, and annotation for large-scale eukaryotic metagenomics. Microbiome 8, 48 (2020). 10.1186/s40168-020-00808-x +- [metaMDBG](https://doi.org/10.1038/s41587-023-01983-6) + + > Benoit, G., Raguideau, S., James, R. et al. High-quality metagenome assembly from long accurate reads with metaMDBG. Nat Biotechnol 42, 1378–1383 (2024). 
doi:10.1038/s41587-023-01983-6 + +- [minimap2](https://doi.org/10.1093/bioinformatics/bty191) + + > Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics, 34(18), 3094–3100. doi: 10.1093/bioinformatics/bty191 + - [MMseqs2](https://www.nature.com/articles/nbt.3988) > Steinegger, M., Söding, J. MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets. Nat Biotechnol 35, 1026–1028 (2017).10.1038/nbt.3988 diff --git a/assets/schema_input.json b/assets/schema_input.json index 01b494b59..c3c70ba65 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -42,11 +42,11 @@ "pattern": "^\\S+\\.f(ast)?q\\.gz$" } }, - "required": ["sample", "group", "short_reads_1"] + "required": ["sample", "group"], + "anyOf": [{ "required": ["short_reads_1"] }, { "required": ["long_reads"] }] }, "uniqueEntries": ["sample", "run"], "dependentRequired": { - "short_reads_2": ["short_reads_1"], - "long_reads": ["short_reads_1", "short_reads_2"] + "short_reads_2": ["short_reads_1"] } } diff --git a/conf/modules.config b/conf/modules.config index e14432d63..51b090993 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -179,13 +179,13 @@ process { } withName: FILTLONG { - ext.args = [ + ext.args = { [ "--min_length ${params.longreads_min_length}", "--keep_percent ${params.longreads_keep_percent}", - "--trim", + shortreads ? "--trim" : "", "--length_weight ${params.longreads_length_weight}", - params.longreads_min_quality ? "--min_mean_q ${params.longreads_min_quality}" : '', - ].join(' ').trim() + params.longreads_min_quality ? "--min_mean_q ${params.longreads_min_quality}" : '' + ].join(' ').trim() } publishDir = [ path: { "${params.outdir}/QC_longreads/Filtlong" }, mode: params.publish_dir_mode, @@ -286,6 +286,55 @@ process { ] } + withName: MINIMAP2_HOST_INDEX { + ext.args = params.longread_mapping_mode ? 
"-x ${params.longread_mapping_mode}" : '-x map-ont' + publishDir = [ + path: { "${params.outdir}/QC_longreads/minimap2/index" }, + mode: params.publish_dir_mode, + pattern: '*.mmi', + ] + } + + withName: MINIMAP2_HOST_ALIGN { + ext.prefix = { "${meta.id}_run${meta.run}_host_minimap" } + publishDir = [ + path: { "${params.outdir}/QC_longreads/minimap2/align" }, + mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: params.save_hostremoved_reads + ] + } + + withName: MINIMAP2_ASSEMBLY_ALIGN { + ext.prefix = { "${meta.assembler}-${meta.id}-${meta2.id}" } + publishDir = [ + path: { "${params.outdir}/Assembly/${meta2.assembler}/QC/${meta2.id}" }, + mode: params.publish_dir_mode, + pattern: "*.{bam,bai}", + enabled: params.save_assembly_mapped_reads + ] + } + + withName: SAMTOOLS_HOSTREMOVED_UNMAPPED { + ext.args = '-f 4' + ext.prefix = { "${meta.id}_${meta.run}_hostremoved" } + publishDir = [ + path: { "${params.outdir}/QC_longreads/samtools/fastq" }, + mode: params.publish_dir_mode, + pattern: '*_hostremoved.fastq.gz', + enabled: params.save_hostremoved_reads + ] + } + + withName: SAMTOOLS_HOSTREMOVED_STATS { + ext.prefix = { "${meta.id}_${meta.run}" } + publishDir = [ + path: { "${params.outdir}/QC_longreads/samtools/stats" }, + mode: params.publish_dir_mode, + pattern: '*stats' + ] + } + withName: CENTRIFUGE_CENTRIFUGE { publishDir = [path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.txt"] } @@ -330,6 +379,17 @@ process { publishDir = [path: { "${params.outdir}/Assembly/SPAdesHybrid" }, mode: params.publish_dir_mode, pattern: "*.{fasta.gz,gfa.gz,fa.gz,log}"] } + withName: FLYE { + ext.args = ' --meta' + ext.prefix = { "FLYE-${meta.id}" } + publishDir = [path: { "${params.outdir}/Assembly/FLYE" }, mode: params.publish_dir_mode, pattern: "*.{fasta.gz,gfa.gz,log}"] + } + + withName: METAMDBG_ASM { + ext.prefix = { "METAMDBG-${meta.id}" } + publishDir = [path: { "${params.outdir}/Assembly/METAMDBG" 
}, mode: params.publish_dir_mode, pattern: "*.{fasta.gz,log}"] + } + withName: QUAST { publishDir = [path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }] } @@ -579,7 +639,14 @@ process { ] } - withName: METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS { + withName: METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS_SHORTREAD { + ext.args = params.shortread_percentidentity ? "--percentIdentity ${params.shortread_percentidentity}" : '' + publishDir = [path: { "${params.outdir}/GenomeBinning/depths/contigs" }, mode: params.publish_dir_mode, pattern: '*-depth.txt.gz'] + ext.prefix = { "${meta.assembler}-${meta.id}-depth" } + } + + withName: METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS_LONGREAD { + ext.args = params.longread_percentidentity ? "--percentIdentity ${params.longread_percentidentity}" : '' publishDir = [path: { "${params.outdir}/GenomeBinning/depths/contigs" }, mode: params.publish_dir_mode, pattern: '*-depth.txt.gz'] ext.prefix = { "${meta.assembler}-${meta.id}-depth" } } diff --git a/conf/test_longread.config b/conf/test_longread.config new file mode 100644 index 000000000..367aa8ace --- /dev/null +++ b/conf/test_longread.config @@ -0,0 +1,34 @@ +/* +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/mag -profile test_longread,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +// Limit resources so that this can run on GitHub Actions +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = params.pipelines_testdata_base_path + 'mag/samplesheets/samplesheet.long_read.csv' + min_length_unbinned_contigs = 1 + max_unbinned_contigs = 2 + busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2024-01-08.tar.gz" + skip_gtdbtk = true + gtdbtk_min_completeness = 0.01 + skip_concoct = true +} diff --git a/docs/output.md b/docs/output.md index daece6f6e..24c495735 100644 --- a/docs/output.md +++ b/docs/output.md @@ -263,6 +263,37 @@ SPAdesHybrid is a part of the [SPAdes](http://cab.spbu.ru/software/spades/) soft + + +### Flye + +[Flye](https://github.com/mikolmogorov/Flye) is a _de novo_ assembler for single-molecule sequencing reads, such as those produced by PacBio and Oxford Nanopore Technologies. + +
<details markdown="1"> +<summary>Output files</summary> + +- `Assembly/FLYE/` + - `FLYE-[sample/group].assembly_graph.gfa.gz`: Compressed assembly graph in gfa format + - `FLYE-[sample/group].assembly.fasta.gz`: Compressed assembled contigs in fasta format + - `FLYE-[sample/group].flye.log`: Log file + - `QC/[sample/group]/`: Directory containing QUAST files + +</details>
+ +### metaMDBG + +[metaMDBG](https://github.com/GaetanBenoitDev/metaMDBG) is a fast and low-memory assembler for long and accurate metagenomics reads (e.g. PacBio HiFi, Nanopore r10.4) + +
<details markdown="1"> +<summary>Output files</summary> + +- `Assembly/METAMDBG/` + - `METAMDBG-[sample/group].contigs.fasta.gz`: Compressed assembled contigs in fasta format + - `METAMDBG-[sample/group].metaMDBG.log`: Log file + - `QC/[sample/group]/`: Directory containing QUAST files + +</details>
+ ### Metagenome QC with QUAST [QUAST](http://cab.spbu.ru/software/quast/) is a tool that evaluates metagenome assemblies by computing various metrics. The QUAST output is also included in the MultiQC report, as well as in the assembly directories themselves. diff --git a/docs/usage.md b/docs/usage.md index cdddf0e08..4516d1693 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -43,6 +43,18 @@ sample2,0,0,data/sample2_R1.fastq.gz,data/sample2_R2.fastq.gz,data/sample2.fastq sample3,1,0,data/sample3_R1.fastq.gz,data/sample3_R2.fastq.gz, ``` +If only long-read data are available, the columns `short_reads_1` and `short_reads_2` can be left out: + +```csv title="samplesheet.csv" +sample,run,group,long_reads +sample1,1,0,data/sample1.fastq.gz +sample1,2,0,data/sample1.fastq.gz +sample2,0,0,data/sample2.fastq.gz +sample3,1,0,data/sample3.fastq.gz +``` + +In this case, only long-read-only assemblers (e.g. Flye or metaMDBG) can be run. + Please note the following requirements: - a minimum 5 of comma-separated columns @@ -50,7 +62,6 @@ Please note the following requirements: - Must contain the header `sample,group,short_reads_1,short_reads_2,long_reads` (where `run` can be optionally added) - Run IDs must be unique within a multi-run sample. A sample with multiple runs will be automatically concatenated. 
- FastQ files must be compressed (`.fastq.gz`, `.fq.gz`) -- `long_reads` can only be provided in combination with paired-end short read data - Within one samplesheet either only single-end or only paired-end reads can be specified - If single-end reads are specified, the command line parameter `--single_end` must be specified as well diff --git a/modules.json b/modules.json index 05e3b3dd1..59eba91d7 100644 --- a/modules.json +++ b/modules.json @@ -127,6 +127,11 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "flye": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "freebayes": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", @@ -207,6 +212,21 @@ "git_sha": "30d06da5bd7ae67be32758bf512cd75a4325d386", "installed_by": ["modules"] }, + "metamdbg/asm": { + "branch": "master", + "git_sha": "c912aa8b29a17d6dbec3fdda71185fbe3e77ba59", + "installed_by": ["modules"] + }, + "minimap2/align": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "minimap2/index": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "mmseqs/databases": { "branch": "master", "git_sha": "699e078133f580548aeb43114f93ac29928c6143", @@ -267,6 +287,26 @@ "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", "installed_by": ["modules"] }, + "samtools/fastq": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] + }, + "samtools/stats": { + "branch": "master", + "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "installed_by": ["modules"] + }, + "samtools/view": { + "branch": "master", + "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + 
"installed_by": ["modules"] + }, "seqtk/mergepe": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/local/samtools_unmapped.nf b/modules/local/samtools_unmapped.nf new file mode 100644 index 000000000..39dd3e44e --- /dev/null +++ b/modules/local/samtools_unmapped.nf @@ -0,0 +1,57 @@ +process SAMTOOLS_UNMAPPED { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.21" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9e/9edc2564215d5cd137a8b25ca8a311600987186d406b092022444adf3c4447f7/data' : + 'community.wave.seqera.io/library/htslib_samtools:1.21--6cb89bfd40cbaabf' }" + + input: + tuple val(meta), path(input), path(index) + + output: + tuple val(meta), path("*hostremoved.fastq.gz"), emit: fastq + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_hostremoved" + def mapped = "-o ${prefix}_mapped.fastq.gz" + + """ + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + $args \\ + $input \\ + | \\ + samtools \\ + fastq \\ + $args2 \\ + --threads ${task.cpus-1} \\ + -0 ${prefix}.fastq.gz \\ + $mapped + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_hostremoved" + """ + touch ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/flye/environment.yml b/modules/nf-core/flye/environment.yml new file mode 100644 index 000000000..87b97eb89 --- /dev/null +++ 
b/modules/nf-core/flye/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::flye=2.9.5 diff --git a/modules/nf-core/flye/main.nf b/modules/nf-core/flye/main.nf new file mode 100644 index 000000000..8803655d4 --- /dev/null +++ b/modules/nf-core/flye/main.nf @@ -0,0 +1,68 @@ +process FLYE { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/flye:2.9.5--eb07d7b7094f222c' : + 'community.wave.seqera.io/library/flye:2.9.5--0221998e9c3ec606' }" + + input: + tuple val(meta), path(reads) + val mode + + output: + tuple val(meta), path("*.fasta.gz"), emit: fasta + tuple val(meta), path("*.gfa.gz") , emit: gfa + tuple val(meta), path("*.gv.gz") , emit: gv + tuple val(meta), path("*.txt") , emit: txt + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*.json") , emit: json + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def valid_mode = ["--pacbio-raw", "--pacbio-corr", "--pacbio-hifi", "--nano-raw", "--nano-corr", "--nano-hq"] + if ( !valid_mode.contains(mode) ) { error "Unrecognised mode to run Flye. Options: ${valid_mode.join(', ')}" } + """ + flye \\ + $mode \\ + $reads \\ + --out-dir . 
\\ + --threads \\ + $task.cpus \\ + $args + + gzip -c assembly.fasta > ${prefix}.assembly.fasta.gz + gzip -c assembly_graph.gfa > ${prefix}.assembly_graph.gfa.gz + gzip -c assembly_graph.gv > ${prefix}.assembly_graph.gv.gz + mv assembly_info.txt ${prefix}.assembly_info.txt + mv flye.log ${prefix}.flye.log + mv params.json ${prefix}.params.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + flye: \$( flye --version ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo stub | gzip -c > ${prefix}.assembly.fasta.gz + echo stub | gzip -c > ${prefix}.assembly_graph.gfa.gz + echo stub | gzip -c > ${prefix}.assembly_graph.gv.gz + echo contig_1 > ${prefix}.assembly_info.txt + echo stub > ${prefix}.flye.log + echo stub > ${prefix}.params.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + flye: \$( flye --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/flye/meta.yml b/modules/nf-core/flye/meta.yml new file mode 100644 index 000000000..1e33c275d --- /dev/null +++ b/modules/nf-core/flye/meta.yml @@ -0,0 +1,103 @@ +name: "flye" +description: De novo assembler for single molecule sequencing reads +keywords: + - assembly + - genome + - de novo + - genome assembler + - single molecule +tools: + - "flye": + description: "Fast and accurate de novo assembler for single molecule sequencing + reads" + homepage: "https://github.com/fenderglass/Flye" + documentation: "https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md" + tool_dev_url: "https://github.com/fenderglass/Flye" + doi: "10.1038/s41592-020-00971-x" + licence: ["BSD-3-clause"] + identifier: biotools:Flye +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: Input reads from Oxford Nanopore or PacBio data in FASTA/FASTQ + format. 
+ pattern: "*.{fasta,fastq,fasta.gz,fastq.gz,fa,fq,fa.gz,fq.gz}" + - - mode: + type: string + description: Flye mode depending on the input data (source and error rate) + pattern: "--pacbio-raw|--pacbio-corr|--pacbio-hifi|--nano-raw|--nano-corr|--nano-hq" +output: + - fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.fasta.gz": + type: file + description: Assembled FASTA file + pattern: "*.fasta.gz" + - gfa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.gfa.gz": + type: file + description: Repeat graph in gfa format + pattern: "*.gfa.gz" + - gv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.gv.gz": + type: file + description: Repeat graph in gv format + pattern: "*.gv.gz" + - txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.txt": + type: file + description: Extra information and statistics about resulting contigs + pattern: "*.txt" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.log": + type: file + description: Flye log file + pattern: "*.log" + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*.json": + type: file + description: Flye parameters + pattern: "*.json" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/modules/nf-core/flye/tests/main.nf.test b/modules/nf-core/flye/tests/main.nf.test new file mode 100644 index 000000000..afbf926ef --- /dev/null +++ b/modules/nf-core/flye/tests/main.nf.test @@ -0,0 +1,178 @@ +// According to the issue https://github.com/fenderglass/Flye/issues/164 +// Some fluctuations are expected because of the heuristics +// Here we check the that test.assembly_info.txt contains at least contig_1 + +nextflow_process { + + name "Test Process FLYE" + script "../main.nf" + process "FLYE" + tag "flye" + tag "modules" + tag "modules_nfcore" + + test("flye_pacbio_corr") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + ] + input[1] = "--pacbio-corr" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.fasta.get(0).get(1)).name, + file(process.out.gfa.get(0).get(1)).name, + file(process.out.gv.get(0).get(1)).name, + file(process.out.log.get(0).get(1)).name, + file(process.out.txt.get(0).get(1)).name, + path(process.out.txt.get(0).get(1)).readLines()[1].contains("contig_1"), + process.out.json, + process.out.versions + ).match() } + ) + } + + } + + test("flye_pacbio_hifi") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + ] + input[1] = "--pacbio-hifi" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + 
file(process.out.fasta.get(0).get(1)).name, + file(process.out.gfa.get(0).get(1)).name, + file(process.out.gv.get(0).get(1)).name, + file(process.out.log.get(0).get(1)).name, + file(process.out.txt.get(0).get(1)).name, + path(process.out.txt.get(0).get(1)).readLines()[1].contains("contig_1"), + process.out.json, + process.out.versions + ).match() } + ) + } + + } + + test("flye_nano_corr") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + ] + input[1] = "--nano-corr" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.fasta.get(0).get(1)).name, + file(process.out.gfa.get(0).get(1)).name, + file(process.out.gv.get(0).get(1)).name, + file(process.out.log.get(0).get(1)).name, + file(process.out.txt.get(0).get(1)).name, + path(process.out.txt.get(0).get(1)).readLines()[1].contains("contig_1"), + process.out.json, + process.out.versions + ).match() } + ) + } + + } + + test("flye_nano_hq") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + ] + input[1] = "--nano-hq" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.fasta.get(0).get(1)).name, + file(process.out.gfa.get(0).get(1)).name, + file(process.out.gv.get(0).get(1)).name, + file(process.out.log.get(0).get(1)).name, + file(process.out.txt.get(0).get(1)).name, + path(process.out.txt.get(0).get(1)).readLines()[1].contains("contig_1"), + process.out.json, + process.out.versions + ).match() } + ) + } + + } + + test("flye_pacbio_raw - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + ] + input[1] = "--pacbio-raw" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions.get(0)).yaml + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/flye/tests/main.nf.test.snap b/modules/nf-core/flye/tests/main.nf.test.snap new file mode 100644 index 000000000..7101f9edf --- /dev/null +++ b/modules/nf-core/flye/tests/main.nf.test.snap @@ -0,0 +1,224 @@ +{ + "flye_pacbio_hifi": { + "content": [ + "test.assembly.fasta.gz", + "test.assembly_graph.gfa.gz", + "test.assembly_graph.gv.gz", + "test.flye.log", + "test.assembly_info.txt", + false, + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ], + [ + "versions.yml:md5,80496e451401dbc0269ec404801a90e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T13:41:09.075217" + }, + "flye_pacbio_corr": { + "content": [ + "test.assembly.fasta.gz", + "test.assembly_graph.gfa.gz", + "test.assembly_graph.gv.gz", + "test.flye.log", + "test.assembly_info.txt", + true, + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ], + [ + "versions.yml:md5,80496e451401dbc0269ec404801a90e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T13:33:16.267658" + }, + "flye_nano_corr": { + "content": [ + "test.assembly.fasta.gz", + "test.assembly_graph.gfa.gz", + "test.assembly_graph.gv.gz", + "test.flye.log", + "test.assembly_info.txt", + true, + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ], + [ + "versions.yml:md5,80496e451401dbc0269ec404801a90e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T13:44:28.522592" + }, + "flye_nano_hq": { + "content": [ + 
"test.assembly.fasta.gz", + "test.assembly_graph.gfa.gz", + "test.assembly_graph.gv.gz", + "test.flye.log", + "test.assembly_info.txt", + true, + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ], + [ + "versions.yml:md5,80496e451401dbc0269ec404801a90e3" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T13:46:35.912198" + }, + "flye_pacbio_raw - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.assembly.fasta.gz:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.assembly_graph.gfa.gz:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.assembly_graph.gv.gz:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.assembly_info.txt:md5,e3aec731279050302fc8d6f126b3030e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.flye.log:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.params.json:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "6": [ + "versions.yml:md5,80496e451401dbc0269ec404801a90e3" + ], + "fasta": [ + [ + { + "id": "test" + }, + "test.assembly.fasta.gz:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "gfa": [ + [ + { + "id": "test" + }, + "test.assembly_graph.gfa.gz:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "gv": [ + [ + { + "id": "test" + }, + "test.assembly_graph.gv.gz:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "json": [ + [ + { + "id": "test" + }, + "test.params.json:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.flye.log:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "txt": [ + [ + { + "id": "test" + }, + "test.assembly_info.txt:md5,e3aec731279050302fc8d6f126b3030e" + ] + ], + "versions": [ + "versions.yml:md5,80496e451401dbc0269ec404801a90e3" + ] + }, + { + "FLYE": { + "flye": "2.9.5-b1801" + } + } + ], + "meta": 
{ + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T09:07:05.234775" + } +} \ No newline at end of file diff --git a/modules/nf-core/flye/tests/tags.yml b/modules/nf-core/flye/tests/tags.yml new file mode 100644 index 000000000..31103d137 --- /dev/null +++ b/modules/nf-core/flye/tests/tags.yml @@ -0,0 +1,2 @@ +flye: + - modules/nf-core/flye/** diff --git a/modules/nf-core/metamdbg/asm/environment.yml b/modules/nf-core/metamdbg/asm/environment.yml new file mode 100644 index 000000000..5641ddf77 --- /dev/null +++ b/modules/nf-core/metamdbg/asm/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::metamdbg=1.1 diff --git a/modules/nf-core/metamdbg/asm/main.nf b/modules/nf-core/metamdbg/asm/main.nf new file mode 100644 index 000000000..c258a58e1 --- /dev/null +++ b/modules/nf-core/metamdbg/asm/main.nf @@ -0,0 +1,58 @@ +process METAMDBG_ASM { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/metamdbg:1.1--h077b44d_1': + 'biocontainers/metamdbg:1.1--h077b44d_1' }" + + input: + tuple val(meta), path(reads) + val(input_type) + + output: + tuple val(meta), path("*.contigs.fasta.gz"), emit: contigs + tuple val(meta), path("*.metaMDBG.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if(!(input_type in ["hifi", "ont"])) { + error("ERROR: input_type must be one of either 'hifi' or 'ont'.") + } + """ + metaMDBG asm \\ + --threads ${task.cpus} \\ + --out-dir . 
\\ + ${args} \\ + --in-${input_type} ${reads} + + rm -r tmp/ + + mv contigs.fasta.gz ${prefix}.contigs.fasta.gz + mv metaMDBG.log ${prefix}.metaMDBG.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metamdbg: \$(metaMDBG | grep "Version" | sed 's/ Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.metaMDBG.log + touch ${prefix}.contigs.fasta.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + metamdbg: \$(metaMDBG | grep "Version" | sed 's/ Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/metamdbg/asm/meta.yml b/modules/nf-core/metamdbg/asm/meta.yml new file mode 100644 index 000000000..2bf7f17b7 --- /dev/null +++ b/modules/nf-core/metamdbg/asm/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "METAMDBG_ASM" +description: Metagenome assembler for long-read sequences (HiFi and ONT). +keywords: + - assembly + - long reads + - metagenome + - metagenome assembler +tools: + - "metamdbg": + description: "MetaMDBG: a lightweight assembler for long and accurate metagenomics + reads." + homepage: "https://github.com/GaetanBenoitDev/metaMDBG" + documentation: "https://github.com/GaetanBenoitDev/metaMDBG" + tool_dev_url: "https://github.com/GaetanBenoitDev/metaMDBG" + doi: "10.1038/s41587-023-01983-6" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - reads: + type: file + description: Long read sequence data from ONT or HiFi in fasta format (can be + gzipped) + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + + - - input_type: + type: string + description: Sequencing technology for reads - either "hifi" for PacBio HiFi + reads or "ont" for Oxford Nanopore reads. 
+ +output: + - contigs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.contigs.fasta.gz": + type: file + description: | + Gzipped fasta file containing the assembled contigs from the input + reads. + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.metaMDBG.log": + type: file + description: Log file describing the metaMDBG run. + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@prototaxites" +maintainers: + - "@prototaxites" diff --git a/modules/nf-core/metamdbg/asm/tests/main.nf.test b/modules/nf-core/metamdbg/asm/tests/main.nf.test new file mode 100644 index 000000000..6b8d59f1f --- /dev/null +++ b/modules/nf-core/metamdbg/asm/tests/main.nf.test @@ -0,0 +1,116 @@ +nextflow_process { + + name "Test Process METAMDBG_ASM" + script "../main.nf" + process "METAMDBG_ASM" + + tag "modules" + tag "modules_nfcore" + tag "metamdbg" + tag "metamdbg/asm" + + test("metamdbg_asm - ont") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/nanopore/fastq/test.fastq.gz', checkIfExists: true), + ] + input[1] = "ont" + """ + } + } + + then { + assertAll( + { assert process.success }, + // output is stochastic - contig names differ per run + // log file contains nextflow work dir paths + { assert snapshot( + file(process.out.contigs[0][1]).name, + file(process.out.log[0][1]).name, + process.out.versions + ).match() }, + { assert path(process.out.log[0][1]).readLines().last().contains("Done!") } + ) + } + } + test("metamdbg_asm - hifi") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + ] + input[1] = "hifi" + """ + } + } + + then { + assertAll( + { assert process.success }, + // output is stochastic - contig names differ per run + // log file contains nextflow work dir paths + { assert snapshot( + file(process.out.contigs[0][1]).name, + file(process.out.log[0][1]).name, + process.out.versions + ).match() }, + { assert path(process.out.log[0][1]).readLines().last().contains("Done!") } + ) + } + } + + test("metamdbg_asm - wrong format") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + ] + input[1] = "wrong" + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert process.errorReport.contains("ERROR: input_type must be one of either 'hifi' or 'ont'.") } + ) + } + + } + + test("metamdbg_asm - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + ] + input[1] = "hifi" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("stub_versions") } + ) + } + + } +} diff --git a/modules/nf-core/metamdbg/asm/tests/main.nf.test.snap b/modules/nf-core/metamdbg/asm/tests/main.nf.test.snap new file mode 100644 index 000000000..12a192d71 --- /dev/null +++ b/modules/nf-core/metamdbg/asm/tests/main.nf.test.snap @@ -0,0 +1,42 @@ +{ + "metamdbg_asm - hifi": { + "content": [ + "test.contigs.fasta.gz", + "test.metaMDBG.log", + [ + "versions.yml:md5,7891f9a1057e30846f3f7ec4ab0c7b4b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-17T10:23:06.500307496" + }, + "metamdbg_asm - ont": { + "content": [ + 
"test.contigs.fasta.gz", + "test.metaMDBG.log", + [ + "versions.yml:md5,7891f9a1057e30846f3f7ec4ab0c7b4b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-17T10:22:42.120580907" + }, + "stub_versions": { + "content": [ + [ + "versions.yml:md5,7891f9a1057e30846f3f7ec4ab0c7b4b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-17T10:23:22.954484953" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml new file mode 100644 index 000000000..dc6476b72 --- /dev/null +++ b/modules/nf-core/minimap2/align/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::minimap2=2.28 + - bioconda::samtools=1.20 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf new file mode 100644 index 000000000..d82dc14d9 --- /dev/null +++ b/modules/nf-core/minimap2/align/main.nf @@ -0,0 +1,78 @@ +process MINIMAP2_ALIGN { + tag "$meta.id" + label 'process_high' + + // Note: the versions here need to match the versions used in the mulled container below and minimap2/index + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' : + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(reference) + val bam_format + val bam_index_extension + val cigar_paf_format + val cigar_bam + + output: + tuple val(meta), path("*.paf") , optional: true, emit: paf + tuple val(meta), path("*.bam") , optional: true, emit: bam + tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" + def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf" + def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' + def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' + def query = bam_input ? "-" : reads + def target = reference ?: (bam_input ? 
error("BAM input requires reference") : reads) + + """ + $samtools_reset_fastq \\ + minimap2 \\ + $args \\ + -t $task.cpus \\ + $target \\ + $query \\ + $cigar_paf \\ + $set_cigar_bam \\ + $bam_output + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : "" + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + + """ + touch $output_file + ${bam_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml new file mode 100644 index 000000000..a4cfc891a --- /dev/null +++ b/modules/nf-core/minimap2/align/meta.yml @@ -0,0 +1,99 @@ +name: minimap2_align +description: A versatile pairwise aligner for genomic and spliced nucleotide sequences +keywords: + - align + - fasta + - fastq + - genome + - paf + - reference +tools: + - minimap2: + description: | + A versatile pairwise aligner for genomic and spliced nucleotide sequences. + homepage: https://github.com/lh3/minimap2 + documentation: https://github.com/lh3/minimap2#uguide + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test_ref'] + - reference: + type: file + description: | + Reference database in FASTA format. + - - bam_format: + type: boolean + description: Specify that output should be in BAM format + - - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. "bai") + - - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + downstream tools cannot read long CIGARs from BAM (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) +output: + - paf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.paf": + type: file + description: Alignment in PAF format + pattern: "*.paf" + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Alignment in BAM format + pattern: "*.bam" + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bam.${bam_index_extension}": + type: file + description: BAM alignment index + pattern: "*.bam.*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" + - "@fellen31" +maintainers: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" + - "@fellen31" diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test new file mode 100644 index 000000000..4072c1719 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -0,0 +1,441 @@ +nextflow_process { + + name "Test Process MINIMAP2_ALIGN" + script "../main.nf" + process "MINIMAP2_ALIGN" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/align" + + test("sarscov2 - fastq, fasta, true, [], false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], fasta, true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false") { + + when { + process { + """ + input[0] = [ + [ 
id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, [], false, 
false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, false, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false - stub") { + + options 
"-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap new file mode 100644 index 
000000000..12264a857 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -0,0 +1,476 @@ +{ + "sarscov2 - bam, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:03:00.827260362" + }, + "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:37.92353539" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + 
[ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:29:44.669021368" + }, + "sarscov2 - fastq, fasta, false, [], false, false - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + + ], + "index": [ + + ], + "paf": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:52.738781039" + }, + "sarscov2 - fastq, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:23.033808223" + }, + "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta 
test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "1bc392244f228bf52cf0b5a8f6a654c9", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:18.964586894" + }, + "sarscov2 - fastq, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "f194745c0ccfcb2a9c0aee094a08750", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:17:48.667488325" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "f194745c0ccfcb2a9c0aee094a08750", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:02.517416733" + }, + "sarscov2 - bam, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + 
}, + "timestamp": "2024-07-25T09:02:49.64829488" + }, + "sarscov2 - bam, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:22.162291795" + }, + "sarscov2 - fastq, [], true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:ERR5069949.2151832\tLN:150", + "@SQ\tSN:ERR5069949.576388\tLN:77", + "@SQ\tSN:ERR5069949.501486\tLN:146", + "@SQ\tSN:ERR5069949.1331889\tLN:132", + "@SQ\tSN:ERR5069949.2161340\tLN:80", + "@SQ\tSN:ERR5069949.973930\tLN:79", + "@SQ\tSN:ERR5069949.2417063\tLN:150", + "@SQ\tSN:ERR5069949.376959\tLN:151", + "@SQ\tSN:ERR5069949.1088785\tLN:149", + "@SQ\tSN:ERR5069949.1066259\tLN:147", + "@SQ\tSN:ERR5069949.2832676\tLN:139", + "@SQ\tSN:ERR5069949.2953930\tLN:151", + "@SQ\tSN:ERR5069949.324865\tLN:151", + "@SQ\tSN:ERR5069949.2185111\tLN:150", + "@SQ\tSN:ERR5069949.937422\tLN:151", + "@SQ\tSN:ERR5069949.2431709\tLN:150", + "@SQ\tSN:ERR5069949.1246538\tLN:148", + "@SQ\tSN:ERR5069949.1189252\tLN:98", + "@SQ\tSN:ERR5069949.2216307\tLN:147", + "@SQ\tSN:ERR5069949.3273002\tLN:148", + "@SQ\tSN:ERR5069949.3277445\tLN:151", + "@SQ\tSN:ERR5069949.3022231\tLN:147", + "@SQ\tSN:ERR5069949.184542\tLN:151", + "@SQ\tSN:ERR5069949.540529\tLN:149", + "@SQ\tSN:ERR5069949.686090\tLN:150", + "@SQ\tSN:ERR5069949.2787556\tLN:106", + "@SQ\tSN:ERR5069949.2650879\tLN:150", + "@SQ\tSN:ERR5069949.2064910\tLN:149", + "@SQ\tSN:ERR5069949.2328704\tLN:150", + "@SQ\tSN:ERR5069949.1067032\tLN:150", + 
"@SQ\tSN:ERR5069949.3338256\tLN:151", + "@SQ\tSN:ERR5069949.1412839\tLN:147", + "@SQ\tSN:ERR5069949.1538968\tLN:150", + "@SQ\tSN:ERR5069949.147998\tLN:94", + "@SQ\tSN:ERR5069949.366975\tLN:106", + "@SQ\tSN:ERR5069949.1372331\tLN:151", + "@SQ\tSN:ERR5069949.1709367\tLN:129", + "@SQ\tSN:ERR5069949.2388984\tLN:150", + "@SQ\tSN:ERR5069949.1132353\tLN:150", + "@SQ\tSN:ERR5069949.1151736\tLN:151", + "@SQ\tSN:ERR5069949.479807\tLN:150", + "@SQ\tSN:ERR5069949.2176303\tLN:151", + "@SQ\tSN:ERR5069949.2772897\tLN:151", + "@SQ\tSN:ERR5069949.1020777\tLN:122", + "@SQ\tSN:ERR5069949.465452\tLN:151", + "@SQ\tSN:ERR5069949.1704586\tLN:149", + "@SQ\tSN:ERR5069949.1258508\tLN:151", + "@SQ\tSN:ERR5069949.986441\tLN:119", + "@SQ\tSN:ERR5069949.2674295\tLN:148", + "@SQ\tSN:ERR5069949.885966\tLN:79", + "@SQ\tSN:ERR5069949.2342766\tLN:151", + "@SQ\tSN:ERR5069949.3122970\tLN:127", + "@SQ\tSN:ERR5069949.3279513\tLN:72", + "@SQ\tSN:ERR5069949.309410\tLN:151", + "@SQ\tSN:ERR5069949.532979\tLN:149", + "@SQ\tSN:ERR5069949.2888794\tLN:151", + "@SQ\tSN:ERR5069949.2205229\tLN:150", + "@SQ\tSN:ERR5069949.786562\tLN:151", + "@SQ\tSN:ERR5069949.919671\tLN:151", + "@SQ\tSN:ERR5069949.1328186\tLN:151", + "@SQ\tSN:ERR5069949.870926\tLN:149", + "@SQ\tSN:ERR5069949.2257580\tLN:151", + "@SQ\tSN:ERR5069949.3249622\tLN:77", + "@SQ\tSN:ERR5069949.611123\tLN:125", + "@SQ\tSN:ERR5069949.651338\tLN:142", + "@SQ\tSN:ERR5069949.169513\tLN:92", + "@SQ\tSN:ERR5069949.155944\tLN:150", + "@SQ\tSN:ERR5069949.2033605\tLN:150", + "@SQ\tSN:ERR5069949.2730382\tLN:142", + "@SQ\tSN:ERR5069949.2125592\tLN:150", + "@SQ\tSN:ERR5069949.1062611\tLN:151", + "@SQ\tSN:ERR5069949.1778133\tLN:151", + "@SQ\tSN:ERR5069949.3057020\tLN:95", + "@SQ\tSN:ERR5069949.2972968\tLN:141", + "@SQ\tSN:ERR5069949.2734474\tLN:149", + "@SQ\tSN:ERR5069949.856527\tLN:151", + "@SQ\tSN:ERR5069949.2098070\tLN:151", + "@SQ\tSN:ERR5069949.1552198\tLN:150", + "@SQ\tSN:ERR5069949.2385514\tLN:150", + "@SQ\tSN:ERR5069949.2270078\tLN:151", + 
"@SQ\tSN:ERR5069949.114870\tLN:150", + "@SQ\tSN:ERR5069949.2668880\tLN:147", + "@SQ\tSN:ERR5069949.257821\tLN:139", + "@SQ\tSN:ERR5069949.2243023\tLN:150", + "@SQ\tSN:ERR5069949.2605155\tLN:146", + "@SQ\tSN:ERR5069949.1340552\tLN:151", + "@SQ\tSN:ERR5069949.1561137\tLN:150", + "@SQ\tSN:ERR5069949.2361683\tLN:149", + "@SQ\tSN:ERR5069949.2521353\tLN:150", + "@SQ\tSN:ERR5069949.1261808\tLN:149", + "@SQ\tSN:ERR5069949.2734873\tLN:98", + "@SQ\tSN:ERR5069949.3017828\tLN:107", + "@SQ\tSN:ERR5069949.573706\tLN:150", + "@SQ\tSN:ERR5069949.1980512\tLN:151", + "@SQ\tSN:ERR5069949.1014693\tLN:150", + "@SQ\tSN:ERR5069949.3184655\tLN:150", + "@SQ\tSN:ERR5069949.29668\tLN:89", + "@SQ\tSN:ERR5069949.3258358\tLN:151", + "@SQ\tSN:ERR5069949.1476386\tLN:151", + "@SQ\tSN:ERR5069949.2415814\tLN:150", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "16c1c651f8ec67383bcdee3c55aed94f", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:34.246998277" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml new file mode 100644 index 000000000..39dba3744 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/tags.yml @@ -0,0 +1,2 @@ +minimap2/align: + - "modules/nf-core/minimap2/align/**" diff --git a/modules/nf-core/minimap2/index/environment.yml b/modules/nf-core/minimap2/index/environment.yml new file mode 100644 index 000000000..d1c1b471b --- /dev/null +++ b/modules/nf-core/minimap2/index/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::minimap2=2.28 diff --git a/modules/nf-core/minimap2/index/main.nf b/modules/nf-core/minimap2/index/main.nf new file mode 100644 index 000000000..383202142 
--- /dev/null +++ b/modules/nf-core/minimap2/index/main.nf @@ -0,0 +1,44 @@ +process MINIMAP2_INDEX { + label 'process_low' + + // Note: the versions here need to match the versions used in minimap2/align + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/minimap2:2.28--he4a0461_0' : + 'biocontainers/minimap2:2.28--he4a0461_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.mmi"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + minimap2 \\ + -t $task.cpus \\ + -d ${fasta.baseName}.mmi \\ + $args \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ + + stub: + """ + touch ${fasta.baseName}.mmi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/minimap2/index/meta.yml b/modules/nf-core/minimap2/index/meta.yml new file mode 100644 index 000000000..57c80e29f --- /dev/null +++ b/modules/nf-core/minimap2/index/meta.yml @@ -0,0 +1,46 @@ +name: minimap2_index +description: Provides fasta index required by minimap2 alignment. +keywords: + - index + - fasta + - reference +tools: + - minimap2: + description: | + A versatile pairwise aligner for genomic and spliced nucleotide sequences. + homepage: https://github.com/lh3/minimap2 + documentation: https://github.com/lh3/minimap2#uguide + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: | + Reference database in FASTA format. 
+output: + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.mmi": + type: file + description: Minimap2 fasta index. + pattern: "*.mmi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@yuukiiwa" + - "@drpatelh" +maintainers: + - "@yuukiiwa" + - "@drpatelh" diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test b/modules/nf-core/minimap2/index/tests/main.nf.test new file mode 100644 index 000000000..97840ff75 --- /dev/null +++ b/modules/nf-core/minimap2/index/tests/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process MINIMAP2_INDEX" + script "../main.nf" + process "MINIMAP2_INDEX" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/index" + + test("minimap2 index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test.snap b/modules/nf-core/minimap2/index/tests/main.nf.test.snap new file mode 100644 index 000000000..0b0988283 --- /dev/null +++ b/modules/nf-core/minimap2/index/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test_ref" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "1": [ + "versions.yml:md5,0fced0ee8015e7f50b82566e3db8f7b0" + ], + "index": [ + [ + { + "id": "test_ref" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "versions": [ + "versions.yml:md5,0fced0ee8015e7f50b82566e3db8f7b0" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T11:46:30.000058092" + }, + 
"minimap2 index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "1": [ + "versions.yml:md5,2f8340380c6741e9261a284262a90bde" + ], + "index": [ + [ + { + "id": "test" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "versions": [ + "versions.yml:md5,2f8340380c6741e9261a284262a90bde" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:58:29.828187662" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/tests/tags.yml b/modules/nf-core/minimap2/index/tests/tags.yml new file mode 100644 index 000000000..e5ef8e19f --- /dev/null +++ b/modules/nf-core/minimap2/index/tests/tags.yml @@ -0,0 +1,2 @@ +minimap2/index: + - modules/nf-core/minimap2/index/** diff --git a/modules/nf-core/samtools/fastq/environment.yml b/modules/nf-core/samtools/fastq/environment.yml new file mode 100644 index 000000000..62054fc97 --- /dev/null +++ b/modules/nf-core/samtools/fastq/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/fastq/main.nf b/modules/nf-core/samtools/fastq/main.nf new file mode 100644 index 000000000..136744d5a --- /dev/null +++ b/modules/nf-core/samtools/fastq/main.nf @@ -0,0 +1,44 @@ +process SAMTOOLS_FASTQ { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input) + val(interleave) + + output: + tuple val(meta), path("*_{1,2}.fastq.gz") , optional:true, emit: fastq + tuple val(meta), path("*_interleaved.fastq") , optional:true, emit: interleaved + tuple val(meta), path("*_singleton.fastq.gz") , optional:true, emit: singleton + tuple val(meta), path("*_other.fastq.gz") , optional:true, emit: other + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq" : + meta.single_end ? "-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" : + "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz" + """ + samtools \\ + fastq \\ + $args \\ + --threads ${task.cpus-1} \\ + -0 ${prefix}_other.fastq.gz \\ + $input \\ + $output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/fastq/meta.yml b/modules/nf-core/samtools/fastq/meta.yml new file mode 100644 index 000000000..c15a0b6f8 --- /dev/null +++ b/modules/nf-core/samtools/fastq/meta.yml @@ -0,0 +1,86 @@ +name: samtools_fastq +description: Converts a SAM/BAM/CRAM file to FASTQ +keywords: + - bam + - sam + - cram + - fastq +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. 
+ homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - - interleave: + type: boolean + description: Set true for interleaved fastq file +output: + - fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_{1,2}.fastq.gz": + type: file + description: Compressed FASTQ file(s) with reads with either the READ1 or READ2 + flag set in separate files. + pattern: "*_{1,2}.fastq.gz" + - interleaved: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_interleaved.fastq": + type: file + description: Uncompressed FASTQ file with reads with either the READ1 or READ2 + flag set in a combined file. Needs collated input file. + pattern: "*_interleaved.fastq" + - singleton: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_singleton.fastq.gz": + type: file + description: Compressed FASTQ file with singleton reads + pattern: "*_singleton.fastq.gz" + - other: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*_other.fastq.gz": + type: file + description: Compressed FASTQ file with reads with either both READ1 and READ2 + flags set or unset + pattern: "*_other.fastq.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@priyanka-surana" + - "@suzannejin" +maintainers: + - "@priyanka-surana" + - "@suzannejin" diff --git a/modules/nf-core/samtools/fastq/tests/main.nf.test b/modules/nf-core/samtools/fastq/tests/main.nf.test new file mode 100644 index 000000000..f6ac1123f --- /dev/null +++ b/modules/nf-core/samtools/fastq/tests/main.nf.test @@ -0,0 +1,67 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FASTQ" + script "../main.nf" + process "SAMTOOLS_FASTQ" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/fastq" + + test("bam") { + + when { + process { + """ + interleave = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = interleave + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fastq[0][1].collect { path(it).linesGzip[0..6] }).match("bam_fastq") }, + { assert snapshot(process.out.interleaved).match("bam_interleaved") }, + { assert snapshot(file(process.out.singleton[0][1]).name).match("bam_singleton") }, + { assert snapshot(file(process.out.other[0][1]).name).match("bam_other") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("bam_interleave") { + + when { + process { + """ + interleave = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = interleave + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { assert snapshot(process.out.fastq).match("bam_interleave_fastq") }, + { assert snapshot(path(process.out.interleaved[0][1]).readLines()[0..6]).match("bam_interlinterleave_eaved") }, + { assert snapshot(process.out.singleton).match("bam_singinterleave_leton") }, + { assert snapshot(file(process.out.other[0][1]).name).match("bam_interleave_other") }, + { assert snapshot(process.out.versions).match("bam_verinterleave_sions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/fastq/tests/main.nf.test.snap b/modules/nf-core/samtools/fastq/tests/main.nf.test.snap new file mode 100644 index 000000000..10e5cd3d7 --- /dev/null +++ b/modules/nf-core/samtools/fastq/tests/main.nf.test.snap @@ -0,0 +1,139 @@ +{ + "bam_interlinterleave_eaved": { + "content": [ + [ + "@ERR5069949.2151832/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "+", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? 
"csi" : "bai" + """ + touch ${input}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 000000000..db8df0d50 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,71 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file +output: + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 000000000..0ed260efa --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 000000000..ca34fb5cd --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,140 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi") { + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = 
Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 000000000..72d65e81a --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,250 @@ +{ + "csi - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:25.261127166" + }, + "crai - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:12.653194876" + }, + "bai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:01.854932651" + }, + "csi": { + "content": [ + "test.paired_end.sorted.bam.csi", + [ + 
"versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:51.485364222" + }, + "crai": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:40.518873972" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:21.184050361" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 000000000..e0f58a7a3 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 000000000..62054fc97 --- /dev/null 
+++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 000000000..4443948b7 --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? 
"--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 000000000..77b020f76 --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,66 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" +output: + - stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.stats": + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 000000000..5bc893095 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,113 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/stats" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', 
checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 000000000..df507be7a --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,142 @@ +{ + "cram": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.stats:md5,a27fe55e49a341f92379bb20a65c6a06" + ] + ], + "1": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,a27fe55e49a341f92379bb20a65c6a06" + ] + ], + "versions": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:29:16.767396182" + }, + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:29:29.721580274" + }, + "cram - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:29:53.567964304" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d53a2584376d78942839e9933a34d11b" + ] + ], + "1": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d53a2584376d78942839e9933a34d11b" + ] + ], + "versions": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ] + } + ], + "meta": { + 
"nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:28:50.73610604" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml new file mode 100644 index 000000000..7c28e30f3 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/stats: + - modules/nf-core/samtools/stats/** diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 000000000..02cda6e6a --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf new file mode 100644 index 000000000..a6941e638 --- /dev/null +++ b/modules/nf-core/samtools/view/main.nf @@ -0,0 +1,77 @@ +process SAMTOOLS_VIEW { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9e/9edc2564215d5cd137a8b25ca8a311600987186d406b092022444adf3c4447f7/data' : + 'community.wave.seqera.io/library/htslib_samtools:1.21--6cb89bfd40cbaabf' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta) + path qname + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{bai,csi,crai}"), emit: unselected_index, optional: true // index of the unselected output; CRAM indexes end in .crai + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """ + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + ${readnames} \\ + $args \\ + -o ${prefix}.${file_type} \\ + $input \\ + $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + index = args.contains("--write-index") ? "touch ${prefix}.${file_type}.csi" : "" + + """ + touch ${prefix}.${file_type} + ${index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml new file mode 100644 index 000000000..caa7b0150 --- /dev/null +++ b/modules/nf-core/samtools/view/meta.yml @@ -0,0 +1,141 @@ +name: samtools_view +description: filter/convert SAM/BAM/CRAM file +keywords: + - view + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{.bai,.csi,.crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.${file_type}.crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - unselected: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + - unselected_index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}.{bai,csi,crsi}: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{bai,csi,crai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/view/tests/bam.config b/modules/nf-core/samtools/view/tests/bam.config new file mode 100644 index 000000000..c10d10811 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/bam_index.config b/modules/nf-core/samtools/view/tests/bam_index.config new file mode 100644 index 000000000..771ae033a --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam --write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test new file mode 100644 index 000000000..37b81a916 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -0,0 +1,214 @@ +nextflow_process { + + name "Test Process 
SAMTOOLS_VIEW" + script "../main.nf" + process "SAMTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/view" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.bai).match("bam_bai") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.sam).match("bam_sam") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, + { assert snapshot(process.out.bai).match("cram_bai") }, + { assert snapshot(process.out.bam).match("cram_bam") }, + { assert snapshot(process.out.crai).match("cram_crai") }, + { assert snapshot(process.out.csi).match("cram_csi") }, + { assert snapshot(process.out.sam).match("cram_sam") }, + { assert 
snapshot(process.out.versions).match("cram_versions") } + ) + } + } + + test("cram_to_bam") { + + config "./bam.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, + { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, + { assert snapshot(process.out.csi).match("cram_to_bam_csi") }, + { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("cram_to_bam_index") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") }, 
+ { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_versions") } + ) + } + } + + test("cram_to_bam_index_qname") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") }, + { assert snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } + ) + } + } + + test("bam_stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', 
checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, + { assert snapshot(process.out.bai).match("bam_stub_bai") }, + { assert snapshot(process.out.crai).match("bam_stub_crai") }, + { assert snapshot(process.out.cram).match("bam_stub_cram") }, + { assert snapshot(process.out.sam).match("bam_stub_sam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap new file mode 100644 index 000000000..63849b037 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -0,0 +1,528 @@ +{ + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.256068" + }, + "cram_to_bam_index_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.958617" + }, + "bam_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.065301" + }, + "bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.258578" + }, + "bam_stub_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.071284" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:26:24.461775464" + }, + "cram_to_bam_index_cram": { + "content": [ + [ + + ] + 
], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.972288" + }, + "cram_to_bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.999247" + }, + "cram_to_bam_index_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.976457" + }, + "cram_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.497581" + }, + "cram_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.50038" + }, + "cram_to_bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.992239" + }, + "cram_to_bam_index_qname_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.325496" + }, + "bam_stub_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.079529" + }, + "cram_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.490286" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.262882" + }, + "cram_to_bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.989247" + }, + "cram_to_bam_index_crai": 
{ + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.967681" + }, + "cram_to_bam_index_qname_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:25:51.953436682" + }, + "cram_to_bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.982361" + }, + "cram_to_bam_index_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.95456" + }, + "cram_to_bam_index_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:25:14.475388399" + }, + "cram_to_bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.98601" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:24:49.673441798" + }, + "cram_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.495512" + }, + "bam_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.076908" + }, + "cram_to_bam_index_qname_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "cram_to_bam_index_qname_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T19:38:23.330789" + }, + "cram_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.493129" + }, + "bam_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.074313" + }, + "cram_to_bam_index_qname_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:23:27.151650338" + }, + "cram_to_bam_index_qname_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.333248" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.259774" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.261287" + }, + "cram_to_bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.995454" + }, + "cram_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.502625" + }, + "cram_versions": { + 
"content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:24:12.95416913" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.264651" + }, + "cram_to_bam_index_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.962863" + }, + "cram_to_bam_index_qname_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.337634" + }, + "bam_stub_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.068596" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/tags.yml b/modules/nf-core/samtools/view/tests/tags.yml new file mode 100644 index 000000000..4fdf1dd12 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/view: + - "modules/nf-core/samtools/view/**" diff --git a/nextflow.config b/nextflow.config index fdbe12828..c3d948f7d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -30,6 +30,7 @@ params { // long read preprocessing options longread_adaptertrimming_tool = "porechop_abi" longread_filtering_tool = "filtlong" + longread_mapping_mode = "map-ont" // phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" phix_reference = "${baseDir}/assets/data/GCA_002596845.1_ASM259684v1_genomic.fna.gz" 
save_phixremoved_reads = false @@ -53,6 +54,8 @@ params { min_length_unbinned_contigs = 1000000 max_unbinned_contigs = 100 skip_prokka = false + longread_percentidentity = null + shortread_percentidentity = null prokka_with_compliance = false prokka_compliance_centre = null @@ -65,6 +68,12 @@ params { skip_megahit = false skip_quast = false skip_prodigal = false + skip_metamdbg = false + skip_flye = false + flye_mode = 'nano-raw' + metamdbg_mode = 'ont' + + // virus identification options run_virus_identification = false @@ -354,6 +363,9 @@ profiles { test_concoct { includeConfig 'conf/test_concoct.config' } + test_longread { + includeConfig 'conf/test_longread.config' + } } // Load nf-core custom profiles from different Institutions diff --git a/nextflow_schema.json b/nextflow_schema.json index 9159bb864..30b396447 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -444,6 +444,12 @@ "description": "Specify which long read filtering tool to use.", "enum": ["filtlong", "nanoq", "chopper"], "default": "filtlong" + }, + "longread_mapping_mode": { + "type": "string", + "default": "map-ont", + "enum": ["map-ont", "lr:hq", "map-pb", "map-hifi"], + "description": "Mapping mode passed to minimap2 when building the index for long read mapping." } } }, @@ -590,6 +596,28 @@ "skip_quast": { "type": "boolean", "description": "Skip metaQUAST." + }, + "skip_metamdbg": { + "type": "boolean", + "description": "Skip metaMDBG assembly." + }, + "skip_flye": { + "type": "boolean", + "description": "Skip Flye assembly."
+ }, + "flye_mode": { + "type": "string", + "description": "Flye assembly mode.", + "enum": ["pacbio-raw", "pacbio-corr", "pacbio-hifi", "nano-raw", "nano-corr", "nano-hq"], + "default": "nano-raw", + "help_text": "Flye run mode" + }, + "metamdbg_mode": { + "type": "string", + "description": "Input type for metaMDBG assembly.", + "enum": ["ont", "hifi"], + "default": "ont", + "help_text": "Input type for metaMDBG assembly" } } }, @@ -735,6 +763,14 @@ "type": "boolean", "description": "Exclude unbinned contigs in the post-binning steps (bin QC, taxonomic classification, and annotation steps).", "help": "If you're not interested in assemby results that are not considered 'genome level', excluding unbinned contigs can greatly speed up downstream steps such as Prokka, that can be quite slow and spin up many tasks." + }, + "longread_percentidentity": { + "type": "number", + "description": "Minimum percent identity for long reads mapping back to assembled contigs." + }, + "shortread_percentidentity": { + "type": "number", + "description": "Minimum percent identity for short reads mapping back to assembled contigs."
} } }, diff --git a/subworkflows/local/assembly.nf b/subworkflows/local/assembly.nf new file mode 100644 index 000000000..b0b83dbec --- /dev/null +++ b/subworkflows/local/assembly.nf @@ -0,0 +1,148 @@ +// SUBWORKFLOWS +include { SHORTREAD_ASSEMBLY } from './shortread_assembly' +include { LONGREAD_ASSEMBLY } from './longread_assembly' +include { HYBRID_ASSEMBLY } from './hybrid_assembly' + +// MODULES +include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../../modules/local/pool_single_reads' +include { POOL_PAIRED_READS } from '../../modules/local/pool_paired_reads' +include { POOL_SINGLE_READS as POOL_LONG_READS } from '../../modules/local/pool_single_reads' +include { GUNZIP as GUNZIP_SHORTREAD_ASSEMBLIES } from '../../modules/nf-core/gunzip' +include { GUNZIP as GUNZIP_LONGREAD_ASSEMBLIES } from '../../modules/nf-core/gunzip' + +workflow ASSEMBLY { + take: + ch_short_reads // [ [meta] , fastq1, fastq2] (mandatory) + ch_long_reads // [ [meta] , fastq] (mandatory) + + main: + + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + /* + ================================================================================ + Assembly preparation + ================================================================================ + */ + + if (params.coassemble_group) { + // short reads + // group and set group as new id + ch_short_reads_grouped = ch_short_reads + .map { meta, reads -> [meta.group, meta, reads] } + .groupTuple(by: 0) + .map { group, metas, reads -> + def assemble_as_single = params.single_end || (params.bbnorm && params.coassemble_group) + def meta = [:] + meta.id = "group-${group}" + meta.group = group + meta.single_end = assemble_as_single + if (assemble_as_single) { + [meta, reads.collect { it }, []] + } + else { + [meta, reads.collect { it[0] }, reads.collect { it[1] }] + } + } + // long reads + // group and set group as new id + ch_long_reads_grouped = ch_long_reads + .map { meta, reads -> [meta.group, meta, reads] } + 
.groupTuple(by: 0) + .map { group, metas, reads -> + def meta = [:] + meta.id = "group-${group}" + meta.group = group + [meta, reads.collect { it }] + } + } + else { + ch_short_reads_grouped = ch_short_reads + .filter { it[0].single_end } + .map { meta, reads -> [meta, [reads], []] } + .mix( + ch_short_reads.filter { !it[0].single_end }.map { meta, reads -> [meta, [reads[0]], [reads[1]]] } + ) + ch_long_reads_grouped = ch_long_reads + } + + if (!params.skip_spades || !params.skip_spadeshybrid) { + if (params.coassemble_group) { + if (params.bbnorm) { + ch_short_reads_spades = ch_short_reads_grouped.map { [it[0], it[1]] } + } + else { + POOL_SHORT_SINGLE_READS( + ch_short_reads_grouped.filter { it[0].single_end } + ) + POOL_PAIRED_READS( + ch_short_reads_grouped.filter { !it[0].single_end } + ) + ch_short_reads_spades = POOL_SHORT_SINGLE_READS.out.reads.mix(POOL_PAIRED_READS.out.reads) + } + } + else { + ch_short_reads_spades = ch_short_reads + } + // long reads + if (!params.single_end && !params.skip_spadeshybrid) { + POOL_LONG_READS(ch_long_reads_grouped) + ch_long_reads_spades = POOL_LONG_READS.out.reads + } + else { + ch_long_reads_spades = Channel.empty() + } + } + else { + ch_short_reads_spades = Channel.empty() + ch_long_reads_spades = Channel.empty() + } + + /* + ================================================================================ + Assembly + ================================================================================ + */ + + ch_shortread_assembled_contigs = Channel.empty() + ch_longread_assembled_contigs = Channel.empty() + + // SHORTREAD ASSEMBLY + SHORTREAD_ASSEMBLY( + ch_short_reads_grouped, + ch_short_reads_spades + ) + ch_versions = ch_versions.mix(SHORTREAD_ASSEMBLY.out.versions) + + // HYBRID ASSEMBLY + HYBRID_ASSEMBLY( + ch_short_reads_spades, + ch_long_reads_spades + ) + ch_versions = ch_versions.mix(HYBRID_ASSEMBLY.out.versions) + + // LONGREAD ASSEMBLY + LONGREAD_ASSEMBLY( + ch_long_reads_grouped + ) + ch_versions = 
ch_versions.mix(LONGREAD_ASSEMBLY.out.versions) + + ch_shortread_assembled_contigs = SHORTREAD_ASSEMBLY.out.assembled_contigs.mix(HYBRID_ASSEMBLY.out.assembled_contigs) + ch_longread_assembled_contigs = LONGREAD_ASSEMBLY.out.assembled_contigs + + GUNZIP_SHORTREAD_ASSEMBLIES(ch_shortread_assembled_contigs) + ch_versions = ch_versions.mix(GUNZIP_SHORTREAD_ASSEMBLIES.out.versions) + ch_shortread_assemblies = GUNZIP_SHORTREAD_ASSEMBLIES.out.gunzip + + GUNZIP_LONGREAD_ASSEMBLIES(ch_longread_assembled_contigs) + ch_versions = ch_versions.mix(GUNZIP_LONGREAD_ASSEMBLIES.out.versions) + ch_longread_assemblies = GUNZIP_LONGREAD_ASSEMBLIES.out.gunzip + + emit: + shortread_assemblies = ch_shortread_assemblies + longread_assemblies = ch_longread_assemblies + versions = ch_versions + multiqc_files = ch_multiqc_files + +} diff --git a/subworkflows/local/binning.nf b/subworkflows/local/binning.nf index 51caaeb9f..2116bcdf9 100644 --- a/subworkflows/local/binning.nf +++ b/subworkflows/local/binning.nf @@ -2,11 +2,12 @@ * Binning with MetaBAT2 and MaxBin2 */ -include { METABAT2_METABAT2 } from '../../modules/nf-core/metabat2/metabat2/main' -include { METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS } from '../../modules/nf-core/metabat2/jgisummarizebamcontigdepths/main' -include { MAXBIN2 } from '../../modules/nf-core/maxbin2/main' -include { GUNZIP as GUNZIP_BINS } from '../../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_UNBINS } from '../../modules/nf-core/gunzip/main' +include { METABAT2_METABAT2 } from '../../modules/nf-core/metabat2/metabat2/main' +include { METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS as METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS_SHORTREAD } from '../../modules/nf-core/metabat2/jgisummarizebamcontigdepths/main' +include { METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS as METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS_LONGREAD } from '../../modules/nf-core/metabat2/jgisummarizebamcontigdepths/main' +include { MAXBIN2 } from '../../modules/nf-core/maxbin2/main' +include { GUNZIP as GUNZIP_BINS } 
from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_UNBINS } from '../../modules/nf-core/gunzip/main' include { CONVERT_DEPTHS } from '../../modules/local/convert_depths' include { ADJUST_MAXBIN2_EXT } from '../../modules/local/adjust_maxbin2_ext' @@ -22,21 +23,30 @@ workflow BINNING { ch_versions = Channel.empty() - // generate coverage depths for each contig + // generate coverage depths for each contig and branch by assembler type ch_summarizedepth_input = assemblies - .map { meta, assembly, bams, bais -> - [ meta, bams, bais ] - } + .map { meta, assembly, bams, bais -> + [ meta, bams, bais ] + } + .branch { + longread: it[0].assembler in ['FLYE', 'METAMDBG'] + shortread: true + } - METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS ( ch_summarizedepth_input ) + // Process each through appropriate module + METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS_LONGREAD ( ch_summarizedepth_input.longread ) + METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS_SHORTREAD ( ch_summarizedepth_input.shortread ) - ch_metabat_depths = METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS.out.depth + // Merge the outputs + ch_metabat_depths = METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS_LONGREAD.out.depth + .mix(METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS_SHORTREAD.out.depth) .map { meta, depths -> def meta_new = meta + [binner: 'MetaBAT2'] [ meta_new, depths ] } - ch_versions = ch_versions.mix(METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS.out.versions.first()) + ch_versions = ch_versions.mix(METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS_LONGREAD.out.versions.first()) + ch_versions = ch_versions.mix(METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS_SHORTREAD.out.versions.first()) // combine depths back with assemblies ch_metabat2_input = assemblies @@ -134,6 +144,6 @@ workflow BINNING { bins_gz = ch_binning_results_gzipped_final unbinned = ch_splitfasta_results_gunzipped unbinned_gz = SPLIT_FASTA.out.unbinned - metabat2depths = METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS.out.depth + metabat2depths = ch_metabat_depths versions = ch_versions } diff --git 
a/subworkflows/local/binning_preparation.nf b/subworkflows/local/binning_preparation.nf index 60f63a269..4f6f2b432 100644 --- a/subworkflows/local/binning_preparation.nf +++ b/subworkflows/local/binning_preparation.nf @@ -1,51 +1,29 @@ -/* - * Binning preparation with Bowtie2 - */ -include { BOWTIE2_ASSEMBLY_BUILD } from '../../modules/local/bowtie2_assembly_build' -include { BOWTIE2_ASSEMBLY_ALIGN } from '../../modules/local/bowtie2_assembly_align' +include { SHORTREAD_BINNING_PREPARATION } from './shortread_binning_preparation' +include { LONGREAD_BINNING_PREPARATION } from './longread_binning_preparation' workflow BINNING_PREPARATION { take: - assemblies // channel: [ val(meta), path(assembly) ] - reads // channel: [ val(meta), [ reads ] ] + shortread_assemblies // channel: [ val(meta), path(assembly) ] + shortreads // channel: [ val(meta), [ reads ] ] + longread_assemblies // channel: [ val(meta), path(assembly) ] + longreads // channel: [ val(meta), [ reads ] ] main: - // build bowtie2 index for all assemblies - BOWTIE2_ASSEMBLY_BUILD ( assemblies ) + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + SHORTREAD_BINNING_PREPARATION ( shortread_assemblies, shortreads ) + LONGREAD_BINNING_PREPARATION ( longread_assemblies, longreads ) - // combine assemblies with sample reads for binning depending on specified mapping mode - if (params.binning_map_mode == 'all'){ - // combine assemblies with reads of all samples - ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index - .combine(reads) - } else if (params.binning_map_mode == 'group'){ - // combine assemblies with reads of samples from same group - ch_reads_bowtie2 = reads.map{ meta, reads -> [ meta.group, meta, reads ] } - ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index - .map { meta, assembly, index -> [ meta.group, meta, assembly, index ] } - .combine(ch_reads_bowtie2, by: 0) - .map { group, assembly_meta, assembly, index, reads_meta, reads -> [ assembly_meta, assembly, 
index, reads_meta, reads ] } + ch_grouped_mappings = SHORTREAD_BINNING_PREPARATION.out.grouped_mappings + .mix( LONGREAD_BINNING_PREPARATION.out.grouped_mappings ) - } else { - // i.e. --binning_map_mode 'own' - // combine assemblies (not co-assembled) with reads from own sample - ch_reads_bowtie2 = reads.map{ meta, reads -> [ meta.id, meta, reads ] } - ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index - .map { meta, assembly, index -> [ meta.id, meta, assembly, index ] } - .combine(ch_reads_bowtie2, by: 0) - .map { id, assembly_meta, assembly, index, reads_meta, reads -> [ assembly_meta, assembly, index, reads_meta, reads ] } - - } - - BOWTIE2_ASSEMBLY_ALIGN ( ch_bowtie2_input ) - // group mappings for one assembly - ch_grouped_mappings = BOWTIE2_ASSEMBLY_ALIGN.out.mappings - .groupTuple(by: 0) - .map { meta, assembly, bams, bais -> [ meta, assembly.sort()[0], bams, bais ] } // multiple symlinks to the same assembly -> use first of sorted list + ch_versions = ch_versions.mix( SHORTREAD_BINNING_PREPARATION.out.versions ) + ch_versions = ch_versions.mix( LONGREAD_BINNING_PREPARATION.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_BINNING_PREPARATION.out.bowtie2_assembly_multiqc ) emit: - bowtie2_assembly_multiqc = BOWTIE2_ASSEMBLY_ALIGN.out.log.map { assembly_meta, reads_meta, log -> [ log ] } - bowtie2_version = BOWTIE2_ASSEMBLY_ALIGN.out.versions grouped_mappings = ch_grouped_mappings + versions = ch_versions + multiqc_files = ch_multiqc_files } diff --git a/subworkflows/local/hybrid_assembly.nf b/subworkflows/local/hybrid_assembly.nf new file mode 100644 index 000000000..c690a5e7f --- /dev/null +++ b/subworkflows/local/hybrid_assembly.nf @@ -0,0 +1,35 @@ +// MODULES +include { SPADES as METASPADESHYBRID } from '../../modules/nf-core/spades/main' + +workflow HYBRID_ASSEMBLY { + + take: + ch_short_reads_spades // [ [meta] , fastq1, fastq2] (mandatory) + ch_long_reads_spades // [ [meta] , fastq] (mandatory) + + main: + + ch_versions = 
Channel.empty() + ch_assembled_contigs = Channel.empty() + + if (!params.single_end && !params.skip_spadeshybrid) { + ch_short_reads_spades_tmp = ch_short_reads_spades.map { meta, reads -> [meta.id, meta, reads] } + + ch_reads_spadeshybrid = ch_long_reads_spades + .map { meta, reads -> [meta.id, meta, reads] } + .combine(ch_short_reads_spades_tmp, by: 0) + .map { id, meta_long, long_reads, meta_short, short_reads -> [meta_short, short_reads, [], long_reads] } + + METASPADESHYBRID(ch_reads_spadeshybrid, [], []) + ch_spadeshybrid_assemblies = METASPADESHYBRID.out.scaffolds.map { meta, assembly -> + def meta_new = meta + [assembler: "SPAdesHybrid"] + [meta_new, assembly] + } + ch_assembled_contigs = ch_assembled_contigs.mix(ch_spadeshybrid_assemblies) + ch_versions = ch_versions.mix(METASPADESHYBRID.out.versions.first()) + } + + emit: + assembled_contigs = ch_assembled_contigs + versions = ch_versions +} diff --git a/subworkflows/local/longread_assembly.nf b/subworkflows/local/longread_assembly.nf new file mode 100644 index 000000000..64b080406 --- /dev/null +++ b/subworkflows/local/longread_assembly.nf @@ -0,0 +1,50 @@ +/* +* LONGREAD_ASSEMBLY: Assembly of long reads +*/ + +include { FLYE } from '../../modules/nf-core/flye/main' +include { METAMDBG_ASM } from '../../modules/nf-core/metamdbg/asm/main' + +workflow LONGREAD_ASSEMBLY { + take: + ch_long_reads // [ [meta] , fastq] (mandatory) + + main: + ch_assembled_contigs = Channel.empty() + ch_versions = Channel.empty() + + if (!params.skip_flye) { + + FLYE ( + ch_long_reads, + "--" + params.flye_mode + ) + + ch_flye_assemblies = FLYE.out.fasta.map { meta, assembly -> + def meta_new = meta + [assembler: "FLYE"] + [meta_new, assembly] + } + ch_assembled_contigs = ch_assembled_contigs.mix(ch_flye_assemblies) + ch_versions = ch_versions.mix(FLYE.out.versions.first()) + } + + if (!params.skip_metamdbg) { + + METAMDBG_ASM ( + ch_long_reads, + params.metamdbg_mode + ) + + ch_metamdbg_assemblies = 
METAMDBG_ASM.out.contigs.map { meta, assembly -> + def meta_new = meta + [assembler: "METAMDBG"] + [meta_new, assembly] + } + ch_assembled_contigs = ch_assembled_contigs.mix(ch_metamdbg_assemblies) + ch_versions = ch_versions.mix( METAMDBG_ASM.out.versions.first() ) + } + + emit: + assembled_contigs = ch_assembled_contigs + versions = ch_versions + +} diff --git a/subworkflows/local/longread_binning_preparation.nf b/subworkflows/local/longread_binning_preparation.nf new file mode 100644 index 000000000..0e0056707 --- /dev/null +++ b/subworkflows/local/longread_binning_preparation.nf @@ -0,0 +1,54 @@ +include { MINIMAP2_INDEX as MINIMAP2_ASSEMBLY_INDEX } from '../../modules/nf-core/minimap2/index/main' +include { MINIMAP2_ALIGN as MINIMAP2_ASSEMBLY_ALIGN } from '../../modules/nf-core/minimap2/align/main' + +workflow LONGREAD_BINNING_PREPARATION { + take: + assemblies // channel: [ val(meta), path(assembly) ] + reads // channel: [ val(meta), [ reads ] ] + + main: + ch_versions = Channel.empty() + + MINIMAP2_ASSEMBLY_INDEX ( assemblies ) + + if (params.binning_map_mode == 'all'){ + ch_minimap2_input = MINIMAP2_ASSEMBLY_INDEX.out.index + .combine(reads) + .map { meta_idx, idx, meta_reads, reads -> [ meta_idx, idx, meta_reads, reads ] } + + } else if (params.binning_map_mode == 'group') { + ch_reads_minimap2 = reads.map{ meta, reads -> [ meta.group, meta, reads ] } + ch_minimap2_input = MINIMAP2_ASSEMBLY_INDEX.out.index + .map { meta_idx, index -> [ meta_idx.group, meta_idx, index ] } + .combine(ch_reads_minimap2, by: 0) + .map { group, meta_idx, idx, meta_reads, reads -> [ meta_idx, idx, meta_reads, reads ] } + + } else { + ch_reads_minimap2 = reads.map{ meta, reads -> [ meta.id, meta, reads ] } + ch_minimap2_input = MINIMAP2_ASSEMBLY_INDEX.out.index + .map { meta_idx, index -> [ meta_idx.id, meta_idx, index ] } + .combine(ch_reads_minimap2, by: 0) + .map { id, meta_idx, idx, meta_reads, reads -> [ meta_idx, idx, meta_reads, reads ] } + } + + ch_minimap2_input_reads = 
ch_minimap2_input + .map { meta_idx, index, meta, reads -> [ meta_idx, reads ] } + ch_minimap2_input_idx = ch_minimap2_input + .map { meta_idx, index, meta, reads -> [ meta, index ] } + + MINIMAP2_ASSEMBLY_ALIGN ( ch_minimap2_input_reads, ch_minimap2_input_idx, true, 'bai', false, false ) + ch_versions = ch_versions.mix( MINIMAP2_ASSEMBLY_ALIGN.out.versions.first() ) + + ch_grouped_mappings_reads = MINIMAP2_ASSEMBLY_ALIGN.out.bam + .groupTuple(by: 0) + ch_grouped_mappings_index = MINIMAP2_ASSEMBLY_ALIGN.out.index + .groupTuple(by: 0) + ch_grouped_mappings = ch_grouped_mappings_reads + .combine(ch_grouped_mappings_index, by: 0) + .combine(assemblies, by: 0) + .map { meta, bams, bais, assembly -> [ meta, assembly, bams, bais ] } + + emit: + versions = ch_versions + grouped_mappings = ch_grouped_mappings +} diff --git a/subworkflows/local/longread_hostremoval.nf b/subworkflows/local/longread_hostremoval.nf new file mode 100644 index 000000000..d19f7abeb --- /dev/null +++ b/subworkflows/local/longread_hostremoval.nf @@ -0,0 +1,55 @@ + +// +// Remove host reads via alignment and export off-target reads +// + +include { MINIMAP2_INDEX as MINIMAP2_HOST_INDEX } from '../../modules/nf-core/minimap2/index/main' +include { MINIMAP2_ALIGN as MINIMAP2_HOST_ALIGN } from '../../modules/nf-core/minimap2/align/main' +include { SAMTOOLS_UNMAPPED as SAMTOOLS_HOSTREMOVED_UNMAPPED } from '../../modules/local/samtools_unmapped' +include { SAMTOOLS_INDEX as SAMTOOLS_HOSTREMOVED_INDEX } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_STATS as SAMTOOLS_HOSTREMOVED_STATS } from '../../modules/nf-core/samtools/stats/main' + + +workflow LONGREAD_HOSTREMOVAL { + take: + reads // [ [ meta ], [ reads ] ] + reference // /path/to/fasta + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + ch_host_reference = reference.map { [ [:], it ] } + + ch_minimap2_index = MINIMAP2_HOST_INDEX ( ch_host_reference ).index + ch_versions = ch_versions.mix( 
MINIMAP2_HOST_INDEX.out.versions ) + + MINIMAP2_HOST_ALIGN ( reads, ch_minimap2_index, true, 'bai', false, false ) + ch_versions = ch_versions.mix( MINIMAP2_HOST_ALIGN.out.versions.first() ) + ch_minimap2_mapped = MINIMAP2_HOST_ALIGN.out.bam + .map { + meta, reads -> + [ meta, reads, [] ] + } + + // Generate unmapped reads FASTQ for downstream taxprofiling + SAMTOOLS_HOSTREMOVED_UNMAPPED ( ch_minimap2_mapped ) + ch_versions = ch_versions.mix( SAMTOOLS_HOSTREMOVED_UNMAPPED.out.versions.first() ) + + // Indexing whole BAM for host removal statistics + SAMTOOLS_HOSTREMOVED_INDEX ( MINIMAP2_HOST_ALIGN.out.bam ) + ch_versions = ch_versions.mix( SAMTOOLS_HOSTREMOVED_INDEX.out.versions.first() ) + + bam_bai = MINIMAP2_HOST_ALIGN.out.bam + .join(SAMTOOLS_HOSTREMOVED_INDEX.out.bai) + + SAMTOOLS_HOSTREMOVED_STATS ( bam_bai, ch_host_reference ) + ch_versions = ch_versions.mix(SAMTOOLS_HOSTREMOVED_STATS.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_HOSTREMOVED_STATS.out.stats ) + + emit: + stats = SAMTOOLS_HOSTREMOVED_STATS.out.stats //channel: [val(meta), [reads ] ] + reads = SAMTOOLS_HOSTREMOVED_UNMAPPED.out.fastq // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + multiqc_files = ch_multiqc_files +} diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 7de6dd253..b6e26b67d 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -11,11 +11,15 @@ include { FILTLONG } from '../../mo include { CHOPPER } from '../../modules/nf-core/chopper' include { NANOQ } from '../../modules/nf-core/nanoq' +// include other subworkflows here +include { LONGREAD_HOSTREMOVAL } from './longread_hostremoval' + workflow LONGREAD_PREPROCESSING { take: ch_raw_long_reads // [ [meta] , fastq] (mandatory) ch_short_reads // [ [meta] , fastq1, fastq2] (mandatory) - ch_lambda_db // [fasta] + ch_lambda_db // [fasta] + 
ch_host_fasta // [fasta] main: ch_versions = Channel.empty() @@ -65,12 +69,13 @@ workflow LONGREAD_PREPROCESSING { if (params.longread_filtering_tool == 'filtlong') { // join long and short reads by sample name ch_short_reads_tmp = ch_short_reads - .map { meta, sr -> [ meta.id, meta, sr ] } + .map { meta, sr -> [ meta.id, sr ] } ch_short_and_long_reads = ch_long_reads .map { meta, lr -> [ meta.id, meta, lr ] } - .join(ch_short_reads_tmp, by: 0) - .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, sr, lr ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end + .join(ch_short_reads_tmp, by: 0, remainder: true) + .filter { it[1] != null } // Make sure long reads are not null, which happens if ch_short_reads is empty + .map { id, meta_lr, lr, sr -> [ meta_lr, sr ? sr : [], lr ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end FILTLONG ( ch_short_and_long_reads @@ -95,6 +100,17 @@ workflow LONGREAD_PREPROCESSING { ch_versions = ch_versions.mix(CHOPPER.out.versions.first()) } + // host removal long reads + if ( params.host_fasta ) { + LONGREAD_HOSTREMOVAL ( + ch_long_reads, + ch_host_fasta + ) + ch_long_reads = LONGREAD_HOSTREMOVAL.out.reads + ch_versions = ch_versions.mix(LONGREAD_HOSTREMOVAL.out.versions) + ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_HOSTREMOVAL.out.multiqc_files ) + } + NANOPLOT_FILTERED ( ch_long_reads ) diff --git a/subworkflows/local/shortread_assembly.nf b/subworkflows/local/shortread_assembly.nf new file mode 100644 index 000000000..efec33640 --- /dev/null +++ b/subworkflows/local/shortread_assembly.nf @@ -0,0 +1,39 @@ + +//MODULES +include { MEGAHIT } from '../../modules/nf-core/megahit/main' +include { SPADES as METASPADES } from '../../modules/nf-core/spades/main' + +workflow SHORTREAD_ASSEMBLY { + take: + ch_short_reads_grouped // [ [meta] , fastq1, fastq2] (mandatory) + ch_short_reads_spades + + main: + ch_versions = Channel.empty() + ch_assembled_contigs = 
Channel.empty() + + if (!params.single_end && !params.skip_spades) { + METASPADES(ch_short_reads_spades.map { meta, reads -> [meta, reads, [], []] }, [], []) + ch_spades_assemblies = METASPADES.out.scaffolds.map { meta, assembly -> + def meta_new = meta + [assembler: 'SPAdes'] + [meta_new, assembly] + } + ch_assembled_contigs = ch_assembled_contigs.mix(ch_spades_assemblies) + ch_versions = ch_versions.mix(METASPADES.out.versions.first()) + } + + if (!params.skip_megahit) { + MEGAHIT(ch_short_reads_grouped) + ch_megahit_assemblies = MEGAHIT.out.contigs.map { meta, assembly -> + def meta_new = meta + [assembler: 'MEGAHIT'] + [meta_new, assembly] + } + ch_assembled_contigs = ch_assembled_contigs.mix(ch_megahit_assemblies) + ch_versions = ch_versions.mix(MEGAHIT.out.versions.first()) + } + + emit: + assembled_contigs = ch_assembled_contigs + versions = ch_versions + +} diff --git a/subworkflows/local/shortread_binning_preparation.nf b/subworkflows/local/shortread_binning_preparation.nf new file mode 100644 index 000000000..a44bccd99 --- /dev/null +++ b/subworkflows/local/shortread_binning_preparation.nf @@ -0,0 +1,55 @@ +/* + * Binning preparation with Bowtie2 + */ + +include { BOWTIE2_ASSEMBLY_BUILD } from '../../modules/local/bowtie2_assembly_build' +include { BOWTIE2_ASSEMBLY_ALIGN } from '../../modules/local/bowtie2_assembly_align' + +workflow SHORTREAD_BINNING_PREPARATION { + take: + assemblies // channel: [ val(meta), path(assembly) ] + reads // channel: [ val(meta), [ reads ] ] + + main: + + ch_versions = Channel.empty() + // build bowtie2 index for all assemblies + BOWTIE2_ASSEMBLY_BUILD ( assemblies ) + + // combine assemblies with sample reads for binning depending on specified mapping mode + if (params.binning_map_mode == 'all'){ + // combine assemblies with reads of all samples + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index + .combine(reads) + } else if (params.binning_map_mode == 'group'){ + // combine assemblies with reads of samples from 
same group + ch_reads_bowtie2 = reads.map{ meta, reads -> [ meta.group, meta, reads ] } + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index + .map { meta, assembly, index -> [ meta.group, meta, assembly, index ] } + .combine(ch_reads_bowtie2, by: 0) + .map { group, assembly_meta, assembly, index, reads_meta, reads -> [ assembly_meta, assembly, index, reads_meta, reads ] } + + } else { + // i.e. --binning_map_mode 'own' + // combine assemblies (not co-assembled) with reads from own sample + ch_reads_bowtie2 = reads.map{ meta, reads -> [ meta.id, meta, reads ] } + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index + .map { meta, assembly, index -> [ meta.id, meta, assembly, index ] } + .combine(ch_reads_bowtie2, by: 0) + .map { id, assembly_meta, assembly, index, reads_meta, reads -> [ assembly_meta, assembly, index, reads_meta, reads ] } + + } + + BOWTIE2_ASSEMBLY_ALIGN ( ch_bowtie2_input ) + // group mappings for one assembly + ch_grouped_mappings = BOWTIE2_ASSEMBLY_ALIGN.out.mappings + .groupTuple(by: 0) + .map { meta, assembly, bams, bais -> [ meta, assembly.sort()[0], bams, bais ] } // multiple symlinks to the same assembly -> use first of sorted list + + ch_versions = ch_versions.mix( BOWTIE2_ASSEMBLY_ALIGN.out.versions.first() ) + + emit: + bowtie2_assembly_multiqc = BOWTIE2_ASSEMBLY_ALIGN.out.log.map { assembly_meta, reads_meta, log -> [ log ] } + versions = ch_versions + grouped_mappings = ch_grouped_mappings +} diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index a8c6dfd2b..15298f6c0 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -81,10 +81,10 @@ workflow PIPELINE_INITIALISATION { meta.run = meta.run == [] ? 
"0" : meta.run meta.single_end = params.single_end - if (params.single_end) { + if (params.single_end && sr1) { return [meta, [sr1]] } - else { + else if (sr1 && sr2) { return [meta, [sr1, sr2]] } } @@ -98,11 +98,14 @@ workflow PIPELINE_INITIALISATION { // Check already if long reads are provided, for later parameter validation def hybrid = false - ch_raw_long_reads.map { - if (it) { - hybrid = true + ch_raw_long_reads + .map { meta, lr -> [ meta.id, lr ] } + .join(ch_raw_short_reads.map {meta, sr1 -> [meta.id, sr1] }, by: 0, remainder: true) + .map { id, lr, sr1 -> + if (lr && sr1) { + hybrid = true + } } - } // // Custom validation for pipeline parameters @@ -225,7 +228,7 @@ def validateInputParameters(hybrid) { if (!params.skip_spades && params.spades_fix_cpus == -1) { log.warn("[nf-core/mag]: At least one assembly process is run with a parameter to ensure reproducible results, but SPAdes not. Consider using the parameter '--spades_fix_cpus'.") } - if (hybrid && params.skip_spadeshybrid && params.spadeshybrid_fix_cpus == -1) { + if (hybrid && !params.skip_spadeshybrid && params.spadeshybrid_fix_cpus == -1) { log.warn("[nf-core/mag]: At least one assembly process is run with a parameter to ensure reproducible results, but SPAdes hybrid not. Consider using the parameter '--spadeshybrid_fix_cpus'.") } if (!params.skip_megahit && !params.megahit_fix_cpu_1) { @@ -245,11 +248,8 @@ def validateInputParameters(hybrid) { if (params.host_fasta && params.host_genome) { error('[nf-core/mag] ERROR: Both host fasta reference and iGenomes genome are specified to remove host contamination! Invalid combination, please specify either --host_fasta or --host_genome.') } - if (hybrid && (params.host_fasta || params.host_genome)) { - log.warn('[nf-core/mag]: Host read removal is only applied to short reads.
Long reads might be filtered indirectly by Filtlong, which is set to use read qualities estimated based on k-mer matches to the short, already filtered reads.') - if (params.longreads_length_weight > 1) { - log.warn("[nf-core/mag]: The parameter --longreads_length_weight is ${params.longreads_length_weight}, causing the read length being more important for long read filtering than the read quality. Set --longreads_length_weight to 1 in order to assign equal weights.") - } + if (hybrid && (params.host_fasta || params.host_genome) && params.longread_filtering_tool == "filtlong" && params.longreads_length_weight > 1 ) { + log.warn("[nf-core/mag]: The parameter --longreads_length_weight is ${params.longreads_length_weight}, causing the read length being more important for long read filtering than the read quality. Set --longreads_length_weight to 1 in order to assign equal weights.") } if (params.host_genome) { if (!params.genomes) { @@ -329,7 +329,7 @@ def validateInputParameters(hybrid) { // def validateInputSamplesheet(meta, sr1, sr2, lr) { - if (!sr2 && !params.single_end) { + if ((!sr2 && !lr) && !params.single_end) { error("[nf-core/mag] ERROR: Single-end data must be executed with `--single_end`. Note that it is not possible to mix single- and paired-end data in one run!
Check input TSV for sample: ${meta.id}") } if (sr2 && params.single_end) { diff --git a/workflows/mag.nf b/workflows/mag.nf index 5b424d62d..9734a89e5 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -23,6 +23,7 @@ include { DOMAIN_CLASSIFICATION } from '../subwo include { DEPTHS } from '../subworkflows/local/depths' include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' +include { ASSEMBLY } from '../subworkflows/local/assembly' // // MODULE: Installed directly from nf-core/modules @@ -33,10 +34,6 @@ include { CENTRIFUGE_KREPORT } from '../modul include { KRONA_KRONADB } from '../modules/nf-core/krona/kronadb/main' include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main' include { KRAKENTOOLS_KREPORT2KRONA as KREPORT2KRONA_CENTRIFUGE } from '../modules/nf-core/krakentools/kreport2krona/main' -include { MEGAHIT } from '../modules/nf-core/megahit/main' -include { SPADES as METASPADES } from '../modules/nf-core/spades/main' -include { SPADES as METASPADESHYBRID } from '../modules/nf-core/spades/main' -include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' include { GUNZIP as GUNZIP_ASSEMBLYINPUT } from '../modules/nf-core/gunzip' include { PRODIGAL } from '../modules/nf-core/prodigal/main' include { PROKKA } from '../modules/nf-core/prokka/main' @@ -48,9 +45,6 @@ include { METAEUK_EASYPREDICT } from '../modul // include { KRAKEN2_DB_PREPARATION } from '../modules/local/kraken2_db_preparation' include { KRAKEN2 } from '../modules/local/kraken2' -include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../modules/local/pool_single_reads' -include { POOL_PAIRED_READS } from '../modules/local/pool_paired_reads' -include { POOL_SINGLE_READS as POOL_LONG_READS } from '../modules/local/pool_single_reads' include { QUAST } from '../modules/local/quast' include { QUAST_BINS } from 
'../modules/local/quast_bins' include { QUAST_BINS_SUMMARY } from '../modules/local/quast_bins_summary' @@ -200,6 +194,7 @@ workflow MAG { ch_raw_long_reads, ch_short_reads, ch_lambda_db, + ch_host_fasta ) ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions) @@ -312,127 +307,18 @@ workflow MAG { if (!params.assembly_input) { - // Co-assembly preparation: grouping for MEGAHIT and for pooling for SPAdes - if (params.coassemble_group) { - // short reads - // group and set group as new id - ch_short_reads_grouped = ch_short_reads_assembly - .map { meta, reads -> [meta.group, meta, reads] } - .groupTuple(by: 0) - .map { group, metas, reads -> - def assemble_as_single = params.single_end || (params.bbnorm && params.coassemble_group) - def meta = [:] - meta.id = "group-${group}" - meta.group = group - meta.single_end = assemble_as_single - if (assemble_as_single) { - [meta, reads.collect { it }, []] - } - else { - [meta, reads.collect { it[0] }, reads.collect { it[1] }] - } - } - // long reads - // group and set group as new id - ch_long_reads_grouped = ch_long_reads - .map { meta, reads -> [meta.group, meta, reads] } - .groupTuple(by: 0) - .map { group, metas, reads -> - def meta = [:] - meta.id = "group-${group}" - meta.group = group - [meta, reads.collect { it }] - } - } - else { - ch_short_reads_grouped = ch_short_reads_assembly - .filter { it[0].single_end } - .map { meta, reads -> [meta, [reads], []] } - .mix( - ch_short_reads_assembly.filter { !it[0].single_end }.map { meta, reads -> [meta, [reads[0]], [reads[1]]] } - ) - ch_long_reads_grouped = ch_long_reads - } - - if (!params.skip_spades || !params.skip_spadeshybrid) { - if (params.coassemble_group) { - if (params.bbnorm) { - ch_short_reads_spades = ch_short_reads_grouped.map { [it[0], it[1]] } - } - else { - POOL_SHORT_SINGLE_READS( - ch_short_reads_grouped.filter { it[0].single_end } - ) - POOL_PAIRED_READS( - ch_short_reads_grouped.filter { !it[0].single_end } - ) - ch_short_reads_spades = 
POOL_SHORT_SINGLE_READS.out.reads.mix(POOL_PAIRED_READS.out.reads) - } - } - else { - ch_short_reads_spades = ch_short_reads_assembly - } - // long reads - if (!params.single_end && !params.skip_spadeshybrid) { - POOL_LONG_READS(ch_long_reads_grouped) - ch_long_reads_spades = POOL_LONG_READS.out.reads - } - else { - ch_long_reads_spades = Channel.empty() - } - } - else { - ch_short_reads_spades = Channel.empty() - ch_long_reads_spades = Channel.empty() - } - - // Assembly - - ch_assembled_contigs = Channel.empty() - - if (!params.single_end && !params.skip_spades) { - METASPADES(ch_short_reads_spades.map { meta, reads -> [meta, reads, [], []] }, [], []) - ch_spades_assemblies = METASPADES.out.scaffolds.map { meta, assembly -> - def meta_new = meta + [assembler: 'SPAdes'] - [meta_new, assembly] - } - ch_assembled_contigs = ch_assembled_contigs.mix(ch_spades_assemblies) - ch_versions = ch_versions.mix(METASPADES.out.versions.first()) - } - - if (!params.single_end && !params.skip_spadeshybrid) { - ch_short_reads_spades_tmp = ch_short_reads_spades.map { meta, reads -> [meta.id, meta, reads] } - - ch_reads_spadeshybrid = ch_long_reads_spades - .map { meta, reads -> [meta.id, meta, reads] } - .combine(ch_short_reads_spades_tmp, by: 0) - .map { id, meta_long, long_reads, meta_short, short_reads -> [meta_short, short_reads, [], long_reads] } - - METASPADESHYBRID(ch_reads_spadeshybrid, [], []) - ch_spadeshybrid_assemblies = METASPADESHYBRID.out.scaffolds.map { meta, assembly -> - def meta_new = meta + [assembler: "SPAdesHybrid"] - [meta_new, assembly] - } - ch_assembled_contigs = ch_assembled_contigs.mix(ch_spadeshybrid_assemblies) - ch_versions = ch_versions.mix(METASPADESHYBRID.out.versions.first()) - } - - if (!params.skip_megahit) { - MEGAHIT(ch_short_reads_grouped) - ch_megahit_assemblies = MEGAHIT.out.contigs.map { meta, assembly -> - def meta_new = meta + [assembler: 'MEGAHIT'] - [meta_new, assembly] - } - ch_assembled_contigs = 
ch_assembled_contigs.mix(ch_megahit_assemblies) - ch_versions = ch_versions.mix(MEGAHIT.out.versions.first()) - } + ASSEMBLY( + ch_short_reads_assembly, + ch_long_reads + ) + ch_versions = ch_versions.mix(ASSEMBLY.out.versions) + ch_shortread_assemblies = ASSEMBLY.out.shortread_assemblies + ch_longread_assemblies = ASSEMBLY.out.longread_assemblies + ch_assemblies = ch_shortread_assemblies.mix(ch_longread_assemblies) - GUNZIP_ASSEMBLIES(ch_assembled_contigs) - ch_versions = ch_versions.mix(GUNZIP_ASSEMBLIES.out.versions) - ch_assemblies = GUNZIP_ASSEMBLIES.out.gunzip } else { ch_assemblies_split = ch_input_assemblies.branch { meta, assembly -> @@ -445,6 +331,10 @@ workflow MAG { ch_assemblies = Channel.empty() ch_assemblies = ch_assemblies.mix(ch_assemblies_split.ungzip, GUNZIP_ASSEMBLYINPUT.out.gunzip) + ch_shortread_assemblies = ch_assemblies + .filter { it[0].assembler.toUpperCase() in ['SPADES', 'SPADESHYBRID', 'MEGAHIT']} + ch_longread_assemblies = ch_assemblies + .filter { it[0].assembler.toUpperCase() in ['FLYE', 'METAMDBG']} } ch_quast_multiqc = Channel.empty() @@ -488,10 +378,14 @@ workflow MAG { if (!params.skip_binning || params.ancient_dna) { BINNING_PREPARATION( - ch_assemblies, + ch_shortread_assemblies, ch_short_reads, + ch_longread_assemblies, + ch_long_reads ) - ch_versions = ch_versions.mix(BINNING_PREPARATION.out.bowtie2_version.first()) + ch_versions = ch_versions.mix(BINNING_PREPARATION.out.versions) + + } /* @@ -826,7 +720,7 @@ workflow MAG { } if (!params.skip_binning || params.ancient_dna) { - ch_multiqc_files = ch_multiqc_files.mix(BINNING_PREPARATION.out.bowtie2_assembly_multiqc.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(BINNING_PREPARATION.out.multiqc_files.collect().ifEmpty([])) } if (!params.skip_binning && !params.skip_prokka) {