From a4093471795c5e7443cef89deed742ab64ae197c Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 28 Aug 2024 15:05:03 +0200 Subject: [PATCH 01/23] install class expanded subworkflows --- .nf-core.yml | 3 +- modules.json | 100 ++++++ .../mirpedrol/clustalo/align/environment.yml | 8 + modules/mirpedrol/clustalo/align/main.nf | 54 +++ modules/mirpedrol/clustalo/align/meta.yml | 59 ++++ .../clustalo/align/tests/main.nf.test | 34 ++ .../clustalo/align/tests/main.nf.test.snap | 31 ++ .../mirpedrol/clustalo/align/tests/tags.yml | 2 + .../clustalo/guidetree/environment.yml | 7 + modules/mirpedrol/clustalo/guidetree/main.nf | 47 +++ modules/mirpedrol/clustalo/guidetree/meta.yml | 53 +++ .../clustalo/guidetree/tests/main.nf.test | 33 ++ .../guidetree/tests/main.nf.test.snap | 23 ++ .../clustalo/guidetree/tests/tags.yml | 2 + .../clustalo/treealign/environment.yml | 8 + modules/mirpedrol/clustalo/treealign/main.nf | 56 ++++ modules/mirpedrol/clustalo/treealign/meta.yml | 68 ++++ .../clustalo/treealign/tests/main.nf.test | 48 +++ .../treealign/tests/main.nf.test.snap | 31 ++ .../clustalo/treealign/tests/tags.yml | 2 + modules/mirpedrol/famsa/align/environment.yml | 7 + modules/mirpedrol/famsa/align/main.nf | 48 +++ modules/mirpedrol/famsa/align/meta.yml | 54 +++ .../mirpedrol/famsa/align/tests/main.nf.test | 34 ++ .../famsa/align/tests/main.nf.test.snap | 31 ++ modules/mirpedrol/famsa/align/tests/tags.yml | 2 + .../mirpedrol/famsa/guidetree/environment.yml | 7 + modules/mirpedrol/famsa/guidetree/main.nf | 49 +++ modules/mirpedrol/famsa/guidetree/meta.yml | 52 +++ .../famsa/guidetree/tests/main.nf.test | 32 ++ .../famsa/guidetree/tests/main.nf.test.snap | 23 ++ .../mirpedrol/famsa/guidetree/tests/tags.yml | 2 + .../mirpedrol/famsa/treealign/environment.yml | 7 + modules/mirpedrol/famsa/treealign/main.nf | 50 +++ modules/mirpedrol/famsa/treealign/meta.yml | 63 ++++ .../famsa/treealign/tests/main.nf.test | 48 +++ .../famsa/treealign/tests/main.nf.test.snap | 31 ++ 
.../mirpedrol/famsa/treealign/tests/tags.yml | 2 + .../mirpedrol/kalign/align/environment.yml | 8 + modules/mirpedrol/kalign/align/main.nf | 48 +++ modules/mirpedrol/kalign/align/meta.yml | 53 +++ .../mirpedrol/kalign/align/tests/main.nf.test | 33 ++ .../kalign/align/tests/main.nf.test.snap | 60 ++++ modules/mirpedrol/kalign/align/tests/tags.yml | 2 + .../mirpedrol/learnmsa/align/environment.yml | 8 + modules/mirpedrol/learnmsa/align/main.nf | 48 +++ modules/mirpedrol/learnmsa/align/meta.yml | 53 +++ .../learnmsa/align/tests/main.nf.test | 36 ++ .../learnmsa/align/tests/main.nf.test.snap | 26 ++ .../mirpedrol/learnmsa/align/tests/tags.yml | 2 + modules/mirpedrol/mafft/environment.yml | 8 + modules/mirpedrol/mafft/main.nf | 50 +++ modules/mirpedrol/mafft/meta.yml | 59 ++++ modules/mirpedrol/mafft/tests/main.nf.test | 31 ++ .../mirpedrol/mafft/tests/main.nf.test.snap | 317 ++++++++++++++++++ modules/mirpedrol/mafft/tests/tags.yml | 2 + modules/mirpedrol/magus/align/environment.yml | 8 + modules/mirpedrol/magus/align/main.nf | 53 +++ modules/mirpedrol/magus/align/meta.yml | 53 +++ .../mirpedrol/magus/align/tests/main.nf.test | 40 +++ .../magus/align/tests/main.nf.test.snap | 14 + modules/mirpedrol/magus/align/tests/tags.yml | 2 + .../mirpedrol/magus/guidetree/environment.yml | 7 + modules/mirpedrol/magus/guidetree/main.nf | 48 +++ modules/mirpedrol/magus/guidetree/meta.yml | 51 +++ .../magus/guidetree/tests/main.nf.test | 39 +++ .../magus/guidetree/tests/main.nf.test.snap | 23 ++ .../mirpedrol/magus/guidetree/tests/tags.yml | 2 + .../mirpedrol/magus/treealign/environment.yml | 8 + modules/mirpedrol/magus/treealign/main.nf | 55 +++ modules/mirpedrol/magus/treealign/meta.yml | 65 ++++ .../magus/treealign/tests/main.nf.test | 54 +++ .../magus/treealign/tests/main.nf.test.snap | 14 + .../mirpedrol/magus/treealign/tests/tags.yml | 2 + .../mirpedrol/muscle5/super5/environment.yml | 8 + modules/mirpedrol/muscle5/super5/main.nf | 62 ++++ 
modules/mirpedrol/muscle5/super5/meta.yml | 60 ++++ .../muscle5/super5/tests/main.nf.test | 54 +++ .../muscle5/super5/tests/main.nf.test.snap | 65 ++++ .../muscle5/super5/tests/nextflow.config | 0 .../muscle5/super5/tests/perm_all.config | 3 + .../mirpedrol/muscle5/super5/tests/tags.yml | 2 + .../mirpedrol/tcoffee/align/environment.yml | 8 + modules/mirpedrol/tcoffee/align/main.nf | 58 ++++ modules/mirpedrol/tcoffee/align/meta.yml | 61 ++++ .../tcoffee/align/tests/main.nf.test | 33 ++ .../tcoffee/align/tests/main.nf.test.snap | 31 ++ .../mirpedrol/tcoffee/align/tests/tags.yml | 2 + .../tcoffee/treealign/environment.yml | 8 + modules/mirpedrol/tcoffee/treealign/main.nf | 60 ++++ modules/mirpedrol/tcoffee/treealign/meta.yml | 70 ++++ .../tcoffee/treealign/tests/main.nf.test | 51 +++ .../tcoffee/treealign/tests/main.nf.test.snap | 31 ++ .../tcoffee/treealign/tests/tags.yml | 2 + subworkflows/mirpedrol/msa_alignment/main.nf | 35 ++ subworkflows/mirpedrol/msa_alignment/meta.yml | 39 +++ .../msa_alignment/tests/main.nf.test | 193 +++++++++++ .../msa_alignment/tests/main.nf.test.snap | 224 +++++++++++++ subworkflows/mirpedrol/msa_guidetree/main.nf | 25 ++ subworkflows/mirpedrol/msa_guidetree/meta.yml | 34 ++ .../msa_guidetree/tests/main.nf.test | 81 +++++ .../msa_guidetree/tests/main.nf.test.snap | 80 +++++ subworkflows/mirpedrol/msa_treealign/main.nf | 29 ++ subworkflows/mirpedrol/msa_treealign/meta.yml | 67 ++++ .../msa_treealign/tests/main.nf.test | 172 ++++++++++ .../msa_treealign/tests/main.nf.test.snap | 101 ++++++ 106 files changed, 4341 insertions(+), 1 deletion(-) create mode 100644 modules/mirpedrol/clustalo/align/environment.yml create mode 100644 modules/mirpedrol/clustalo/align/main.nf create mode 100644 modules/mirpedrol/clustalo/align/meta.yml create mode 100644 modules/mirpedrol/clustalo/align/tests/main.nf.test create mode 100644 modules/mirpedrol/clustalo/align/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/clustalo/align/tests/tags.yml 
create mode 100644 modules/mirpedrol/clustalo/guidetree/environment.yml create mode 100644 modules/mirpedrol/clustalo/guidetree/main.nf create mode 100644 modules/mirpedrol/clustalo/guidetree/meta.yml create mode 100644 modules/mirpedrol/clustalo/guidetree/tests/main.nf.test create mode 100644 modules/mirpedrol/clustalo/guidetree/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/clustalo/guidetree/tests/tags.yml create mode 100644 modules/mirpedrol/clustalo/treealign/environment.yml create mode 100644 modules/mirpedrol/clustalo/treealign/main.nf create mode 100644 modules/mirpedrol/clustalo/treealign/meta.yml create mode 100644 modules/mirpedrol/clustalo/treealign/tests/main.nf.test create mode 100644 modules/mirpedrol/clustalo/treealign/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/clustalo/treealign/tests/tags.yml create mode 100644 modules/mirpedrol/famsa/align/environment.yml create mode 100644 modules/mirpedrol/famsa/align/main.nf create mode 100644 modules/mirpedrol/famsa/align/meta.yml create mode 100644 modules/mirpedrol/famsa/align/tests/main.nf.test create mode 100644 modules/mirpedrol/famsa/align/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/famsa/align/tests/tags.yml create mode 100644 modules/mirpedrol/famsa/guidetree/environment.yml create mode 100644 modules/mirpedrol/famsa/guidetree/main.nf create mode 100644 modules/mirpedrol/famsa/guidetree/meta.yml create mode 100644 modules/mirpedrol/famsa/guidetree/tests/main.nf.test create mode 100644 modules/mirpedrol/famsa/guidetree/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/famsa/guidetree/tests/tags.yml create mode 100644 modules/mirpedrol/famsa/treealign/environment.yml create mode 100644 modules/mirpedrol/famsa/treealign/main.nf create mode 100644 modules/mirpedrol/famsa/treealign/meta.yml create mode 100644 modules/mirpedrol/famsa/treealign/tests/main.nf.test create mode 100644 modules/mirpedrol/famsa/treealign/tests/main.nf.test.snap create 
mode 100644 modules/mirpedrol/famsa/treealign/tests/tags.yml create mode 100644 modules/mirpedrol/kalign/align/environment.yml create mode 100644 modules/mirpedrol/kalign/align/main.nf create mode 100644 modules/mirpedrol/kalign/align/meta.yml create mode 100644 modules/mirpedrol/kalign/align/tests/main.nf.test create mode 100644 modules/mirpedrol/kalign/align/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/kalign/align/tests/tags.yml create mode 100644 modules/mirpedrol/learnmsa/align/environment.yml create mode 100644 modules/mirpedrol/learnmsa/align/main.nf create mode 100644 modules/mirpedrol/learnmsa/align/meta.yml create mode 100644 modules/mirpedrol/learnmsa/align/tests/main.nf.test create mode 100644 modules/mirpedrol/learnmsa/align/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/learnmsa/align/tests/tags.yml create mode 100644 modules/mirpedrol/mafft/environment.yml create mode 100644 modules/mirpedrol/mafft/main.nf create mode 100644 modules/mirpedrol/mafft/meta.yml create mode 100644 modules/mirpedrol/mafft/tests/main.nf.test create mode 100644 modules/mirpedrol/mafft/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/mafft/tests/tags.yml create mode 100644 modules/mirpedrol/magus/align/environment.yml create mode 100644 modules/mirpedrol/magus/align/main.nf create mode 100644 modules/mirpedrol/magus/align/meta.yml create mode 100644 modules/mirpedrol/magus/align/tests/main.nf.test create mode 100644 modules/mirpedrol/magus/align/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/magus/align/tests/tags.yml create mode 100644 modules/mirpedrol/magus/guidetree/environment.yml create mode 100644 modules/mirpedrol/magus/guidetree/main.nf create mode 100644 modules/mirpedrol/magus/guidetree/meta.yml create mode 100644 modules/mirpedrol/magus/guidetree/tests/main.nf.test create mode 100644 modules/mirpedrol/magus/guidetree/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/magus/guidetree/tests/tags.yml 
create mode 100644 modules/mirpedrol/magus/treealign/environment.yml create mode 100644 modules/mirpedrol/magus/treealign/main.nf create mode 100644 modules/mirpedrol/magus/treealign/meta.yml create mode 100644 modules/mirpedrol/magus/treealign/tests/main.nf.test create mode 100644 modules/mirpedrol/magus/treealign/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/magus/treealign/tests/tags.yml create mode 100644 modules/mirpedrol/muscle5/super5/environment.yml create mode 100644 modules/mirpedrol/muscle5/super5/main.nf create mode 100644 modules/mirpedrol/muscle5/super5/meta.yml create mode 100644 modules/mirpedrol/muscle5/super5/tests/main.nf.test create mode 100644 modules/mirpedrol/muscle5/super5/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/muscle5/super5/tests/nextflow.config create mode 100644 modules/mirpedrol/muscle5/super5/tests/perm_all.config create mode 100644 modules/mirpedrol/muscle5/super5/tests/tags.yml create mode 100644 modules/mirpedrol/tcoffee/align/environment.yml create mode 100644 modules/mirpedrol/tcoffee/align/main.nf create mode 100644 modules/mirpedrol/tcoffee/align/meta.yml create mode 100644 modules/mirpedrol/tcoffee/align/tests/main.nf.test create mode 100644 modules/mirpedrol/tcoffee/align/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/tcoffee/align/tests/tags.yml create mode 100644 modules/mirpedrol/tcoffee/treealign/environment.yml create mode 100644 modules/mirpedrol/tcoffee/treealign/main.nf create mode 100644 modules/mirpedrol/tcoffee/treealign/meta.yml create mode 100644 modules/mirpedrol/tcoffee/treealign/tests/main.nf.test create mode 100644 modules/mirpedrol/tcoffee/treealign/tests/main.nf.test.snap create mode 100644 modules/mirpedrol/tcoffee/treealign/tests/tags.yml create mode 100644 subworkflows/mirpedrol/msa_alignment/main.nf create mode 100644 subworkflows/mirpedrol/msa_alignment/meta.yml create mode 100644 subworkflows/mirpedrol/msa_alignment/tests/main.nf.test create mode 
100644 subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap create mode 100644 subworkflows/mirpedrol/msa_guidetree/main.nf create mode 100644 subworkflows/mirpedrol/msa_guidetree/meta.yml create mode 100644 subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test create mode 100644 subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test.snap create mode 100644 subworkflows/mirpedrol/msa_treealign/main.nf create mode 100644 subworkflows/mirpedrol/msa_treealign/meta.yml create mode 100644 subworkflows/mirpedrol/msa_treealign/tests/main.nf.test create mode 100644 subworkflows/mirpedrol/msa_treealign/tests/main.nf.test.snap diff --git a/.nf-core.yml b/.nf-core.yml index e164e770..8dcac511 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -2,4 +2,5 @@ repository_type: pipeline nf_core_version: "2.14.1" lint: multiqc_config: False - files_exist: conf/igenomes.config + files_exist: + - conf/igenomes.config diff --git a/modules.json b/modules.json index b8e484f5..59908138 100644 --- a/modules.json +++ b/modules.json @@ -2,6 +2,106 @@ "name": "nf-core/multiplesequencealign", "homePage": "https://github.com/nf-core/multiplesequencealign", "repos": { + "https://github.com/mirpedrol/class-modules.git": { + "modules": { + "mirpedrol": { + "clustalo/align": { + "branch": "main", + "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", + "installed_by": ["msa_alignment"] + }, + "clustalo/guidetree": { + "branch": "main", + "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", + "installed_by": ["msa_guidetree"] + }, + "clustalo/treealign": { + "branch": "main", + "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", + "installed_by": ["msa_treealign"] + }, + "famsa/align": { + "branch": "main", + "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", + "installed_by": ["msa_alignment"] + }, + "famsa/guidetree": { + "branch": "main", + "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", + "installed_by": ["msa_guidetree"] + }, + "famsa/treealign": { + "branch": "main", 
+ "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", + "installed_by": ["msa_treealign"] + }, + "kalign/align": { + "branch": "main", + "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", + "installed_by": ["msa_alignment"] + }, + "learnmsa/align": { + "branch": "main", + "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", + "installed_by": ["msa_alignment"] + }, + "mafft": { + "branch": "main", + "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", + "installed_by": ["msa_alignment"] + }, + "magus/align": { + "branch": "main", + "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", + "installed_by": ["msa_alignment"] + }, + "magus/guidetree": { + "branch": "main", + "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", + "installed_by": ["msa_guidetree"] + }, + "magus/treealign": { + "branch": "main", + "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", + "installed_by": ["msa_treealign"] + }, + "muscle5/super5": { + "branch": "main", + "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", + "installed_by": ["msa_alignment"] + }, + "tcoffee/align": { + "branch": "main", + "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", + "installed_by": ["msa_alignment"] + }, + "tcoffee/treealign": { + "branch": "main", + "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", + "installed_by": ["msa_treealign"] + } + } + }, + "subworkflows": { + "mirpedrol": { + "msa_alignment": { + "branch": "main", + "git_sha": "da5b7cd83d0a060b0b5343671ab552ee5b7c8aa9", + "installed_by": ["subworkflows"] + }, + "msa_guidetree": { + "branch": "main", + "git_sha": "da5b7cd83d0a060b0b5343671ab552ee5b7c8aa9", + "installed_by": ["subworkflows"] + }, + "msa_treealign": { + "branch": "main", + "git_sha": "0da6e13e2cade9d530dcf731a3f281998f72b5d1", + "installed_by": ["subworkflows"] + } + } + } + }, "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { diff --git a/modules/mirpedrol/clustalo/align/environment.yml 
b/modules/mirpedrol/clustalo/align/environment.yml new file mode 100644 index 00000000..be1eef95 --- /dev/null +++ b/modules/mirpedrol/clustalo/align/environment.yml @@ -0,0 +1,8 @@ +name: clustalo_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::clustalo=1.2.4 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/clustalo/align/main.nf b/modules/mirpedrol/clustalo/align/main.nf new file mode 100644 index 00000000..55a1113b --- /dev/null +++ b/modules/mirpedrol/clustalo/align/main.nf @@ -0,0 +1,54 @@ +process CLUSTALO_ALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0': + 'biocontainers/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0' }" + + input: + tuple val(meta) , path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // using >() is necessary to preserve the return value, + // so nextflow knows to display an error when it failed + // the --force -o is necessary, as clustalo expands the commandline input, + // causing it to treat the pipe as a parameter and fail + // this way, the command expands to /dev/fd/, and --force allows writing output to an already existing file + """ + clustalo \ + -i ${fasta} \ + --threads=${task.cpus} \ + $args \ + --force -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + 
def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/clustalo/align/meta.yml b/modules/mirpedrol/clustalo/align/meta.yml new file mode 100644 index 00000000..910d0395 --- /dev/null +++ b/modules/mirpedrol/clustalo/align/meta.yml @@ -0,0 +1,59 @@ +name: "clustalo_align" +description: Align sequences using Clustal Omega +keywords: + - alignment + - MSA + - msa + - align + - genomics +tools: + - "clustalo": + description: "Latest version of Clustal: a multiple sequence alignment program + for DNA or proteins" + homepage: "http://www.clustal.org/omega/" + documentation: "http://www.clustal.org/omega/" + tool_dev_url: "http://www.clustal.org/omega/" + doi: "10.1038/msb.2011.75" + licence: ["GPL v2"] + identifier: "biotools:clustalo" + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file, in gzipped fasta format + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@joseespinosa" +maintainers: + - "@luisas" + - "@joseespinosa" + - "@lrauschning" diff --git a/modules/mirpedrol/clustalo/align/tests/main.nf.test b/modules/mirpedrol/clustalo/align/tests/main.nf.test new file mode 100644 index 00000000..b26d19a7 --- /dev/null +++ b/modules/mirpedrol/clustalo/align/tests/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process CLUSTALO_ALIGN" + script "../main.nf" + process "CLUSTALO_ALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "clustalo" + tag "clustalo/align" + + + test("sarscov2 - contigs-fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment ")}, + { assert snapshot(process.out.versions).match("versions1") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/mirpedrol/clustalo/align/tests/main.nf.test.snap b/modules/mirpedrol/clustalo/align/tests/main.nf.test.snap new file mode 100644 index 00000000..b92811bf --- /dev/null +++ b/modules/mirpedrol/clustalo/align/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "versions1": { + "content": [ + [ + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T13:15:10.541265" + }, + "alignment ": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": 
"2024-07-10T13:15:10.531416" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/clustalo/align/tests/tags.yml b/modules/mirpedrol/clustalo/align/tests/tags.yml new file mode 100644 index 00000000..3dd9c9c0 --- /dev/null +++ b/modules/mirpedrol/clustalo/align/tests/tags.yml @@ -0,0 +1,2 @@ +clustalo/align: + - "modules/mirpedrol/clustalo/align/**" diff --git a/modules/mirpedrol/clustalo/guidetree/environment.yml b/modules/mirpedrol/clustalo/guidetree/environment.yml new file mode 100644 index 00000000..38b2f5b9 --- /dev/null +++ b/modules/mirpedrol/clustalo/guidetree/environment.yml @@ -0,0 +1,7 @@ +name: clustalo_guidetree +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::clustalo=1.2.4 diff --git a/modules/mirpedrol/clustalo/guidetree/main.nf b/modules/mirpedrol/clustalo/guidetree/main.nf new file mode 100644 index 00000000..b94f2aa6 --- /dev/null +++ b/modules/mirpedrol/clustalo/guidetree/main.nf @@ -0,0 +1,47 @@ +process CLUSTALO_GUIDETREE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/clustalo:1.2.4--h87f3376_5': + 'biocontainers/clustalo:1.2.4--h87f3376_5' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.dnd"), emit: tree + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + clustalo \\ + -i ${fasta} \\ + --guidetree-out ${prefix}.dnd \\ + --threads=${task.cpus} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.dnd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/clustalo/guidetree/meta.yml b/modules/mirpedrol/clustalo/guidetree/meta.yml new file mode 100644 index 00000000..acc0c553 --- /dev/null +++ b/modules/mirpedrol/clustalo/guidetree/meta.yml @@ -0,0 +1,53 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/yaml-schema.json +name: "clustalo_guidetree" +description: Renders a guidetree in clustalo +keywords: + - guide tree + - msa + - newick + - align + - guidetree +tools: + - "clustalo": + description: "Latest version of Clustal: a multiple sequence alignment program + for DNA or proteins" + homepage: "http://www.clustal.org/omega/" + documentation: "http://www.clustal.org/omega/" + tool_dev_url: "http://www.clustal.org/omega/" + doi: "10.1038/msb.2011.75" + licence: ["GPL v2"] + identifier: "biotools:clustalo" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - tree: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.dnd": + type: file + description: Guide tree file in Newick format + pattern: "*.{dnd}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" +maintainers: + - "@luisas" + - "@JoseEspinosa" diff --git a/modules/mirpedrol/clustalo/guidetree/tests/main.nf.test b/modules/mirpedrol/clustalo/guidetree/tests/main.nf.test new file mode 100644 index 00000000..4726492c --- /dev/null +++ b/modules/mirpedrol/clustalo/guidetree/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process CLUSTALO_GUIDETREE" + script "../main.nf" + process "CLUSTALO_GUIDETREE" + + tag "modules" + tag "modules_mirpedrol" + tag "clustalo" + tag "clustalo/guidetree" + + test("sarscov2 - contigs-fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tree).match("tree")}, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + +} diff --git a/modules/mirpedrol/clustalo/guidetree/tests/main.nf.test.snap b/modules/mirpedrol/clustalo/guidetree/tests/main.nf.test.snap new file mode 100644 index 00000000..6e3fdfc1 --- /dev/null +++ b/modules/mirpedrol/clustalo/guidetree/tests/main.nf.test.snap @@ -0,0 +1,23 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,64796b9beb7201a42b2c78cbdad51049" + ] + ], + "timestamp": "2023-11-27T22:49:13.44908228" + }, + "tree": { + "content": [ + [ + [ + { + "id": "test" + }, + 
"test.dnd:md5,5428bad500a0a0bd985744bec1a12a70" + ] + ] + ], + "timestamp": "2023-11-27T22:49:13.43743393" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/clustalo/guidetree/tests/tags.yml b/modules/mirpedrol/clustalo/guidetree/tests/tags.yml new file mode 100644 index 00000000..c666305f --- /dev/null +++ b/modules/mirpedrol/clustalo/guidetree/tests/tags.yml @@ -0,0 +1,2 @@ +clustalo/guidetree: + - "modules/mirpedrol/clustalo/guidetree/**" diff --git a/modules/mirpedrol/clustalo/treealign/environment.yml b/modules/mirpedrol/clustalo/treealign/environment.yml new file mode 100644 index 00000000..9226c3ac --- /dev/null +++ b/modules/mirpedrol/clustalo/treealign/environment.yml @@ -0,0 +1,8 @@ +name: clustalo_treealign +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::clustalo=1.2.4 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/clustalo/treealign/main.nf b/modules/mirpedrol/clustalo/treealign/main.nf new file mode 100644 index 00000000..cfa9c117 --- /dev/null +++ b/modules/mirpedrol/clustalo/treealign/main.nf @@ -0,0 +1,56 @@ +process CLUSTALO_TREEALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0': + 'biocontainers/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(tree) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // using >() is necessary to preserve the return value, + // so nextflow knows to display an error when it failed + // the --force -o is necessary, as clustalo expands the commandline input, + // causing it to treat the pipe as a parameter and fail + // this way, the command expands to /dev/fd/, and --force allows writing output to an already existing file + """ + clustalo \ + -i ${fasta} \ + --guidetree-in=${tree} \ + --threads=${task.cpus} \ + $args \ + --force -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clustalo: \$( clustalo --version ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/clustalo/treealign/meta.yml b/modules/mirpedrol/clustalo/treealign/meta.yml new file mode 100644 index 00000000..8058fe74 --- /dev/null +++ b/modules/mirpedrol/clustalo/treealign/meta.yml @@ -0,0 +1,68 @@ +name: "clustalo_treealign" +description: Align sequences using Clustal Omega +keywords: + - alignment + - treealignment + - MSA + - msa + - genomics +tools: + - "clustalo": 
+ description: "Latest version of Clustal: a multiple sequence alignment program + for DNA or proteins" + homepage: "http://www.clustal.org/omega/" + documentation: "http://www.clustal.org/omega/" + tool_dev_url: "http://www.clustal.org/omega/" + doi: "10.1038/msb.2011.75" + licence: ["GPL v2"] + identifier: "biotools:clustalo" + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 + - - meta2: + type: map + description: | + Groovy Map containing tree information + e.g. `[ id:'test_tree']` + - tree: + type: file + description: Input guide tree in Newick format + pattern: "*.{dnd}" +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file, in gzipped fasta format + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@joseespinosa" +maintainers: + - "@luisas" + - "@joseespinosa" + - "@lrauschning" diff --git a/modules/mirpedrol/clustalo/treealign/tests/main.nf.test b/modules/mirpedrol/clustalo/treealign/tests/main.nf.test new file mode 100644 index 00000000..ea916135 --- /dev/null +++ b/modules/mirpedrol/clustalo/treealign/tests/main.nf.test @@ -0,0 +1,48 @@ +nextflow_process { + + name "Test Process CLUSTALO_TREEALIGN" + script "../main.nf" + process "CLUSTALO_TREEALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "clustalo" + tag "clustalo/treealign" + tag "clustalo/guidetree" + + test("sarscov2 - contigs-fasta - guide_tree") { + + setup { + + run("CLUSTALO_GUIDETREE") { + script "../../guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + input[1] = CLUSTALO_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("with_guide_tree_alignment")}, + { assert snapshot(process.out.versions).match("with_guide_tree_versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/clustalo/treealign/tests/main.nf.test.snap b/modules/mirpedrol/clustalo/treealign/tests/main.nf.test.snap new file mode 100644 index 00000000..16e80fed --- /dev/null +++ 
b/modules/mirpedrol/clustalo/treealign/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "with_guide_tree_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-02-09T19:40:45.057777867" + }, + "with_guide_tree_versions": { + "content": [ + [ + "versions.yml:md5,b825152229a974c6cfc6d826db883cb4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T13:29:28.254709" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/clustalo/treealign/tests/tags.yml b/modules/mirpedrol/clustalo/treealign/tests/tags.yml new file mode 100644 index 00000000..3dd9c9c0 --- /dev/null +++ b/modules/mirpedrol/clustalo/treealign/tests/tags.yml @@ -0,0 +1,2 @@ +clustalo/treealign: + - "modules/mirpedrol/clustalo/treealign/**" diff --git a/modules/mirpedrol/famsa/align/environment.yml b/modules/mirpedrol/famsa/align/environment.yml new file mode 100644 index 00000000..c41cda2a --- /dev/null +++ b/modules/mirpedrol/famsa/align/environment.yml @@ -0,0 +1,7 @@ +name: famsa_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::famsa=2.2.2 diff --git a/modules/mirpedrol/famsa/align/main.nf b/modules/mirpedrol/famsa/align/main.nf new file mode 100644 index 00000000..ba7c0eb1 --- /dev/null +++ b/modules/mirpedrol/famsa/align/main.nf @@ -0,0 +1,48 @@ + + +process FAMSA_ALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/famsa:2.2.2--h9f5acd7_0': + 'biocontainers/famsa:2.2.2--h9f5acd7_0' }" + + input: + tuple val(meta) , path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + famsa -gz \\ + $args \\ + -t ${task.cpus} \\ + ${fasta} \\ + ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/famsa/align/meta.yml b/modules/mirpedrol/famsa/align/meta.yml new file mode 100644 index 00000000..7c7aa406 --- /dev/null +++ b/modules/mirpedrol/famsa/align/meta.yml @@ -0,0 +1,54 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/yaml-schema.json +name: "famsa_align" +description: Aligns sequences using FAMSA +keywords: + - alignment + - MSA + - genomics + - msa + - align +tools: + - "famsa": + description: "Algorithm for large-scale multiple sequence alignments" + homepage: "https://github.com/refresh-bio/FAMSA" + documentation: "https://github.com/refresh-bio/FAMSA" + tool_dev_url: "https://github.com/refresh-bio/FAMSA" + doi: "10.1038/srep33964" + licence: ["GPL v3"] + identifier: biotools:famsa +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file, in FASTA format. + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" +maintainers: + - "@luisas" + - "@JoseEspinosa" diff --git a/modules/mirpedrol/famsa/align/tests/main.nf.test b/modules/mirpedrol/famsa/align/tests/main.nf.test new file mode 100644 index 00000000..89154377 --- /dev/null +++ b/modules/mirpedrol/famsa/align/tests/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process FAMSA_ALIGN" + script "../main.nf" + process "FAMSA_ALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "famsa" + tag "famsa/align" + + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment")}, + { assert snapshot(process.out.versions).match("versions1") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/mirpedrol/famsa/align/tests/main.nf.test.snap b/modules/mirpedrol/famsa/align/tests/main.nf.test.snap new file mode 100644 index 00000000..3f144f6e --- /dev/null +++ b/modules/mirpedrol/famsa/align/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "versions1": { + "content": [ + [ + "versions.yml:md5,7d9e0a8c263fa6d9017075fe88c9e9dc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:06:05.094484" 
+ }, + "alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:06:05.047249" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/famsa/align/tests/tags.yml b/modules/mirpedrol/famsa/align/tests/tags.yml new file mode 100644 index 00000000..6944f882 --- /dev/null +++ b/modules/mirpedrol/famsa/align/tests/tags.yml @@ -0,0 +1,2 @@ +famsa/align: + - "modules/mirpedrol/famsa/align/**" diff --git a/modules/mirpedrol/famsa/guidetree/environment.yml b/modules/mirpedrol/famsa/guidetree/environment.yml new file mode 100644 index 00000000..28be1c7f --- /dev/null +++ b/modules/mirpedrol/famsa/guidetree/environment.yml @@ -0,0 +1,7 @@ +name: famsa_guidetree +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::famsa=2.2.2 diff --git a/modules/mirpedrol/famsa/guidetree/main.nf b/modules/mirpedrol/famsa/guidetree/main.nf new file mode 100644 index 00000000..7d8f46cd --- /dev/null +++ b/modules/mirpedrol/famsa/guidetree/main.nf @@ -0,0 +1,49 @@ + +process FAMSA_GUIDETREE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/famsa:2.2.2--h9f5acd7_0': + 'biocontainers/famsa:2.2.2--h9f5acd7_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.dnd"), emit: tree + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + famsa -gt_export \\ + $args \\ + -t ${task.cpus} \\ + ${fasta} \\ + ${prefix}.dnd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.dnd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) + END_VERSIONS + """ +} + diff --git a/modules/mirpedrol/famsa/guidetree/meta.yml b/modules/mirpedrol/famsa/guidetree/meta.yml new file mode 100644 index 00000000..61924961 --- /dev/null +++ b/modules/mirpedrol/famsa/guidetree/meta.yml @@ -0,0 +1,52 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/yaml-schema.json +name: "famsa_guidetree" +description: Renders a guidetree in famsa +keywords: + - guide tree + - msa + - newick + - align + - guidetree +tools: + - "famsa": + description: "Algorithm for large-scale multiple sequence alignments" + homepage: "https://github.com/refresh-bio/FAMSA" + documentation: "https://github.com/refresh-bio/FAMSA" + tool_dev_url: "https://github.com/refresh-bio/FAMSA" + doi: "10.1038/srep33964" + licence: ["GPL v3"] + identifier: biotools:famsa +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - tree: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.dnd": + type: file + description: Guide tree file in Newick format + pattern: "*.{dnd}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" +maintainers: + - "@luisas" + - "@JoseEspinosa" diff --git a/modules/mirpedrol/famsa/guidetree/tests/main.nf.test b/modules/mirpedrol/famsa/guidetree/tests/main.nf.test new file mode 100644 index 00000000..d3c944fe --- /dev/null +++ b/modules/mirpedrol/famsa/guidetree/tests/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process FAMSA_GUIDETREE" + script "../main.nf" + process "FAMSA_GUIDETREE" + + tag "modules" + tag "modules_mirpedrol" + tag "famsa" + tag "famsa/guidetree" + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tree).match("tree")}, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/mirpedrol/famsa/guidetree/tests/main.nf.test.snap b/modules/mirpedrol/famsa/guidetree/tests/main.nf.test.snap new file mode 100644 index 00000000..00a049d1 --- /dev/null +++ b/modules/mirpedrol/famsa/guidetree/tests/main.nf.test.snap @@ -0,0 +1,23 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,320ce01bcb255b03ef5125755bf95195" + ] + ], + "timestamp": "2023-11-29T12:12:38.870544616" + }, + "tree": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.dnd:md5,f3ef8b16a7a16cb4548942ebf2e7bad6" + ] + ] + ], + 
"timestamp": "2023-11-29T12:12:38.855536268" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/famsa/guidetree/tests/tags.yml b/modules/mirpedrol/famsa/guidetree/tests/tags.yml new file mode 100644 index 00000000..1bb93661 --- /dev/null +++ b/modules/mirpedrol/famsa/guidetree/tests/tags.yml @@ -0,0 +1,2 @@ +famsa/guidetree: + - "modules/mirpedrol/famsa/guidetree/**" diff --git a/modules/mirpedrol/famsa/treealign/environment.yml b/modules/mirpedrol/famsa/treealign/environment.yml new file mode 100644 index 00000000..3f200c75 --- /dev/null +++ b/modules/mirpedrol/famsa/treealign/environment.yml @@ -0,0 +1,7 @@ +name: famsa_treealign +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::famsa=2.2.2 diff --git a/modules/mirpedrol/famsa/treealign/main.nf b/modules/mirpedrol/famsa/treealign/main.nf new file mode 100644 index 00000000..6d1fd80a --- /dev/null +++ b/modules/mirpedrol/famsa/treealign/main.nf @@ -0,0 +1,50 @@ + + +process FAMSA_TREEALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/famsa:2.2.2--h9f5acd7_0': + 'biocontainers/famsa:2.2.2--h9f5acd7_0' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(tree) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + famsa -gt import $tree \\ + -gz \\ + $args \\ + -t ${task.cpus} \\ + ${fasta} \\ + ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/famsa/treealign/meta.yml b/modules/mirpedrol/famsa/treealign/meta.yml new file mode 100644 index 00000000..45eafde8 --- /dev/null +++ b/modules/mirpedrol/famsa/treealign/meta.yml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/yaml-schema.json +name: "famsa_treealign" +description: Aligns sequences using FAMSA +keywords: + - alignment + - treealignment + - MSA + - msa + - genomics +tools: + - "famsa": + description: "Algorithm for large-scale multiple sequence alignments" + homepage: "https://github.com/refresh-bio/FAMSA" + documentation: "https://github.com/refresh-bio/FAMSA" + tool_dev_url: "https://github.com/refresh-bio/FAMSA" + doi: "10.1038/srep33964" + licence: ["GPL v3"] + identifier: biotools:famsa +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 + - - meta2: + type: map + description: | + Groovy Map containing tree information + e.g. `[ id:'test_tree']` + - tree: + type: file + description: Input guide tree in Newick format + pattern: "*.{dnd}" +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file, in FASTA format. + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" +maintainers: + - "@luisas" + - "@JoseEspinosa" diff --git a/modules/mirpedrol/famsa/treealign/tests/main.nf.test b/modules/mirpedrol/famsa/treealign/tests/main.nf.test new file mode 100644 index 00000000..d9ef2ad0 --- /dev/null +++ b/modules/mirpedrol/famsa/treealign/tests/main.nf.test @@ -0,0 +1,48 @@ +nextflow_process { + + name "Test Process FAMSA_TREEALIGN" + script "../main.nf" + process "FAMSA_TREEALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "famsa" + tag "famsa/treealign" + tag "famsa/guidetree" + + + test("sarscov2 - fasta - guide_tree") { + + setup { + run("FAMSA_GUIDETREE") { + script "../../guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.alignment).match("with_guide_tree_alignment")}, + { assert snapshot(process.out.versions).match("with_guide_tree_versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/famsa/treealign/tests/main.nf.test.snap b/modules/mirpedrol/famsa/treealign/tests/main.nf.test.snap new file mode 100644 index 00000000..cc750f4b --- /dev/null +++ b/modules/mirpedrol/famsa/treealign/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "with_guide_tree_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-02-09T19:10:05.167368314" + }, + "with_guide_tree_versions": { + "content": [ + [ + "versions.yml:md5,7b4c829b2d9a9fc6e805c06d432998cf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T14:20:45.346455" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/famsa/treealign/tests/tags.yml b/modules/mirpedrol/famsa/treealign/tests/tags.yml new file mode 100644 index 00000000..6944f882 --- /dev/null +++ b/modules/mirpedrol/famsa/treealign/tests/tags.yml @@ -0,0 +1,2 @@ +famsa/treealign: + - "modules/mirpedrol/famsa/treealign/**" diff --git a/modules/mirpedrol/kalign/align/environment.yml b/modules/mirpedrol/kalign/align/environment.yml new file mode 100644 index 00000000..93563eae --- /dev/null +++ b/modules/mirpedrol/kalign/align/environment.yml @@ -0,0 +1,8 @@ +name: kalign_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::kalign3=3.4.0 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/kalign/align/main.nf b/modules/mirpedrol/kalign/align/main.nf new file mode 100644 index 00000000..014f5216 --- /dev/null +++ b/modules/mirpedrol/kalign/align/main.nf @@ -0,0 +1,48 @@ +process KALIGN_ALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-5cd0277547c6b33133225c8ce14c0cf2a4396ea2:0a70b6d89a3e06fbdc4a735461e8b98ff32ee5de-0': + 'biocontainers/mulled-v2-5cd0277547c6b33133225c8ce14c0cf2a4396ea2:0a70b6d89a3e06fbdc4a735461e8b98ff32ee5de-0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + unpigz -cdf $fasta | \\ + kalign \\ + $args \\ + -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kalign: \$(echo \$(kalign -v) | sed 's/kalign //g' ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kalign: \$(echo \$(kalign -v) | sed 's/kalign //g' ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/kalign/align/meta.yml b/modules/mirpedrol/kalign/align/meta.yml new file mode 100644 index 00000000..35381d4e --- /dev/null +++ b/modules/mirpedrol/kalign/align/meta.yml @@ -0,0 +1,53 @@ +name: "kalign_align" +description: "Aligns sequences using kalign" +keywords: + - alignment + - MSA + - genomics + - msa + - align +tools: + - "kalign": + description: "Kalign is a fast and accurate multiple sequence alignment algorithm."
+ homepage: "https://msa.sbc.su.se/cgi-bin/msa.cgi" + documentation: "https://github.com/TimoLassmann/kalign" + tool_dev_url: "https://github.com/TimoLassmann/kalign" + doi: "10.1093/bioinformatics/btz795" + licence: ["GPL v3"] + identifier: "biotools:kalign" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format. May be gzipped or uncompressed. + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file. + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" +maintainers: + - "@luisas" + - "@JoseEspinosa" diff --git a/modules/mirpedrol/kalign/align/tests/main.nf.test b/modules/mirpedrol/kalign/align/tests/main.nf.test new file mode 100644 index 00000000..569aa6d0 --- /dev/null +++ b/modules/mirpedrol/kalign/align/tests/main.nf.test @@ -0,0 +1,33 @@ +// nf-core modules test kalign/align +nextflow_process { + + name "Test Process KALIGN_ALIGN" + script "../main.nf" + process "KALIGN_ALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "kalign" + tag "kalign/align" + + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta")}, + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/kalign/align/tests/main.nf.test.snap 
b/modules/mirpedrol/kalign/align/tests/main.nf.test.snap new file mode 100644 index 00000000..da6fc94c --- /dev/null +++ b/modules/mirpedrol/kalign/align/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "SARS-CoV-2 scaffolds fasta - uncompressed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,c165ecf48fb89862cc2a991cc3cadb2d" + ] + ], + "1": [ + "versions.yml:md5,0764ff5c30fd8befd86baa9026493ffe" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,c165ecf48fb89862cc2a991cc3cadb2d" + ] + ], + "versions": [ + "versions.yml:md5,0764ff5c30fd8befd86baa9026493ffe" + ] + } + ], + "timestamp": "2024-03-22T16:42:01.934768" + }, + "SARS-CoV-2 scaffolds fasta - compressed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,c165ecf48fb89862cc2a991cc3cadb2d" + ] + ], + "1": [ + "versions.yml:md5,0764ff5c30fd8befd86baa9026493ffe" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,c165ecf48fb89862cc2a991cc3cadb2d" + ] + ], + "versions": [ + "versions.yml:md5,0764ff5c30fd8befd86baa9026493ffe" + ] + } + ], + "timestamp": "2024-03-22T16:42:07.734293" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/kalign/align/tests/tags.yml b/modules/mirpedrol/kalign/align/tests/tags.yml new file mode 100644 index 00000000..d5cecbe4 --- /dev/null +++ b/modules/mirpedrol/kalign/align/tests/tags.yml @@ -0,0 +1,2 @@ +kalign/align: + - "modules/mirpedrol/kalign/align/**" diff --git a/modules/mirpedrol/learnmsa/align/environment.yml b/modules/mirpedrol/learnmsa/align/environment.yml new file mode 100644 index 00000000..124b8d84 --- /dev/null +++ b/modules/mirpedrol/learnmsa/align/environment.yml @@ -0,0 +1,8 @@ +name: learnmsa_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::learnmsa=2.0.1 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/learnmsa/align/main.nf b/modules/mirpedrol/learnmsa/align/main.nf new file mode 100644 index 00000000..365768e0 --- 
/dev/null +++ b/modules/mirpedrol/learnmsa/align/main.nf @@ -0,0 +1,48 @@ +process LEARNMSA_ALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-741e0da5cf2d6d964f559672e2908c2111cbb46b:4930edd009376542543bfd2e20008bb1ae58f841-0' : + 'biocontainers/mulled-v2-741e0da5cf2d6d964f559672e2908c2111cbb46b:4930edd009376542543bfd2e20008bb1ae58f841-0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + learnMSA \\ + $args \\ + -i <(unpigz -cdf $fasta) \\ + -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + learnmsa: \$(learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + learnmsa: \$(learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/learnmsa/align/meta.yml b/modules/mirpedrol/learnmsa/align/meta.yml new file mode 100644 index 00000000..b04d1da0 --- /dev/null +++ b/modules/mirpedrol/learnmsa/align/meta.yml @@ -0,0 +1,53 @@ +name: "learnmsa_align" +description: Align sequences using learnMSA +keywords: + - alignment + - MSA + - genomics + - msa + - align +tools: + - "learnmsa": + description: 
"learnMSA: Learning and Aligning large Protein Families" + homepage: "https://github.com/Gaius-Augustus/learnMSA" + documentation: "https://github.com/Gaius-Augustus/learnMSA" + tool_dev_url: "https://github.com/Gaius-Augustus/learnMSA" + doi: "10.1093/gigascience/giac104" + licence: ["MIT"] + identifier: biotools:learnMSA +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format. May be gz-compressed or uncompressed. + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file, in FASTA format. + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" +maintainers: + - "@luisas" + - "@JoseEspinosa" diff --git a/modules/mirpedrol/learnmsa/align/tests/main.nf.test b/modules/mirpedrol/learnmsa/align/tests/main.nf.test new file mode 100644 index 00000000..82039e14 --- /dev/null +++ b/modules/mirpedrol/learnmsa/align/tests/main.nf.test @@ -0,0 +1,36 @@ +// nf-core modules test learnmsa/align +nextflow_process { + + name "Test Process LEARNMSA_ALIGN" + script "../main.nf" + process "LEARNMSA_ALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "learnmsa" + tag "learnmsa/align" + + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.alignment.get(0).get(1)).getTextGzip().contains(">sample1") }, + { assert 
snapshot(process.out.versions).match("versions1") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/mirpedrol/learnmsa/align/tests/main.nf.test.snap b/modules/mirpedrol/learnmsa/align/tests/main.nf.test.snap new file mode 100644 index 00000000..981738a2 --- /dev/null +++ b/modules/mirpedrol/learnmsa/align/tests/main.nf.test.snap @@ -0,0 +1,26 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,85322b0f038aa768f202fd0d748d6c7c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T16:06:48.867020809" + }, + "versions1": { + "content": [ + [ + "versions.yml:md5,85322b0f038aa768f202fd0d748d6c7c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T16:12:13.921813607" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/learnmsa/align/tests/tags.yml b/modules/mirpedrol/learnmsa/align/tests/tags.yml new file mode 100644 index 00000000..fda0d147 --- /dev/null +++ b/modules/mirpedrol/learnmsa/align/tests/tags.yml @@ -0,0 +1,2 @@ +learnmsa/align: + - "modules/mirpedrol/learnmsa/align/**" diff --git a/modules/mirpedrol/mafft/environment.yml b/modules/mirpedrol/mafft/environment.yml new file mode 100644 index 00000000..595252e0 --- /dev/null +++ b/modules/mirpedrol/mafft/environment.yml @@ -0,0 +1,8 @@ +name: mafft +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::mafft=7.520 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/mafft/main.nf b/modules/mirpedrol/mafft/main.nf new file mode 100644 index 00000000..1ed127b6 --- /dev/null +++ b/modules/mirpedrol/mafft/main.nf @@ -0,0 +1,50 @@ +process MAFFT { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-12eba4a074f913c639117640936668f5a6a01da6:425707898cf4f85051b77848be253b88f1d2298a-0': + 'biocontainers/mulled-v2-12eba4a074f913c639117640936668f5a6a01da6:425707898cf4f85051b77848be253b88f1d2298a-0' }" + + input: + tuple val(meta) , path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mafft \\ + --thread ${task.cpus} \\ + ${args} \\ + ${fasta} \\ + | pigz -cp ${task.cpus} > ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mafft: \$(mafft --version 2>&1 | sed 's/^v//' | sed 's/ (.*)//') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mafft: \$(mafft --version 2>&1 | sed 's/^v//' | sed 's/ (.*)//') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + +} diff --git a/modules/mirpedrol/mafft/meta.yml b/modules/mirpedrol/mafft/meta.yml new file mode 100644 index 00000000..427a903b --- /dev/null +++ b/modules/mirpedrol/mafft/meta.yml @@ -0,0 +1,59 @@ +name: mafft +description: Multiple sequence alignment using MAFFT +keywords: + - fasta + - msa + - multiple sequence alignment + - alignment + - align +tools: + - "mafft": + description: Multiple alignment program for amino acid or nucleotide sequences + based on fast Fourier transform + homepage: https://mafft.cbrc.jp/alignment/software/ + documentation: https://mafft.cbrc.jp/alignment/software/manual/manual.html + tool_dev_url: https://mafft.cbrc.jp/alignment/software/source.html + doi: "10.1093/nar/gkf436" + licence: ["BSD"] + identifier: biotools:MAFFT + - "pigz": + 
description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file containing the sequences to align. May be gzipped or + uncompressed. + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.aln.gz": + type: file + description: Aligned sequences in FASTA format. + pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@MillironX" +maintainers: + - "@MillironX" + - "@Joon-Klaps" diff --git a/modules/mirpedrol/mafft/tests/main.nf.test b/modules/mirpedrol/mafft/tests/main.nf.test new file mode 100644 index 00000000..13089a04 --- /dev/null +++ b/modules/mirpedrol/mafft/tests/main.nf.test @@ -0,0 +1,31 @@ +nextflow_process { + + name "Test Process MAFFT" + script "../main.nf" + process "MAFFT" + tag "modules" + tag "modules_mirpedrol" + tag "mafft" + + + test("SARS-CoV-2 scaffolds fasta") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['scaffolds_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta")} + ) + } + + } +} \ No newline at end of file diff --git a/modules/mirpedrol/mafft/tests/main.nf.test.snap b/modules/mirpedrol/mafft/tests/main.nf.test.snap new file mode 100644 index 00000000..f898bed1 --- /dev/null +++ b/modules/mirpedrol/mafft/tests/main.nf.test.snap @@ 
-0,0 +1,317 @@ +{ + "SARS-CoV-2 scaffolds fasta - uncompressed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:08:41.735774847" + }, + "SARS-CoV-2 scaffolds fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.aln.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "alignment": [ + [ + { + "id": "test", + "single_end": false + }, + "test.aln.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-11T09:20:32.608521064" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta multiple": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:10:38.940555785" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta normal": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,a57a34f1c566dea114dc1b13416536d4" + ] + ], + "1": [ + 
"versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,a57a34f1c566dea114dc1b13416536d4" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:09:35.656248409" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta long": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,e8868da70d1f3050a8daaee0e53b2fd9" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,e8868da70d1f3050a8daaee0e53b2fd9" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:10:26.372655394" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta profile": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,c2b5caf39beff4473878e6aa4036ad43" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,c2b5caf39beff4473878e6aa4036ad43" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:10:14.039053212" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta fragments": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" + ] + ], + 
"versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:09:49.737364197" + }, + "SARS-CoV-2 scaffolds fasta - add informative sites fasta full": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,611cb0a65195a282f110f7f56e310c66" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,611cb0a65195a282f110f7f56e310c66" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:10:02.952480822" + }, + "SARS-CoV-2 scaffolds fasta - compressed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "1": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ], + "fas": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fas.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "versions": [ + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-09T19:09:21.096197597" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/mafft/tests/tags.yml b/modules/mirpedrol/mafft/tests/tags.yml new file mode 100644 index 00000000..8109d122 --- /dev/null +++ b/modules/mirpedrol/mafft/tests/tags.yml @@ -0,0 +1,2 @@ +mafft: + - modules/mirpedrol/mafft/** diff --git a/modules/mirpedrol/magus/align/environment.yml b/modules/mirpedrol/magus/align/environment.yml new file mode 100644 index 00000000..685f5a87 --- /dev/null +++ b/modules/mirpedrol/magus/align/environment.yml @@ -0,0 +1,8 @@ +name: magus_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - 
bioconda::magus-msa=0.2.0 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/magus/align/main.nf b/modules/mirpedrol/magus/align/main.nf new file mode 100644 index 00000000..18622ddd --- /dev/null +++ b/modules/mirpedrol/magus/align/main.nf @@ -0,0 +1,53 @@ +process MAGUS_ALIGN { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ae4ea1182e75371808710b6c081bef8b228c4815:10b41722a6b9471a0945fe6baeb9aff444d8eb1d-0': + 'biocontainers/mulled-v2-ae4ea1182e75371808710b6c081bef8b228c4815:10b41722a6b9471a0945fe6baeb9aff444d8eb1d-0' }" + + input: + tuple val(meta) , path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // using >() is necessary to preserve the return value, + // so nextflow knows to display an error when it failed + // using --overwrite is necessary, as the file descriptor generated by the named file will already exist + """ + magus \\ + -np $task.cpus \\ + -i $fasta \\ + -d ./ \\ + --overwrite -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MAGUS: \$(magus --version) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MAGUS: \$(magus --version) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/magus/align/meta.yml b/modules/mirpedrol/magus/align/meta.yml new file mode 100644 index 
00000000..c999c9ad --- /dev/null +++ b/modules/mirpedrol/magus/align/meta.yml @@ -0,0 +1,53 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/yaml-schema.json +name: "magus_align" +description: Multiple Sequence Alignment using Graph Clustering +keywords: + - MSA + - alignment + - genomics + - graph + - msa + - align +tools: + - "magus": + description: "Multiple Sequence Alignment using Graph Clustering" + homepage: "https://github.com/vlasmirnov/MAGUS" + documentation: "https://github.com/vlasmirnov/MAGUS" + tool_dev_url: "https://github.com/vlasmirnov/MAGUS" + doi: "10.1093/bioinformatics/btaa992" + licence: ["MIT"] + identifier: biotools:magus + +input: + - - meta: + type: map + description: | + Groovy Map containing the fasta meta information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Input sequences in FASTA format. + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample meta information. + e.g. `[ id:'test', single_end:false ]` + - "*.aln.gz": + type: file + description: File containing the output alignment, in FASTA format containing + gaps. The sequences may be in a different order than in the input FASTA. 
+ pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lrauschning" diff --git a/modules/mirpedrol/magus/align/tests/main.nf.test b/modules/mirpedrol/magus/align/tests/main.nf.test new file mode 100644 index 00000000..d1db58c9 --- /dev/null +++ b/modules/mirpedrol/magus/align/tests/main.nf.test @@ -0,0 +1,40 @@ +nextflow_process { + + name "Test Process MAGUS_ALIGN" + script "../main.nf" + process "MAGUS_ALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "magus" + tag "magus/align" + + + test("setoxin - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1apf") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1atx") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1bds") }, + { assert snapshot(process.out.versions).match("versions1") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/mirpedrol/magus/align/tests/main.nf.test.snap b/modules/mirpedrol/magus/align/tests/main.nf.test.snap new file mode 100644 index 00000000..b0757599 --- /dev/null +++ b/modules/mirpedrol/magus/align/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "versions1": { + "content": [ + [ + "versions.yml:md5,ef9456e058ce51bce10dbc3703da29c7" + ] + ], + "meta": { + "nf-test": 
"0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-03-28T18:17:23.679862847" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/magus/align/tests/tags.yml b/modules/mirpedrol/magus/align/tests/tags.yml new file mode 100644 index 00000000..c7d2f94a --- /dev/null +++ b/modules/mirpedrol/magus/align/tests/tags.yml @@ -0,0 +1,2 @@ +magus/align: + - "modules/mirpedrol/magus/align/**" diff --git a/modules/mirpedrol/magus/guidetree/environment.yml b/modules/mirpedrol/magus/guidetree/environment.yml new file mode 100644 index 00000000..8e750334 --- /dev/null +++ b/modules/mirpedrol/magus/guidetree/environment.yml @@ -0,0 +1,7 @@ +name: magus_guidetree +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::magus-msa=0.2.0 diff --git a/modules/mirpedrol/magus/guidetree/main.nf b/modules/mirpedrol/magus/guidetree/main.nf new file mode 100644 index 00000000..0206053e --- /dev/null +++ b/modules/mirpedrol/magus/guidetree/main.nf @@ -0,0 +1,48 @@ +process MAGUS_GUIDETREE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/magus-msa:0.2.0--pyhdfd78af_0': + 'biocontainers/magus-msa:0.2.0--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.dnd"), emit: tree + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + magus \\ + -np $task.cpus \\ + -i $fasta \\ + -o ${prefix}.dnd \\ + --onlyguidetree TRUE \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MAGUS: \$(magus --version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.dnd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MAGUS: \$(magus --version) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/magus/guidetree/meta.yml b/modules/mirpedrol/magus/guidetree/meta.yml new file mode 100644 index 00000000..dbd4d8b3 --- /dev/null +++ b/modules/mirpedrol/magus/guidetree/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/yaml-schema.json +name: "magus_guidetree" +description: Multiple Sequence Alignment using Graph Clustering +keywords: + - MSA + - guide tree + - genomics + - graph + - align + - guidetree + - msa +tools: + - "magus": + description: "Multiple Sequence Alignment using Graph Clustering" + homepage: "https://github.com/vlasmirnov/MAGUS" + documentation: "https://github.com/vlasmirnov/MAGUS" + tool_dev_url: "https://github.com/vlasmirnov/MAGUS" + doi: "10.1093/bioinformatics/btaa992" + licence: ["MIT"] + identifier: biotools:magus + +input: + - - meta: + type: map + description: | + Groovy Map containing fasta meta information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Input sequences in FASTA format. 
+ pattern: "*.{fa,fna,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - tree: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.dnd": + type: file + description: File containing the output guidetree, in newick format. + pattern: "*.dnd" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lrauschning" diff --git a/modules/mirpedrol/magus/guidetree/tests/main.nf.test b/modules/mirpedrol/magus/guidetree/tests/main.nf.test new file mode 100644 index 00000000..5c07799d --- /dev/null +++ b/modules/mirpedrol/magus/guidetree/tests/main.nf.test @@ -0,0 +1,39 @@ +nextflow_process { + + name "Test Process MAGUS_GUIDETREE" + script "../main.nf" + process "MAGUS_GUIDETREE" + + tag "modules" + tag "modules_mirpedrol" + tag "magus" + tag "magus/guidetree" + + test("setoxin - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + //{ assert snapshot(process.out.tree).match("tree")}, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(process.out.tree[0][1]).getText().contains("1apf") }, + { assert path(process.out.tree[0][1]).getText().contains("1ahl") }, + { assert path(process.out.tree[0][1]).getText().contains("1atx") }, + { assert path(process.out.tree[0][1]).getText().contains("1sh1") }, + { assert path(process.out.tree[0][1]).getText().contains("1bds") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/magus/guidetree/tests/main.nf.test.snap 
b/modules/mirpedrol/magus/guidetree/tests/main.nf.test.snap new file mode 100644 index 00000000..d564be3d --- /dev/null +++ b/modules/mirpedrol/magus/guidetree/tests/main.nf.test.snap @@ -0,0 +1,23 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,5a2ad92c9ea945c4bf4890f02ca2562f" + ] + ], + "timestamp": "2024-03-28T18:25:41.292337485" + }, + "tree": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tree:md5,c742636229d166322a2824d409595738" + ] + ] + ], + "timestamp": "2024-03-28T18:25:41.226027114" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/magus/guidetree/tests/tags.yml b/modules/mirpedrol/magus/guidetree/tests/tags.yml new file mode 100644 index 00000000..75534069 --- /dev/null +++ b/modules/mirpedrol/magus/guidetree/tests/tags.yml @@ -0,0 +1,2 @@ +magus/guidetree: + - "modules/mirpedrol/magus/guidetree/**" diff --git a/modules/mirpedrol/magus/treealign/environment.yml b/modules/mirpedrol/magus/treealign/environment.yml new file mode 100644 index 00000000..f6b42d26 --- /dev/null +++ b/modules/mirpedrol/magus/treealign/environment.yml @@ -0,0 +1,8 @@ +name: magus_treealign +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::magus-msa=0.2.0 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/magus/treealign/main.nf b/modules/mirpedrol/magus/treealign/main.nf new file mode 100644 index 00000000..0fd93baa --- /dev/null +++ b/modules/mirpedrol/magus/treealign/main.nf @@ -0,0 +1,55 @@ +process MAGUS_TREEALIGN { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ae4ea1182e75371808710b6c081bef8b228c4815:10b41722a6b9471a0945fe6baeb9aff444d8eb1d-0': + 'biocontainers/mulled-v2-ae4ea1182e75371808710b6c081bef8b228c4815:10b41722a6b9471a0945fe6baeb9aff444d8eb1d-0' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(tree) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // using >() is necessary to preserve the return value, + // so nextflow knows to display an error when it failed + // using --overwrite is necessary, as the file descriptor generated by the named file will already exist + """ + magus \\ + -np $task.cpus \\ + -i $fasta \\ + -d ./ \\ + --overwrite -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) \\ + -t $tree \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MAGUS: \$(magus --version) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MAGUS: \$(magus --version) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/magus/treealign/meta.yml b/modules/mirpedrol/magus/treealign/meta.yml new file mode 100644 index 00000000..11091388 --- /dev/null +++ b/modules/mirpedrol/magus/treealign/meta.yml @@ -0,0 +1,65 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/modules/yaml-schema.json +name: "magus_treealign" +description: Multiple Sequence Alignment using Graph Clustering +keywords: + - MSA + - alignment + - treealignment + - msa + - genomics + - graph +tools: + - "magus": + description: 
"Multiple Sequence Alignment using Graph Clustering" + homepage: "https://github.com/vlasmirnov/MAGUS" + documentation: "https://github.com/vlasmirnov/MAGUS" + tool_dev_url: "https://github.com/vlasmirnov/MAGUS" + doi: "10.1093/bioinformatics/btaa992" + licence: ["MIT"] + identifier: biotools:magus + +input: + - - meta: + type: map + description: | + Groovy Map containing the fasta meta information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Input sequences in FASTA format. + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 + - - meta2: + type: map + description: | + Groovy Map containing sample information for the specified guide tree (if supplied) + e.g. `[ id:'test', single_end:false ]` + - tree: + type: file + description: Optional path to a file containing a guide tree in newick format + to use as input. If empty, or overwritten by passing `-t [fasttree|fasttree-noml|clustal|parttree]`, + MAGUS will construct its own guide tree. If empty, `fasttree` is used as a + default. + pattern: "*.{dnd}" +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample meta information. + e.g. `[ id:'test', single_end:false ]` + - "*.aln.gz": + type: file + description: File containing the output alignment, in FASTA format containing + gaps. The sequences may be in a different order than in the input FASTA. 
+ pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lrauschning" diff --git a/modules/mirpedrol/magus/treealign/tests/main.nf.test b/modules/mirpedrol/magus/treealign/tests/main.nf.test new file mode 100644 index 00000000..359178f3 --- /dev/null +++ b/modules/mirpedrol/magus/treealign/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process MAGUS_TREEALIGN" + script "../main.nf" + process "MAGUS_TREEALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "magus" + tag "magus/treealign" + tag "magus/guidetree" + + + test("setoxin - fasta - guide_tree") { + + setup { + run("MAGUS_GUIDETREE") { + script "../../guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + input[1] = MAGUS_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} + """ + } + } + + then { + assertAll( + { assert process.success }, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1apf") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1atx") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1bds") }, + { assert 
snapshot(process.out.versions).match("with_guide_tree_versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/magus/treealign/tests/main.nf.test.snap b/modules/mirpedrol/magus/treealign/tests/main.nf.test.snap new file mode 100644 index 00000000..119a2389 --- /dev/null +++ b/modules/mirpedrol/magus/treealign/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "with_guide_tree_versions": { + "content": [ + [ + "versions.yml:md5,25c60978b2ebb3f6729b00efae44dcee" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T15:18:06.218411" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/magus/treealign/tests/tags.yml b/modules/mirpedrol/magus/treealign/tests/tags.yml new file mode 100644 index 00000000..c7d2f94a --- /dev/null +++ b/modules/mirpedrol/magus/treealign/tests/tags.yml @@ -0,0 +1,2 @@ +magus/treealign: + - "modules/mirpedrol/magus/treealign/**" diff --git a/modules/mirpedrol/muscle5/super5/environment.yml b/modules/mirpedrol/muscle5/super5/environment.yml new file mode 100644 index 00000000..fbaf4a2e --- /dev/null +++ b/modules/mirpedrol/muscle5/super5/environment.yml @@ -0,0 +1,8 @@ +name: muscle5_super5 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::muscle=5.1 + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/muscle5/super5/main.nf b/modules/mirpedrol/muscle5/super5/main.nf new file mode 100644 index 00000000..09545015 --- /dev/null +++ b/modules/mirpedrol/muscle5/super5/main.nf @@ -0,0 +1,62 @@ +process MUSCLE5_SUPER5 { + tag "$meta.id" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-8eb01a3c2755c935d070dd03ff2dee698eeb4466:ceb6e65e00346ed20d0d8078dddf9858a7af0fe2-0': + 'biocontainers/mulled-v2-8eb01a3c2755c935d070dd03ff2dee698eeb4466:ceb6e65e00346ed20d0d8078dddf9858a7af0fe2-0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + prefix = args.contains('-perm all') ? "${prefix}@" : "${prefix}" + def write_output = (!args.contains('-perm all')) ? " -output >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "-output ${prefix}.aln" + // muscle internally expands the shell pipe to a file descriptor of the form /dev/fd/ + // this causes it to fail, unless -output is left at the end of the call + // see also clustalo/align + // using >() is necessary to preserve the return value, + // so nextflow knows to display an error when it failed + """ + muscle \\ + -super5 ${fasta} \\ + ${args} \\ + -threads ${task.cpus} \\ + $write_output + + + # output may be multiple files if -perm all is set + # compress these individually + if ${args.contains('-perm all')}; then + pigz -p ${task.cpus} *.aln + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + muscle: \$(muscle -version | head -n 1 | cut -d ' ' -f 2 | sed 's/.linux64//') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + muscle: \$(muscle -version | head -n 1 | cut -d ' ' -f 2 | sed 's/.linux64//') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/muscle5/super5/meta.yml b/modules/mirpedrol/muscle5/super5/meta.yml 
new file mode 100644 index 00000000..695d8a77 --- /dev/null +++ b/modules/mirpedrol/muscle5/super5/meta.yml @@ -0,0 +1,60 @@ +name: "muscle5_super5" +description: Muscle is a program for creating multiple alignments of amino acid or + nucleotide sequences. This particular module uses the super5 algorithm for very + big alignments. It can permutate the guide tree according to a set of flags. +keywords: + - align + - msa + - multiple sequence alignment + - msa + - align +tools: + - muscle-super5: + description: "Muscle v5 is a major re-write of MUSCLE based on new algorithms." + homepage: "https://drive5.com/muscle5/" + documentation: "https://drive5.com/muscle5/manual/" + doi: "10.1101/2021.06.20.449169" + licence: ["Public Domain"] + identifier: "biotools:muscle" + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input sequences for alignment must be in FASTA format + pattern: "*.{fasta,fa,fna}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.aln.gz": + type: file + description: Multiple sequence alignment produced in gzipped FASTA format. If + '-perm all' is passed in ext.args, this will be multiple files per input! 
+ pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@alessiovignoli" + - "@JoseEspinosa" +maintainers: + - "@alessiovignoli" + - "@JoseEspinosa" + - "@lrauschning" diff --git a/modules/mirpedrol/muscle5/super5/tests/main.nf.test b/modules/mirpedrol/muscle5/super5/tests/main.nf.test new file mode 100644 index 00000000..bfb375cf --- /dev/null +++ b/modules/mirpedrol/muscle5/super5/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process MUSCLE5_SUPER5" + script "../main.nf" + process "MUSCLE5_SUPER5" + config "./nextflow.config" + + tag "modules" + tag "modules_mirpedrol" + tag "muscle5" + tag "muscle5/super5" + + + test("fasta - align_sequence") { + when { + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment") }, + { assert snapshot(process.out.versions).match("versions_align") } + ) + } + } + + test("fasta - align_sequence - perm_all") { + config "./perm_all.config" + when { + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("perm-all")}, + { assert snapshot(process.out.versions).match("versions_perm") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/muscle5/super5/tests/main.nf.test.snap b/modules/mirpedrol/muscle5/super5/tests/main.nf.test.snap new file mode 100644 index 00000000..e38233aa --- /dev/null +++ 
b/modules/mirpedrol/muscle5/super5/tests/main.nf.test.snap @@ -0,0 +1,65 @@ +{ + "versions_align": { + "content": [ + [ + "versions.yml:md5,5b5af5ac30721027249837f33a4da01f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T11:56:05.493488" + }, + "perm-all": { + "content": [ + [ + [ + { + "id": "test" + }, + [ + "testabc.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a", + "testacb.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a", + "testbca.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a", + "testnone.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-02-09T19:08:37.386512953" + }, + "versions_perm": { + "content": [ + [ + "versions.yml:md5,5b5af5ac30721027249837f33a4da01f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T11:56:08.627006" + }, + "alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T11:56:05.446683" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/muscle5/super5/tests/nextflow.config b/modules/mirpedrol/muscle5/super5/tests/nextflow.config new file mode 100644 index 00000000..e69de29b diff --git a/modules/mirpedrol/muscle5/super5/tests/perm_all.config b/modules/mirpedrol/muscle5/super5/tests/perm_all.config new file mode 100644 index 00000000..d3502716 --- /dev/null +++ b/modules/mirpedrol/muscle5/super5/tests/perm_all.config @@ -0,0 +1,3 @@ +process { + ext.args = { "-perm all" } +} diff --git a/modules/mirpedrol/muscle5/super5/tests/tags.yml b/modules/mirpedrol/muscle5/super5/tests/tags.yml new file mode 100644 index 00000000..4c144ae6 --- /dev/null +++ b/modules/mirpedrol/muscle5/super5/tests/tags.yml @@ -0,0 +1,2 @@ +muscle5/super5: + - "modules/mirpedrol/muscle5/super5/**" diff 
--git a/modules/mirpedrol/tcoffee/align/environment.yml b/modules/mirpedrol/tcoffee/align/environment.yml new file mode 100644 index 00000000..28f159fd --- /dev/null +++ b/modules/mirpedrol/tcoffee/align/environment.yml @@ -0,0 +1,8 @@ +name: tcoffee_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::t-coffee=13.46.0.919e8c6b + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/tcoffee/align/main.nf b/modules/mirpedrol/tcoffee/align/main.nf new file mode 100644 index 00000000..a5cc3bd2 --- /dev/null +++ b/modules/mirpedrol/tcoffee/align/main.nf @@ -0,0 +1,58 @@ +process TCOFFEE_ALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0': + 'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }" + + input: + tuple val(meta) , path(fasta) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + export TEMP='./' + t_coffee -seq ${fasta} \ + $args \ + -output fasta_aln \ + -thread ${task.cpus} \ + -outfile stdout \ + | pigz -cp ${task.cpus} > ${prefix}.aln.gz + + # If stdout file exist, then compress the file + # This is a patch for the current behaviour of the regressive algorithm + # that does not support the stdout redirection + if [ -f stdout ]; then + pigz -cp ${task.cpus} < stdout > ${prefix}.aln.gz + rm stdout + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 
's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/tcoffee/align/meta.yml b/modules/mirpedrol/tcoffee/align/meta.yml new file mode 100644 index 00000000..4a18afb5 --- /dev/null +++ b/modules/mirpedrol/tcoffee/align/meta.yml @@ -0,0 +1,61 @@ +name: "tcoffee_align" +description: Aligns sequences using T_COFFEE +keywords: + - alignment + - MSA + - genomics + - msa + - align +tools: + - "tcoffee": + description: "A collection of tools for Computing, Evaluating and Manipulating + Multiple Alignments of DNA, RNA, Protein Sequences and Structures." + homepage: "http://www.tcoffee.org/Projects/tcoffee/" + documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" + tool_dev_url: "https://github.com/cbcrg/tcoffee" + doi: "10.1006/jmbi.2000.4042" + licence: ["GPL v3"] + identifier: "biotools:tcoffee" + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file in FASTA format. May be gzipped. 
+ pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" + - "@alessiovignoli" +maintainers: + - "@luisas" + - "@JoseEspinosa" + - "@lrauschning" + - "@alessiovignoli" diff --git a/modules/mirpedrol/tcoffee/align/tests/main.nf.test b/modules/mirpedrol/tcoffee/align/tests/main.nf.test new file mode 100644 index 00000000..07c3fd83 --- /dev/null +++ b/modules/mirpedrol/tcoffee/align/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process TCOFFEE_ALIGN" + script "../main.nf" + process "TCOFFEE_ALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "tcoffee" + tag "tcoffee/align" + tag "untar" + + test("fasta - align_sequence") { + + when { + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment")}, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/mirpedrol/tcoffee/align/tests/main.nf.test.snap b/modules/mirpedrol/tcoffee/align/tests/main.nf.test.snap new file mode 100644 index 00000000..7a64b137 --- /dev/null +++ b/modules/mirpedrol/tcoffee/align/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,fb187c9186b50a8076d08cd3be3c1b70" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T15:28:18.280597" + }, + "alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,bd1db08ad04514cc6d1334598c1a6ef0" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-28T18:59:35.169119" + } 
+} \ No newline at end of file diff --git a/modules/mirpedrol/tcoffee/align/tests/tags.yml b/modules/mirpedrol/tcoffee/align/tests/tags.yml new file mode 100644 index 00000000..f170df92 --- /dev/null +++ b/modules/mirpedrol/tcoffee/align/tests/tags.yml @@ -0,0 +1,2 @@ +tcoffee/align: + - "modules/mirpedrol/tcoffee/align/**" diff --git a/modules/mirpedrol/tcoffee/treealign/environment.yml b/modules/mirpedrol/tcoffee/treealign/environment.yml new file mode 100644 index 00000000..efc70609 --- /dev/null +++ b/modules/mirpedrol/tcoffee/treealign/environment.yml @@ -0,0 +1,8 @@ +name: tcoffee_treealign +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::t-coffee=13.46.0.919e8c6b + - conda-forge::pigz=2.8 diff --git a/modules/mirpedrol/tcoffee/treealign/main.nf b/modules/mirpedrol/tcoffee/treealign/main.nf new file mode 100644 index 00000000..76ccb6fd --- /dev/null +++ b/modules/mirpedrol/tcoffee/treealign/main.nf @@ -0,0 +1,60 @@ +process TCOFFEE_TREEALIGN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0': + 'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(tree) + + output: + tuple val(meta), path("*.aln.gz"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + export TEMP='./' + t_coffee -seq ${fasta} \ + -usetree $tree \ + -output fasta_aln \ + $args \ + -thread ${task.cpus} \ + -outfile stdout \ + | pigz -cp ${task.cpus} > ${prefix}.aln.gz + + # If stdout file exist, then compress the file + # This is a patch for the current behaviour of the regressive algorithm + # that does not support the stdout redirection + if [ -f stdout ]; then + pigz -cp ${task.cpus} < stdout > ${prefix}.aln.gz + rm stdout + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.aln.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/mirpedrol/tcoffee/treealign/meta.yml b/modules/mirpedrol/tcoffee/treealign/meta.yml new file mode 100644 index 00000000..d2103d3b --- /dev/null +++ b/modules/mirpedrol/tcoffee/treealign/meta.yml @@ -0,0 +1,70 @@ +name: "tcoffee_treealign" +description: Aligns sequences using T_COFFEE +keywords: + - alignment + - treealignment + - MSA + - genomics + - msa +tools: + - "tcoffee": + description: "A 
collection of tools for Computing, Evaluating and Manipulating + Multiple Alignments of DNA, RNA, Protein Sequences and Structures." + homepage: "http://www.tcoffee.org/Projects/tcoffee/" + documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" + tool_dev_url: "https://github.com/cbcrg/tcoffee" + doi: "10.1006/jmbi.2000.4042" + licence: ["GPL v3"] + identifier: "biotools:tcoffee" + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - fasta: + type: file + description: Input sequences in FASTA format + pattern: "*.{fa,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 + - - meta2: + type: map + description: | + Groovy Map containing tree information + e.g. `[ id:'test_tree']` + - tree: + type: file + description: Input guide tree in Newick format + pattern: "*.{dnd}" +output: + - alignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - "*.aln.gz": + type: file + description: Alignment file in FASTA format. May be gzipped. 
+ pattern: "*.aln.gz" + ontologies: + - edam: http://edamontology.org/format_1984 + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" + - "@JoseEspinosa" + - "@alessiovignoli" +maintainers: + - "@luisas" + - "@JoseEspinosa" + - "@lrauschning" + - "@alessiovignoli" diff --git a/modules/mirpedrol/tcoffee/treealign/tests/main.nf.test b/modules/mirpedrol/tcoffee/treealign/tests/main.nf.test new file mode 100644 index 00000000..0ae93dc5 --- /dev/null +++ b/modules/mirpedrol/tcoffee/treealign/tests/main.nf.test @@ -0,0 +1,51 @@ +nextflow_process { + + name "Test Process TCOFFEE_TREEALIGN" + script "../main.nf" + process "TCOFFEE_TREEALIGN" + + tag "modules" + tag "modules_mirpedrol" + tag "tcoffee" + tag "tcoffee/treealign" + tag "famsa/guidetree" + tag "untar" + + test("sarscov2 - fasta - align_with_guide_tree") { + + setup { + + run("FAMSA_GUIDETREE") { + script "../../../famsa/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + ] + + """ + } + } + } + + when { + process { + """ + input[0] = [ [ id:'test' ], + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + ] + input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test'], tree]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment_guidetree")}, + { assert snapshot(process.out.versions).match("versions_guidetree") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/mirpedrol/tcoffee/treealign/tests/main.nf.test.snap b/modules/mirpedrol/tcoffee/treealign/tests/main.nf.test.snap new file mode 100644 index 00000000..a8a6142d --- /dev/null +++ b/modules/mirpedrol/tcoffee/treealign/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "versions_guidetree": { + 
"content": [ + [ + "versions.yml:md5,1fe864892229d72f6fe3bca7871491bc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-11T09:13:29.39446115" + }, + "alignment_guidetree": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,93bc8adfcd88f7913718eacc13da8e4a" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-11T09:13:29.320114268" + } +} \ No newline at end of file diff --git a/modules/mirpedrol/tcoffee/treealign/tests/tags.yml b/modules/mirpedrol/tcoffee/treealign/tests/tags.yml new file mode 100644 index 00000000..f170df92 --- /dev/null +++ b/modules/mirpedrol/tcoffee/treealign/tests/tags.yml @@ -0,0 +1,2 @@ +tcoffee/treealign: + - "modules/mirpedrol/tcoffee/treealign/**" diff --git a/subworkflows/mirpedrol/msa_alignment/main.nf b/subworkflows/mirpedrol/msa_alignment/main.nf new file mode 100644 index 00000000..56b9fcce --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/main.nf @@ -0,0 +1,35 @@ +if ( params.aligner == "clustalo/align" ) { + include { CLUSTALO_ALIGN as ALIGNER } from '../../../modules/mirpedrol/clustalo/align/main' +} else if ( params.aligner == "famsa/align" ) { + include { FAMSA_ALIGN as ALIGNER } from '../../../modules/mirpedrol/famsa/align/main' +} else if ( params.aligner == "kalign/align" ) { + include { KALIGN_ALIGN as ALIGNER } from '../../../modules/mirpedrol/kalign/align/main' +} else if ( params.aligner == "learnmsa/align" ) { + include { LEARNMSA_ALIGN as ALIGNER } from '../../../modules/mirpedrol/learnmsa/align/main' +} else if ( params.aligner == "mafft" ) { + include { MAFFT as ALIGNER } from '../../../modules/mirpedrol/mafft/main' +} else if ( params.aligner == "magus/align" ) { + include { MAGUS_ALIGN as ALIGNER } from '../../../modules/mirpedrol/magus/align/main' +} else if ( params.aligner == "muscle5/super5" ) { + include { MUSCLE5_SUPER5 as ALIGNER } from '../../../modules/mirpedrol/muscle5/super5/main' +} else if (
params.aligner == "tcoffee/align" ) { + include { TCOFFEE_ALIGN as ALIGNER } from '../../../modules/mirpedrol/tcoffee/align/main' +} + +workflow MSA_ALIGNMENT { + + take: + ch_fasta // channel: [ meta, fasta ] + + main: + + ch_versions = Channel.empty() + + ALIGNER ( ch_fasta ) + ch_versions = ch_versions.mix(ALIGNER.out.versions.first()) + + emit: + alignment = ALIGNER.out.alignment // channel: [ meta, *.aln.gz ] + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/mirpedrol/msa_alignment/meta.yml b/subworkflows/mirpedrol/msa_alignment/meta.yml new file mode 100644 index 00000000..16f6b0b1 --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/meta.yml @@ -0,0 +1,39 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "msa_alignment" +description: Perform a multiple sequence alignment with one of the possible aligners +keywords: + - alignment + - MSA + - genomics +components: + - clustalo/align + - famsa/align + - kalign/align + - learnmsa/align + - mafft + - magus/align + - muscle5/super5 + - tcoffee/align +input: + - ch_fasta: + type: file + description: | + Structure: [ val(meta), path(fasta) ] + meta: Groovy Map containing sample information + fasta: Input sequences in FASTA format (*.{fa,fasta}) +output: + - alignment: + type: file + description: | + Structure: [ val(meta), path(alignment) ] + meta: Groovy Map containing sample information + alignment: Alignment file, in gzipped fasta format (*.aln.gz) + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test new file mode 100644 index 00000000..827c4de2 --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test @@ -0,0 +1,193 @@ 
+nextflow_workflow { + + name "Test Subworkflow MSA_ALIGNMENT" + script "../main.nf" + workflow "MSA_ALIGNMENT" + + tag "subworkflows" + tag "subworkflows_mirpedrol" + tag "subworkflows/msa_alignment" + tag "clustalo/align" + tag "famsa/align" + tag "kalign/align" + tag "learnmsa/align" + tag "mafft" + tag "magus/align" + tag "muscle5/super5" + tag "tcoffee/align" + + test("sarscov2 - clustalo/align") { + + when { + params.aligner = "clustalo/align" + workflow { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("sarscov2 - famsa/align") { + + when { + params.aligner = "famsa/align" + workflow { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("sarscov2 - kalign/align") { + + when { + params.aligner = "kalign/align" + workflow { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("sarscov2 - learnmsa/align") { + tag "mytest" + + when { + params.aligner = "learnmsa/align" + workflow { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert path(workflow.out.alignment.get(0).get(1)).getTextGzip().contains(">sample1")}, + { assert snapshot(workflow.out.versions).match("versions") } + ) + } + } + + test("sarscov2 - mafft") { + + when { + params.aligner = 
"mafft" + workflow { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("sarscov2 - magus/align") { + + when { + params.aligner = "magus/align" + workflow { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1apf") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1atx") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1bds") }, + { assert snapshot(workflow.out.versions).match("versions1") } + ) + } + } + + test("sarscov2 - muscle5/super5") { + + when { + params.aligner = "muscle5/super5" + workflow { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("sarscov2 - tcoffee/align") { + + when { + params.aligner = "tcoffee/align" + workflow { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } +} diff --git a/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap 
b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap new file mode 100644 index 00000000..a2c6b9af --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap @@ -0,0 +1,224 @@ +{ + "sarscov2 - mafft": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "1": [ + "versions.yml:md5,3376c16fb93c6f92f9f2a1c6c5d7d058" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,23426611f4a0df532b6708f072bd445b" + ] + ], + "versions": [ + "versions.yml:md5,3376c16fb93c6f92f9f2a1c6c5d7d058" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T12:28:57.275899399" + }, + "sarscov2 - famsa/align": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" + ] + ], + "1": [ + "versions.yml:md5,aec272178993715fd8d3e1ce192fe7d3" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" + ] + ], + "versions": [ + "versions.yml:md5,aec272178993715fd8d3e1ce192fe7d3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T12:26:09.158593884" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,1a266e903df6779d66f9e85f51b04240" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T17:54:03.355114155" + }, + "versions1": { + "content": [ + [ + "versions.yml:md5,45270687addf1e651298ad01be9858b9" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-15T15:57:35.581187151" + }, + "sarscov2 - kalign/align": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,c165ecf48fb89862cc2a991cc3cadb2d" + ] + ], + "1": [ + "versions.yml:md5,98ce16aea87f74ab4e08b2b96c98b3e8" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,c165ecf48fb89862cc2a991cc3cadb2d" + ] + ], + 
"versions": [ + "versions.yml:md5,98ce16aea87f74ab4e08b2b96c98b3e8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T12:26:27.100719984" + }, + "sarscov2 - muscle5/super5": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" + ] + ], + "1": [ + "versions.yml:md5,892b294cb7d1f3fc16beee1d4d023165" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" + ] + ], + "versions": [ + "versions.yml:md5,892b294cb7d1f3fc16beee1d4d023165" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T14:41:45.676732121" + }, + "sarscov2 - tcoffee/align": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,f9feb411ffabb1603473c8a60c06187d" + ] + ], + "1": [ + "versions.yml:md5,5b9b99bdeef801de9eb0157b8240f9a2" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,f9feb411ffabb1603473c8a60c06187d" + ] + ], + "versions": [ + "versions.yml:md5,5b9b99bdeef801de9eb0157b8240f9a2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T14:51:25.769451346" + }, + "sarscov2 - clustalo/align": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ], + "1": [ + "versions.yml:md5,b876f18fb1dfcb122933043312a64bae" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ], + "versions": [ + "versions.yml:md5,b876f18fb1dfcb122933043312a64bae" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T12:25:28.903889766" + } +} \ No newline at end of file diff --git a/subworkflows/mirpedrol/msa_guidetree/main.nf b/subworkflows/mirpedrol/msa_guidetree/main.nf new file mode 100644 index 00000000..ead8f637 --- /dev/null +++ 
b/subworkflows/mirpedrol/msa_guidetree/main.nf @@ -0,0 +1,25 @@ +if ( params.guidetree == "clustalo/guidetree" ) { + include { CLUSTALO_GUIDETREE as GUIDETREE } from '../../../modules/mirpedrol/clustalo/guidetree/main' +} else if ( params.guidetree == "famsa/guidetree" ) { + include { FAMSA_GUIDETREE as GUIDETREE } from '../../../modules/mirpedrol/famsa/guidetree/main' +} else if ( params.guidetree == "magus/guidetree" ) { + include { MAGUS_GUIDETREE as GUIDETREE } from '../../../modules/mirpedrol/magus/guidetree/main' +} + +workflow MSA_GUIDETREE { + + take: + ch_fasta // channel: [ meta, fasta ] + + main: + + ch_versions = Channel.empty() + + GUIDETREE ( ch_fasta ) + ch_versions = ch_versions.mix(GUIDETREE.out.versions) + + emit: + guidetree = GUIDETREE.out.tree // channel: [ meta, *.dnd ] + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/mirpedrol/msa_guidetree/meta.yml b/subworkflows/mirpedrol/msa_guidetree/meta.yml new file mode 100644 index 00000000..4991038e --- /dev/null +++ b/subworkflows/mirpedrol/msa_guidetree/meta.yml @@ -0,0 +1,34 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "msa_guidetree" +description: Compute the guide tree of a multiple sequence alignment with one of the possible tools +keywords: + - guide tree + - MSA + - genomics +components: + - clustalo/guidetree + - famsa/guidetree + - magus/guidetree +input: + - ch_fasta: + type: file + description: | + Structure: [ val(meta), path(fasta) ] + meta: Groovy Map containing sample information + fasta: Input sequences in FASTA format (*.{fa,fasta}) +output: + - guidetree: + type: file + description: | + Structure: [ val(meta), path(guidetree) ] + meta: Groovy Map containing sample information + guidetree: Guide tree file, in Newick format (*.dnd) + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] +authors: + -
"@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test new file mode 100644 index 00000000..31a1f2f9 --- /dev/null +++ b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_workflow { + + name "Test Subworkflow MSA_GUIDETREE" + script "../main.nf" + workflow "MSA_GUIDETREE" + + tag "subworkflows" + tag "subworkflows_mirpedrol" + tag "subworkflows/msa_guidetree" + tag "clustalo/guidetree" + tag "famsa/guidetree" + tag "magus/guidetree" + + test("sarscov2 - clustalo/guidetree") { + + when { + params.guidetree = "clustalo/guidetree" + workflow { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("sarscov2 - famsa/guidetree") { + + when { + params.guidetree = "famsa/guidetree" + workflow { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("sarscov2 - magus/guidetree") { + + when { + params.guidetree = "magus/guidetree" + workflow { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert path(workflow.out.guidetree[0][1]).getText().contains("1apf") }, + { assert path(workflow.out.guidetree[0][1]).getText().contains("1ahl") }, + { assert path(workflow.out.guidetree[0][1]).getText().contains("1atx") }, + { assert path(workflow.out.guidetree[0][1]).getText().contains("1sh1") }, + { assert 
path(workflow.out.guidetree[0][1]).getText().contains("1bds") }, + { assert snapshot(workflow.out.versions).match("versions1") } + ) + } + } +} diff --git a/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test.snap b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test.snap new file mode 100644 index 00000000..bcc0df09 --- /dev/null +++ b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test.snap @@ -0,0 +1,80 @@ +{ + "sarscov2 - famsa/guidetree": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.dnd:md5,f3ef8b16a7a16cb4548942ebf2e7bad6" + ] + ], + "1": [ + "versions.yml:md5,723b3358beebc97847b4681f562bcea0" + ], + "guidetree": [ + [ + { + "id": "test" + }, + "test.dnd:md5,f3ef8b16a7a16cb4548942ebf2e7bad6" + ] + ], + "versions": [ + "versions.yml:md5,723b3358beebc97847b4681f562bcea0" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T13:12:32.7754812" + }, + "sarscov2 - clustalo/guidetree": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.dnd:md5,5428bad500a0a0bd985744bec1a12a70" + ] + ], + "1": [ + "versions.yml:md5,b1ee3efbf09bc7cf7b4970916a00fddc" + ], + "guidetree": [ + [ + { + "id": "test" + }, + "test.dnd:md5,5428bad500a0a0bd985744bec1a12a70" + ] + ], + "versions": [ + "versions.yml:md5,b1ee3efbf09bc7cf7b4970916a00fddc" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T13:12:06.216087789" + }, + "versions1": { + "content": [ + [ + "versions.yml:md5,75333144e16039f25cae8e933f30d003" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T13:13:24.823000398" + } +} \ No newline at end of file diff --git a/subworkflows/mirpedrol/msa_treealign/main.nf b/subworkflows/mirpedrol/msa_treealign/main.nf new file mode 100644 index 00000000..45acabac --- /dev/null +++ b/subworkflows/mirpedrol/msa_treealign/main.nf @@ -0,0 +1,29 @@ +if ( params.treealign == "famsa/treealign" ) { + 
include { FAMSA_TREEALIGN as TREEALIGN } from '../../../modules/mirpedrol/famsa/treealign/main' +} else if ( params.treealign == "magus/treealign" ) { + include { MAGUS_TREEALIGN as TREEALIGN } from '../../../modules/mirpedrol/magus/treealign/main' +} else if ( params.treealign == "clustalo/treealign" ) { + include { CLUSTALO_TREEALIGN as TREEALIGN } from '../../../modules/mirpedrol/clustalo/treealign/main' +} else if ( params.treealign == "tcoffee/treealign" ) { + include { TCOFFEE_TREEALIGN as TREEALIGN } from '../../../modules/mirpedrol/tcoffee/treealign/main' +} + +workflow MSA_TREEALIGN { + + take: + ch_fasta + ch_tree + + main: + + ch_versions = Channel.empty() + + TREEALIGN ( ch_fasta, ch_tree ) + ch_versions = ch_versions.mix(TREEALIGN.out.versions) + + emit: + alignment = TREEALIGN.out.alignment + versions = ch_versions + +} + diff --git a/subworkflows/mirpedrol/msa_treealign/meta.yml b/subworkflows/mirpedrol/msa_treealign/meta.yml new file mode 100644 index 00000000..f985d2ac --- /dev/null +++ b/subworkflows/mirpedrol/msa_treealign/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/subworkflows/yaml-schema.json +name: "msa_treealign" +description: Perform multiple sequence alignment from a provided guide tree +keywords: ["alignment", "treealignment", "msa"] +components: ["famsa/treealign", "magus/treealign", "clustalo/treealign", "tcoffee/treealign"] + +input: + - ch_fasta: + description: "Channel containing: meta, fasta" + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'test']` + + " + type: map + - fasta: + description: Input sequences in FASTA format + ontologies: + - edam: http://edamontology.org/format_1929 + pattern: "*.{fa,fasta}" + type: file + - ch_tree: + description: "Channel containing: meta2, tree" + structure: + - meta2: + description: "Groovy Map containing tree information + + e.g.
`[ id:'test_tree']` + + " + type: map + - tree: + description: Input guide tree in Newick format + pattern: "*.{dnd}" + type: file + +output: + - alignment: + description: Output channel alignment + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'test']` + + " + type: map + - "*.aln.gz": + description: Alignment file, in gzipped fasta format + ontologies: + - edam: http://edamontology.org/format_1984 + pattern: "*.aln.gz" + type: file + - versions: + description: Output channel versions + structure: + - versions.yml: + description: File containing software versions + pattern: versions.yml + type: file + +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test new file mode 100644 index 00000000..f8356e93 --- /dev/null +++ b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test @@ -0,0 +1,172 @@ +nextflow_workflow { + + name "Test Subworkflow MSA_TREEALIGN" + script "../main.nf" + workflow "MSA_TREEALIGN" + + tag "subworkflows" + tag "subworkflows_mirpedrol" + tag "subworkflows/msa_treealign" + tag "famsa/treealign" + tag "magus/treealign" + tag "clustalo/treealign" + tag "tcoffee/treealign" + tag "clustalo/guidetree" + tag "famsa/guidetree" + tag "magus/guidetree" + + + test("run famsa/treealign") { + + setup { + run("FAMSA_GUIDETREE") { + script "../../../../modules/mirpedrol/famsa/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + params.treealign = "famsa/treealign" + workflow { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} + """ + } + } + + 
then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("famsa_alignment")}, + { assert snapshot(workflow.out.versions).match("famsa_versions") } + ) + } + } + + test("run magus/treealign") { + + setup { + run("MAGUS_GUIDETREE") { + script "../../../../modules/mirpedrol/magus/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + } + + when { + params.treealign = "magus/treealign" + workflow { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + input[1] = MAGUS_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} + """ + } + } + + then { + assertAll( + { assert workflow.success }, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1apf") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1atx") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1bds") }, + { assert snapshot(workflow.out.versions).match("magus_versions") } + ) + } + } + + test("run clustalo/treealign") { + + setup { + + run("CLUSTALO_GUIDETREE") { + script "../../../../modules/mirpedrol/clustalo/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + params.treealign = "clustalo/treealign" + workflow { + """ + 
input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + input[1] = CLUSTALO_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("clustalo_alignment")}, + { assert snapshot(workflow.out.versions).match("clustalo_versions") } + ) + } + } + + test("run tcoffee/treealign") { + setup { + + run("FAMSA_GUIDETREE") { + script "../../../../modules/mirpedrol/famsa/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + ] + + """ + } + } + } + + when { + params.treealign = "tcoffee/treealign" + workflow { + """ + input[0] = [ [ id:'test' ], + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + ] + input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test'], tree]} + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("tcoffee_alignment")}, + { assert snapshot(workflow.out.versions).match("tcoffee_versions") } + ) + } + } + + +} diff --git a/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test.snap b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test.snap new file mode 100644 index 00000000..fd1b6dde --- /dev/null +++ b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "famsa_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:17.914586" + }, + "famsa_versions": { + "content": [ + [ + "versions.yml:md5,3002c8ce2c4f1b9a4b084b61efc0c2b4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": 
"24.04.3" + }, + "timestamp": "2024-08-22T15:21:17.922286" + }, + "clustalo_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:52.483352" + }, + "clustalo_versions": { + "content": [ + [ + "versions.yml:md5,2c9f1ab1c3e8a431546fda2ef0170713" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:52.490603" + }, + "tcoffee_versions": { + "content": [ + [ + "versions.yml:md5,c6bb7f0fcab261972bd722de048f36b1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:57.324541" + }, + "magus_versions": { + "content": [ + [ + "versions.yml:md5,6bd0fc2decf3dd8e3b43a4d5b9c0cc58" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:31.000053" + }, + "tcoffee_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,93bc8adfcd88f7913718eacc13da8e4a" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-22T15:21:57.317953" + } +} \ No newline at end of file From 8b6c879ace4f5208059e3a8ddb4d6b7a672204dc Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 28 Aug 2024 16:02:10 +0200 Subject: [PATCH 02/23] use class subworkflows --- conf/test.config | 3 ++ nextflow.config | 5 +++ nextflow_schema.json | 41 ++++++++++++++++++---- workflows/multiplesequencealign.nf | 56 ++++++++++++++++++++++-------- 4 files changed, 84 insertions(+), 21 deletions(-) diff --git a/conf/test.config b/conf/test.config index de7041fd..79a961fc 100644 --- a/conf/test.config +++ b/conf/test.config @@ -33,4 +33,7 @@ params { // Input data input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv' tools = params.pipelines_testdata_base_path + 
'multiplesequencealign/toolsheet/v1.0/toolsheet_full.csv' + aligner = 'clustalo/align' + guidetree = 'clustalo/guidetree' + treealign = 'clustalo/treealign' } diff --git a/nextflow.config b/nextflow.config index 6a7777bd..4c94d84f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,11 @@ params { input = null tools = null + // Tool selectors + aligner = '' + guidetree = '' + treealign = '' + // Stats skip_stats = false calc_sim = false diff --git a/nextflow_schema.json b/nextflow_schema.json index b770573c..bcb3d08f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/nf-core/multiplesequencealign/master/nextflow_schema.json", "title": "nf-core/multiplesequencealign pipeline parameters", "description": "Pipeline to run and benchmark multiple sequence alignment tools.", @@ -54,6 +54,27 @@ } } }, + "tool_selectors": { + "title": "Tool selectors", + "type": "object", + "description": "Parameters to select which tools to use", + "default": "", + "fa_icon": "fas fa-tools", + "properties": { + "aligner": { + "type": "string", + "description": "Which aligner tool to use" + }, + "guidetree": { + "type": "string", + "description": "Which aligner tool to use to generate a guide tree" + }, + "treealign": { + "type": "string", + "description": "Which aligner tool to use to align providing a generated guide tree" + } + } + }, "stats_options": { "title": "Stats options", "type": "object", @@ -73,7 +94,8 @@ "calc_seq_stats": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Calculate general statistics on input files." 
+ "description": "Calculate general statistics on input files.", + "default": true }, "extract_plddt": { "type": "boolean", @@ -83,7 +105,8 @@ "calc_gaps": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Extract total number of gaps and average number of gaps of the alignment." + "description": "Extract total number of gaps and average number of gaps of the alignment.", + "default": true } } }, @@ -101,12 +124,14 @@ "calc_sp": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Calculate the Sum of Pairs of alignment." + "description": "Calculate the Sum of Pairs of alignment.", + "default": true }, "calc_tc": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Calculate the Total Column Score of alignment." + "description": "Calculate the Total Column Score of alignment.", + "default": true }, "calc_irmsd": { "type": "boolean", @@ -144,7 +169,8 @@ }, "shiny_trace_mode": { "type": "string", - "description": "variable containing the shiny_trace mode to be used." 
+ "description": "variable containing the shiny_trace mode to be used.", + "default": "latest" } } }, @@ -366,6 +392,9 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/tool_selectors" + }, { "$ref": "#/definitions/stats_options" }, diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index c4952c2b..7ac039dd 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -50,6 +50,16 @@ include { UNTAR } from '../modules/nf-core/untar/main' include { CSVTK_JOIN as MERGE_STATS_EVAL } from '../modules/nf-core/csvtk/join/main.nf' include { PIGZ_COMPRESS } from '../modules/nf-core/pigz/compress/main' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT CLASS-MODULES MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { MSA_ALIGNMENT } from '../subworkflows/mirpedrol/msa_alignment/main' +include { MSA_GUIDETREE } from '../subworkflows/mirpedrol/msa_guidetree/main' +include { MSA_TREEALIGN } from '../subworkflows/mirpedrol/msa_treealign/main' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -168,28 +178,44 @@ workflow MULTIPLESEQUENCEALIGN { stats_summary = stats_summary.mix(STATS.out.stats_summary) } - // - // Align - // - compress_during_align = !params.skip_compression && params.skip_eval - ALIGN ( - ch_seqs, - ch_tools, - ch_structures_template, - compress_during_align - ) - ch_versions = ch_versions.mix(ALIGN.out.versions) + msa_alignment = Channel.empty() + + if (params.guidetree && params.treealign) { + // + // Compute tree + // + MSA_GUIDETREE ( + ch_seqs + ) + ch_versions = ch_versions.mix(MSA_GUIDETREE.out.versions) + + // + // Align with a given tree + // + MSA_TREEALIGN ( + ch_seqs, + MSA_GUIDETREE.out.guidetree + ) + ch_versions = ch_versions.mix(MSA_TREEALIGN.out.versions) + 
msa_alignment.mix(MSA_TREEALIGN.out.alignment) + } - if (!params.skip_compression && !compress_during_align) { - PIGZ_COMPRESS (ALIGN.out.msa) - ch_versions = ch_versions.mix(PIGZ_COMPRESS.out.versions) + if (params.aligner) { + // + // Align + // + MSA_ALIGNMENT ( + ch_seqs + ) + ch_versions = ch_versions.mix(MSA_ALIGNMENT.out.versions) + msa_alignment.mix(MSA_ALIGNMENT.out.alignment) } // // Evaluate the quality of the alignment // if (!params.skip_eval) { - EVALUATE (ALIGN.out.msa, ch_refs, ch_structures_template) + EVALUATE (msa_alignment, ch_refs, ch_structures_template) ch_versions = ch_versions.mix(EVALUATE.out.versions) evaluation_summary = evaluation_summary.mix(EVALUATE.out.eval_summary) } From a357e15081d584e0d5a7528b59938cfe28190899 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 28 Aug 2024 16:34:55 +0200 Subject: [PATCH 03/23] remove old - not needed - modules and subworkflows --- modules.json | 182 +++++------ .../nf-core/clustalo/align/environment.yml | 8 - modules/nf-core/clustalo/align/main.nf | 57 ---- modules/nf-core/clustalo/align/meta.yml | 61 ---- .../nf-core/clustalo/align/tests/main.nf.test | 98 ------ .../clustalo/align/tests/main.nf.test.snap | 57 ---- .../clustalo/align/tests/nextflow.config | 3 - modules/nf-core/clustalo/align/tests/tags.yml | 2 - .../clustalo/guidetree/environment.yml | 7 - modules/nf-core/clustalo/guidetree/main.nf | 47 --- modules/nf-core/clustalo/guidetree/meta.yml | 46 --- .../clustalo/guidetree/tests/main.nf.test | 33 -- .../guidetree/tests/main.nf.test.snap | 23 -- .../nf-core/clustalo/guidetree/tests/tags.yml | 2 - modules/nf-core/famsa/align/environment.yml | 7 - modules/nf-core/famsa/align/main.nf | 53 --- modules/nf-core/famsa/align/meta.yml | 58 ---- .../nf-core/famsa/align/tests/main.nf.test | 96 ------ .../famsa/align/tests/main.nf.test.snap | 57 ---- modules/nf-core/famsa/align/tests/tags.yml | 2 - .../nf-core/famsa/guidetree/environment.yml | 7 - modules/nf-core/famsa/guidetree/main.nf | 49 --- 
modules/nf-core/famsa/guidetree/meta.yml | 46 --- modules/nf-core/kalign/align/environment.yml | 8 - modules/nf-core/kalign/align/main.nf | 50 --- modules/nf-core/kalign/align/meta.yml | 47 --- .../nf-core/kalign/align/tests/main.nf.test | 54 ---- .../kalign/align/tests/main.nf.test.snap | 60 ---- modules/nf-core/kalign/align/tests/tags.yml | 2 - .../nf-core/learnmsa/align/environment.yml | 8 - modules/nf-core/learnmsa/align/main.nf | 50 --- modules/nf-core/learnmsa/align/meta.yml | 47 --- .../nf-core/learnmsa/align/tests/main.nf.test | 59 ---- .../learnmsa/align/tests/main.nf.test.snap | 26 -- modules/nf-core/learnmsa/align/tests/tags.yml | 2 - modules/nf-core/mafft/environment.yml | 8 - modules/nf-core/mafft/main.nf | 75 ----- modules/nf-core/mafft/meta.yml | 95 ------ modules/nf-core/mafft/tests/main.nf.test | 248 -------------- modules/nf-core/mafft/tests/main.nf.test.snap | 250 -------------- modules/nf-core/mafft/tests/tags.yml | 2 - modules/nf-core/magus/align/environment.yml | 8 - modules/nf-core/magus/align/main.nf | 58 ---- modules/nf-core/magus/align/meta.yml | 67 ---- .../nf-core/magus/align/tests/main.nf.test | 113 ------- .../magus/align/tests/main.nf.test.snap | 65 ---- modules/nf-core/magus/align/tests/tags.yml | 2 - .../nf-core/magus/guidetree/environment.yml | 7 - modules/nf-core/magus/guidetree/main.nf | 48 --- modules/nf-core/magus/guidetree/meta.yml | 49 --- .../magus/guidetree/tests/main.nf.test | 39 --- .../magus/guidetree/tests/main.nf.test.snap | 23 -- .../nf-core/magus/guidetree/tests/tags.yml | 2 - .../nf-core/mtmalign/align/environment.yml | 8 - modules/nf-core/mtmalign/align/main.nf | 73 ----- modules/nf-core/mtmalign/align/meta.yml | 57 ---- .../nf-core/mtmalign/align/tests/main.nf.test | 90 ------ .../mtmalign/align/tests/main.nf.test.snap | 26 -- modules/nf-core/mtmalign/align/tests/tags.yml | 2 - .../nf-core/muscle5/super5/environment.yml | 8 - modules/nf-core/muscle5/super5/main.nf | 63 ---- modules/nf-core/muscle5/super5/meta.yml 
| 51 --- .../nf-core/muscle5/super5/tests/main.nf.test | 72 ----- .../muscle5/super5/tests/main.nf.test.snap | 46 --- .../muscle5/super5/tests/nextflow.config | 0 modules/nf-core/muscle5/super5/tests/tags.yml | 2 - modules/nf-core/tcoffee/align/environment.yml | 8 - modules/nf-core/tcoffee/align/main.nf | 68 ---- modules/nf-core/tcoffee/align/meta.yml | 80 ----- .../nf-core/tcoffee/align/tests/lib.config | 3 - .../nf-core/tcoffee/align/tests/main.nf.test | 177 ---------- .../tcoffee/align/tests/main.nf.test.snap | 130 -------- .../tcoffee/align/tests/sequence.config | 3 - .../tcoffee/align/tests/structure.config | 5 - modules/nf-core/tcoffee/align/tests/tags.yml | 2 - .../nf-core/tcoffee/align/tests/tree.config | 5 - subworkflows/local/align.nf | 304 ------------------ subworkflows/local/compute_trees.nf | 43 --- workflows/multiplesequencealign.nf | 1 - 79 files changed, 91 insertions(+), 3809 deletions(-) delete mode 100644 modules/nf-core/clustalo/align/environment.yml delete mode 100644 modules/nf-core/clustalo/align/main.nf delete mode 100644 modules/nf-core/clustalo/align/meta.yml delete mode 100644 modules/nf-core/clustalo/align/tests/main.nf.test delete mode 100644 modules/nf-core/clustalo/align/tests/main.nf.test.snap delete mode 100644 modules/nf-core/clustalo/align/tests/nextflow.config delete mode 100644 modules/nf-core/clustalo/align/tests/tags.yml delete mode 100644 modules/nf-core/clustalo/guidetree/environment.yml delete mode 100644 modules/nf-core/clustalo/guidetree/main.nf delete mode 100644 modules/nf-core/clustalo/guidetree/meta.yml delete mode 100644 modules/nf-core/clustalo/guidetree/tests/main.nf.test delete mode 100644 modules/nf-core/clustalo/guidetree/tests/main.nf.test.snap delete mode 100644 modules/nf-core/clustalo/guidetree/tests/tags.yml delete mode 100644 modules/nf-core/famsa/align/environment.yml delete mode 100644 modules/nf-core/famsa/align/main.nf delete mode 100644 modules/nf-core/famsa/align/meta.yml delete mode 100644 
modules/nf-core/famsa/align/tests/main.nf.test delete mode 100644 modules/nf-core/famsa/align/tests/main.nf.test.snap delete mode 100644 modules/nf-core/famsa/align/tests/tags.yml delete mode 100644 modules/nf-core/famsa/guidetree/environment.yml delete mode 100644 modules/nf-core/famsa/guidetree/main.nf delete mode 100644 modules/nf-core/famsa/guidetree/meta.yml delete mode 100644 modules/nf-core/kalign/align/environment.yml delete mode 100644 modules/nf-core/kalign/align/main.nf delete mode 100644 modules/nf-core/kalign/align/meta.yml delete mode 100644 modules/nf-core/kalign/align/tests/main.nf.test delete mode 100644 modules/nf-core/kalign/align/tests/main.nf.test.snap delete mode 100644 modules/nf-core/kalign/align/tests/tags.yml delete mode 100644 modules/nf-core/learnmsa/align/environment.yml delete mode 100644 modules/nf-core/learnmsa/align/main.nf delete mode 100644 modules/nf-core/learnmsa/align/meta.yml delete mode 100644 modules/nf-core/learnmsa/align/tests/main.nf.test delete mode 100644 modules/nf-core/learnmsa/align/tests/main.nf.test.snap delete mode 100644 modules/nf-core/learnmsa/align/tests/tags.yml delete mode 100644 modules/nf-core/mafft/environment.yml delete mode 100644 modules/nf-core/mafft/main.nf delete mode 100644 modules/nf-core/mafft/meta.yml delete mode 100644 modules/nf-core/mafft/tests/main.nf.test delete mode 100644 modules/nf-core/mafft/tests/main.nf.test.snap delete mode 100644 modules/nf-core/mafft/tests/tags.yml delete mode 100644 modules/nf-core/magus/align/environment.yml delete mode 100644 modules/nf-core/magus/align/main.nf delete mode 100644 modules/nf-core/magus/align/meta.yml delete mode 100644 modules/nf-core/magus/align/tests/main.nf.test delete mode 100644 modules/nf-core/magus/align/tests/main.nf.test.snap delete mode 100644 modules/nf-core/magus/align/tests/tags.yml delete mode 100644 modules/nf-core/magus/guidetree/environment.yml delete mode 100644 modules/nf-core/magus/guidetree/main.nf delete mode 100644 
modules/nf-core/magus/guidetree/meta.yml delete mode 100644 modules/nf-core/magus/guidetree/tests/main.nf.test delete mode 100644 modules/nf-core/magus/guidetree/tests/main.nf.test.snap delete mode 100644 modules/nf-core/magus/guidetree/tests/tags.yml delete mode 100644 modules/nf-core/mtmalign/align/environment.yml delete mode 100644 modules/nf-core/mtmalign/align/main.nf delete mode 100644 modules/nf-core/mtmalign/align/meta.yml delete mode 100644 modules/nf-core/mtmalign/align/tests/main.nf.test delete mode 100644 modules/nf-core/mtmalign/align/tests/main.nf.test.snap delete mode 100644 modules/nf-core/mtmalign/align/tests/tags.yml delete mode 100644 modules/nf-core/muscle5/super5/environment.yml delete mode 100644 modules/nf-core/muscle5/super5/main.nf delete mode 100644 modules/nf-core/muscle5/super5/meta.yml delete mode 100644 modules/nf-core/muscle5/super5/tests/main.nf.test delete mode 100644 modules/nf-core/muscle5/super5/tests/main.nf.test.snap delete mode 100644 modules/nf-core/muscle5/super5/tests/nextflow.config delete mode 100644 modules/nf-core/muscle5/super5/tests/tags.yml delete mode 100644 modules/nf-core/tcoffee/align/environment.yml delete mode 100644 modules/nf-core/tcoffee/align/main.nf delete mode 100644 modules/nf-core/tcoffee/align/meta.yml delete mode 100644 modules/nf-core/tcoffee/align/tests/lib.config delete mode 100644 modules/nf-core/tcoffee/align/tests/main.nf.test delete mode 100644 modules/nf-core/tcoffee/align/tests/main.nf.test.snap delete mode 100644 modules/nf-core/tcoffee/align/tests/sequence.config delete mode 100644 modules/nf-core/tcoffee/align/tests/structure.config delete mode 100644 modules/nf-core/tcoffee/align/tests/tags.yml delete mode 100644 modules/nf-core/tcoffee/align/tests/tree.config delete mode 100644 subworkflows/local/align.nf delete mode 100644 subworkflows/local/compute_trees.nf diff --git a/modules.json b/modules.json index 59908138..fc5bfbc0 100644 --- a/modules.json +++ b/modules.json @@ -8,77 +8,107 @@ 
"clustalo/align": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "clustalo/guidetree": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_guidetree"] + "installed_by": [ + "msa_guidetree" + ] }, "clustalo/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": ["msa_treealign"] + "installed_by": [ + "msa_treealign" + ] }, "famsa/align": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "famsa/guidetree": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_guidetree"] + "installed_by": [ + "msa_guidetree" + ] }, "famsa/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": ["msa_treealign"] + "installed_by": [ + "msa_treealign" + ] }, "kalign/align": { "branch": "main", "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "learnmsa/align": { "branch": "main", "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "mafft": { "branch": "main", "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "magus/align": { "branch": "main", "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "magus/guidetree": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_guidetree"] + "installed_by": [ + "msa_guidetree" + ] }, "magus/treealign": { "branch": "main", "git_sha": 
"3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": ["msa_treealign"] + "installed_by": [ + "msa_treealign" + ] }, "muscle5/super5": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "tcoffee/align": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "tcoffee/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": ["msa_treealign"] + "installed_by": [ + "msa_treealign" + ] } } }, @@ -87,17 +117,23 @@ "msa_alignment": { "branch": "main", "git_sha": "da5b7cd83d0a060b0b5343671ab552ee5b7c8aa9", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "msa_guidetree": { "branch": "main", "git_sha": "da5b7cd83d0a060b0b5343671ab552ee5b7c8aa9", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "msa_treealign": { "branch": "main", "git_sha": "0da6e13e2cade9d530dcf731a3f281998f72b5d1", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } @@ -105,111 +141,69 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "clustalo/align": { - "branch": "master", - "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": ["modules"] - }, - "clustalo/guidetree": { - "branch": "master", - "git_sha": "1f253ec05723293df7757af8769f8389b7a1884e", - "installed_by": ["modules"] - }, "csvtk/concat": { "branch": "master", "git_sha": "cfe2a24902bfdfe8132f11461ffda92d257f9f09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "csvtk/join": { "branch": "master", "git_sha": "614abbf126f287a3068dc86997b2e1b6a93abe20", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/csvtk/join/csvtk-join.diff" }, - "famsa/align": { - "branch": "master", - "git_sha": 
"faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": ["modules"] - }, - "famsa/guidetree": { - "branch": "master", - "git_sha": "46789a4621be261f10dab0033f46f34779a5afc9", - "installed_by": ["modules"] - }, - "kalign/align": { - "branch": "master", - "git_sha": "7afd02d048ad0100be37fa1741816265c4aa307c", - "installed_by": ["modules"] - }, - "learnmsa/align": { - "branch": "master", - "git_sha": "62007703c84bcfef92ce9e4a57cb1cc382917201", - "installed_by": ["modules"] - }, - "mafft": { - "branch": "master", - "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": ["modules"] - }, - "magus/align": { - "branch": "master", - "git_sha": "dc37bcdfa78fe3e9ca56e4b85e1621333c7b4301", - "installed_by": ["modules"] - }, - "magus/guidetree": { - "branch": "master", - "git_sha": "dc37bcdfa78fe3e9ca56e4b85e1621333c7b4301", - "installed_by": ["modules"] - }, - "mtmalign/align": { - "branch": "master", - "git_sha": "7bfb142c3729c1c76198c237a614215d92fe935c", - "installed_by": ["modules"] - }, - "muscle5/super5": { - "branch": "master", - "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": ["modules"] - }, "pigz/compress": { "branch": "master", "git_sha": "0eab94fc1e48703c1b0a8704bd665f554905c39d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pigz/uncompress": { "branch": "master", "git_sha": "d7f0de8aae7bf84b080dfdcf4e294bf11a46a51c", - "installed_by": ["modules"] - }, - "tcoffee/align": { - "branch": "master", - "git_sha": "1cacaceabae75b0c3bc393dee52cb6a5020fcb5c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/alncompare": { "branch": "master", "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/irmsd": { "branch": "master", "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/seqreformat": { "branch": "master", 
"git_sha": "32ae618a60a25a870b5fa47ea2060ddcd911ab53", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/tcs": { "branch": "master", "git_sha": "1cacaceabae75b0c3bc393dee52cb6a5020fcb5c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -218,20 +212,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/clustalo/align/environment.yml b/modules/nf-core/clustalo/align/environment.yml deleted file mode 100644 index be1eef95..00000000 --- a/modules/nf-core/clustalo/align/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: clustalo_align -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::clustalo=1.2.4 - - conda-forge::pigz=2.8 diff --git a/modules/nf-core/clustalo/align/main.nf b/modules/nf-core/clustalo/align/main.nf deleted file mode 100644 index eb230cad..00000000 --- a/modules/nf-core/clustalo/align/main.nf +++ /dev/null @@ -1,57 +0,0 @@ -process CLUSTALO_ALIGN { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0': - 'biocontainers/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0' }" - - input: - tuple val(meta) , path(fasta) - tuple val(meta2), path(tree) - val(compress) - - output: - tuple val(meta), path("*.aln{.gz,}"), emit: alignment - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def write_output = compress ? "--force -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "> ${prefix}.aln" - // using >() is necessary to preserve the return value, - // so nextflow knows to display an error when it failed - // the --force -o is necessary, as clustalo expands the commandline input, - // causing it to treat the pipe as a parameter and fail - // this way, the command expands to /dev/fd/, and --force allows writing output to an already existing file - """ - clustalo \ - -i ${fasta} \ - --threads=${task.cpus} \ - $args \ - $write_output - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - clustalo: \$( clustalo --version ) - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.aln${compress ? 
'.gz' : ''} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - clustalo: \$( clustalo --version ) - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ -} diff --git a/modules/nf-core/clustalo/align/meta.yml b/modules/nf-core/clustalo/align/meta.yml deleted file mode 100644 index 469b3a37..00000000 --- a/modules/nf-core/clustalo/align/meta.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: "clustalo_align" -description: Align sequences using Clustal Omega -keywords: - - alignment - - MSA - - genomics -tools: - - "clustalo": - description: "Latest version of Clustal: a multiple sequence alignment program for DNA or proteins" - homepage: "http://www.clustal.org/omega/" - documentation: "http://www.clustal.org/omega/" - tool_dev_url: "http://www.clustal.org/omega/" - doi: "10.1038/msb.2011.75" - licence: ["GPL v2"] - - "pigz": - description: "Parallel implementation of the gzip algorithm." - homepage: "https://zlib.net/pigz/" - documentation: "https://zlib.net/pigz/pigz.pdf" -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - fasta: - type: file - description: Input sequences in FASTA format - pattern: "*.{fa,fasta}" - - meta2: - type: map - description: | - Groovy Map containing tree information - e.g. `[ id:'test_tree']` - - tree: - type: file - description: Input guide tree in Newick format - pattern: "*.{dnd}" - - compress: - type: boolean - description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'test']` - - alignment: - type: file - description: Alignment file, in gzipped fasta format - pattern: "*.aln{.gz,}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@luisas" - - "@joseespinosa" -maintainers: - - "@luisas" - - "@joseespinosa" - - "@lrauschning" diff --git a/modules/nf-core/clustalo/align/tests/main.nf.test b/modules/nf-core/clustalo/align/tests/main.nf.test deleted file mode 100644 index 3edd36a2..00000000 --- a/modules/nf-core/clustalo/align/tests/main.nf.test +++ /dev/null @@ -1,98 +0,0 @@ -nextflow_process { - - name "Test Process CLUSTALO_ALIGN" - script "../main.nf" - process "CLUSTALO_ALIGN" - config "./nextflow.config" - - tag "modules" - tag "modules_nfcore" - tag "clustalo" - tag "clustalo/align" - tag "clustalo/guidetree" - - test("sarscov2 - contigs-fasta - uncompressed") { - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] - input[1] = [[:],[]] - input[2] = false - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("alignment - uncompressed")}, - { assert snapshot(process.out.versions).match("versions0") } - ) - } - - } - - test("sarscov2 - contigs-fasta - compressed") { - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] - input[1] = [[:],[]] - input[2] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("alignment - compressed")}, - { assert snapshot(process.out.versions).match("versions1") } - ) - } - - } - - test("sarscov2 - contigs-fasta - guide_tree") { - - setup { - - run("CLUSTALO_GUIDETREE") { - script "../../guidetree/main.nf" - process { - """ - input[0] = [ [ id:'test' ], // meta map - 
file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] - """ - } - } - } - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] - input[1] = CLUSTALO_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} - input[2] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("with_guide_tree_alignment")}, - { assert snapshot(process.out.versions).match("with_guide_tree_versions") } - ) - } - } -} \ No newline at end of file diff --git a/modules/nf-core/clustalo/align/tests/main.nf.test.snap b/modules/nf-core/clustalo/align/tests/main.nf.test.snap deleted file mode 100644 index d7d69870..00000000 --- a/modules/nf-core/clustalo/align/tests/main.nf.test.snap +++ /dev/null @@ -1,57 +0,0 @@ -{ - "alignment - compressed": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" - ] - ] - ], - "timestamp": "2024-02-09T19:39:46.647351958" - }, - "versions": { - "content": [ - [ - "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" - ] - ], - "timestamp": "2024-02-09T19:39:14.826528498" - }, - "alignment - uncompressed": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln:md5,74bb9a2820a91cf68db94dbd46787722" - ] - ] - ], - "timestamp": "2024-02-09T19:39:14.786480272" - }, - "with_guide_tree_alignment": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" - ] - ] - ], - "timestamp": "2024-02-09T19:40:45.057777867" - }, - "with_guide_tree_versions": { - "content": [ - [ - "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" - ] - ], - "timestamp": "2024-02-09T19:40:45.122824595" - } -} \ No newline at end of file diff --git a/modules/nf-core/clustalo/align/tests/nextflow.config b/modules/nf-core/clustalo/align/tests/nextflow.config 
deleted file mode 100644 index 71db4c7c..00000000 --- a/modules/nf-core/clustalo/align/tests/nextflow.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = { tree ? "--guidetree-in=$tree" : "" } -} \ No newline at end of file diff --git a/modules/nf-core/clustalo/align/tests/tags.yml b/modules/nf-core/clustalo/align/tests/tags.yml deleted file mode 100644 index 58bd2776..00000000 --- a/modules/nf-core/clustalo/align/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -clustalo/align: - - "modules/nf-core/clustalo/align/**" diff --git a/modules/nf-core/clustalo/guidetree/environment.yml b/modules/nf-core/clustalo/guidetree/environment.yml deleted file mode 100644 index 38b2f5b9..00000000 --- a/modules/nf-core/clustalo/guidetree/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: clustalo_guidetree -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::clustalo=1.2.4 diff --git a/modules/nf-core/clustalo/guidetree/main.nf b/modules/nf-core/clustalo/guidetree/main.nf deleted file mode 100644 index b94f2aa6..00000000 --- a/modules/nf-core/clustalo/guidetree/main.nf +++ /dev/null @@ -1,47 +0,0 @@ -process CLUSTALO_GUIDETREE { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/clustalo:1.2.4--h87f3376_5': - 'biocontainers/clustalo:1.2.4--h87f3376_5' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("*.dnd"), emit: tree - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - clustalo \\ - -i ${fasta} \\ - --guidetree-out ${prefix}.dnd \\ - --threads=${task.cpus} \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - clustalo: \$( clustalo --version ) - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.dnd - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - clustalo: \$( clustalo --version ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/clustalo/guidetree/meta.yml b/modules/nf-core/clustalo/guidetree/meta.yml deleted file mode 100644 index b8e02352..00000000 --- a/modules/nf-core/clustalo/guidetree/meta.yml +++ /dev/null @@ -1,46 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: "clustalo_guidetree" -description: Renders a guidetree in clustalo -keywords: - - guide tree - - msa - - newick -tools: - - "clustalo": - description: "Latest version of Clustal: a multiple sequence alignment program for DNA or proteins" - homepage: "http://www.clustal.org/omega/" - documentation: "http://www.clustal.org/omega/" - tool_dev_url: "http://www.clustal.org/omega/" - doi: "10.1038/msb.2011.75" - licence: ["GPL v2"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - fasta: - type: file - description: Input sequences in FASTA format - pattern: "*.{fa,fasta}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'test']` - - tree: - type: file - description: Guide tree file in Newick format - pattern: "*.{dnd}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@luisas" - - "@JoseEspinosa" -maintainers: - - "@luisas" - - "@JoseEspinosa" diff --git a/modules/nf-core/clustalo/guidetree/tests/main.nf.test b/modules/nf-core/clustalo/guidetree/tests/main.nf.test deleted file mode 100644 index d670ae03..00000000 --- a/modules/nf-core/clustalo/guidetree/tests/main.nf.test +++ /dev/null @@ -1,33 +0,0 @@ -nextflow_process { - - name "Test Process CLUSTALO_GUIDETREE" - script "../main.nf" - process "CLUSTALO_GUIDETREE" - - tag "modules" - tag "modules_nfcore" - tag "clustalo" - tag "clustalo/guidetree" - - test("sarscov2 - contigs-fasta") { - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.tree).match("tree")}, - { assert snapshot(process.out.versions).match("versions") } - ) - } - } - -} diff --git a/modules/nf-core/clustalo/guidetree/tests/main.nf.test.snap b/modules/nf-core/clustalo/guidetree/tests/main.nf.test.snap deleted file mode 100644 index 6e3fdfc1..00000000 --- a/modules/nf-core/clustalo/guidetree/tests/main.nf.test.snap +++ /dev/null @@ -1,23 +0,0 @@ -{ - "versions": { - "content": [ - [ - "versions.yml:md5,64796b9beb7201a42b2c78cbdad51049" - ] - ], - "timestamp": "2023-11-27T22:49:13.44908228" - }, - "tree": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.dnd:md5,5428bad500a0a0bd985744bec1a12a70" - ] - ] - ], - "timestamp": "2023-11-27T22:49:13.43743393" - } -} \ No newline at end of file diff --git a/modules/nf-core/clustalo/guidetree/tests/tags.yml b/modules/nf-core/clustalo/guidetree/tests/tags.yml deleted file mode 100644 index 9b07c866..00000000 --- 
a/modules/nf-core/clustalo/guidetree/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -clustalo/guidetree: - - "modules/nf-core/clustalo/guidetree/**" diff --git a/modules/nf-core/famsa/align/environment.yml b/modules/nf-core/famsa/align/environment.yml deleted file mode 100644 index c41cda2a..00000000 --- a/modules/nf-core/famsa/align/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: famsa_align -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::famsa=2.2.2 diff --git a/modules/nf-core/famsa/align/main.nf b/modules/nf-core/famsa/align/main.nf deleted file mode 100644 index 096d8ff3..00000000 --- a/modules/nf-core/famsa/align/main.nf +++ /dev/null @@ -1,53 +0,0 @@ - - -process FAMSA_ALIGN { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/famsa:2.2.2--h9f5acd7_0': - 'biocontainers/famsa:2.2.2--h9f5acd7_0' }" - - input: - tuple val(meta) , path(fasta) - tuple val(meta2), path(tree) - val(compress) - - output: - tuple val(meta), path("*.aln{.gz,}"), emit: alignment - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def compress_args = compress ? '-gz' : '' - def prefix = task.ext.prefix ?: "${meta.id}" - def options_tree = tree ? "-gt import $tree" : "" - """ - famsa $options_tree \\ - $compress_args \\ - $args \\ - -t ${task.cpus} \\ - ${fasta} \\ - ${prefix}.aln${compress ? '.gz':''} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.aln${compress ? 
'.gz' : ''} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/famsa/align/meta.yml b/modules/nf-core/famsa/align/meta.yml deleted file mode 100644 index 6acf3c21..00000000 --- a/modules/nf-core/famsa/align/meta.yml +++ /dev/null @@ -1,58 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: "famsa_align" -description: Aligns sequences using FAMSA -keywords: - - alignment - - MSA - - genomics -tools: - - "famsa": - description: "Algorithm for large-scale multiple sequence alignments" - homepage: "https://github.com/refresh-bio/FAMSA" - documentation: "https://github.com/refresh-bio/FAMSA" - tool_dev_url: "https://github.com/refresh-bio/FAMSA" - doi: "10.1038/srep33964" - licence: ["GPL v3"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - fasta: - type: file - description: Input sequences in FASTA format - pattern: "*.{fa,fasta}" - - meta2: - type: map - description: | - Groovy Map containing tree information - e.g. `[ id:'test_tree']` - - tree: - type: file - description: Input guide tree in Newick format - pattern: "*.{dnd}" - - compress: - type: boolean - description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is handled by passing '-gz' to FAMSA along with any other options specified in task.ext.args. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - alignment: - type: file - description: Alignment file, in FASTA format. 
May be gzipped or uncompressed, depending on if compress is set to true or false - pattern: "*.aln{.gz,}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@luisas" - - "@JoseEspinosa" -maintainers: - - "@luisas" - - "@JoseEspinosa" diff --git a/modules/nf-core/famsa/align/tests/main.nf.test b/modules/nf-core/famsa/align/tests/main.nf.test deleted file mode 100644 index 2d7cac3c..00000000 --- a/modules/nf-core/famsa/align/tests/main.nf.test +++ /dev/null @@ -1,96 +0,0 @@ -nextflow_process { - - name "Test Process FAMSA_ALIGN" - script "../main.nf" - process "FAMSA_ALIGN" - - tag "modules" - tag "modules_nfcore" - tag "famsa" - tag "famsa/align" - tag "famsa/guidetree" - - test("sarscov2 - fasta - uncompressed") { - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] - input[1] = [[:],[]] - input[2] = false - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("alignment_uncompressed")}, - { assert snapshot(process.out.versions).match("versions0") } - ) - } - - } - - test("sarscov2 - fasta - compressed") { - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] - input[1] = [[:],[]] - input[2] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("alignment_compressed")}, - { assert snapshot(process.out.versions).match("versions1") } - ) - } - - } - - test("sarscov2 - fasta - guide_tree") { - - setup { - run("FAMSA_GUIDETREE") { - script "../../guidetree/main.nf" - process { - """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] - """ - } - } - } - - when { - process { - """ - 
input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] - input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} - input[2] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("with_guide_tree_alignment")}, - { assert snapshot(process.out.versions).match("with_guide_tree_versions") } - ) - } - } -} \ No newline at end of file diff --git a/modules/nf-core/famsa/align/tests/main.nf.test.snap b/modules/nf-core/famsa/align/tests/main.nf.test.snap deleted file mode 100644 index 95bbbf17..00000000 --- a/modules/nf-core/famsa/align/tests/main.nf.test.snap +++ /dev/null @@ -1,57 +0,0 @@ -{ - "alignment_uncompressed": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln:md5,7cf7375f2ba360814ea978731838b972" - ] - ] - ], - "timestamp": "2024-02-09T19:08:43.577982822" - }, - "versions": { - "content": [ - [ - "versions.yml:md5,7d9e0a8c263fa6d9017075fe88c9e9dc" - ] - ], - "timestamp": "2024-02-09T19:08:43.670136799" - }, - "with_guide_tree_alignment": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" - ] - ] - ], - "timestamp": "2024-02-09T19:10:05.167368314" - }, - "alignment_compressed": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" - ] - ] - ], - "timestamp": "2024-02-09T19:09:25.819156831" - }, - "with_guide_tree_versions": { - "content": [ - [ - "versions.yml:md5,7d9e0a8c263fa6d9017075fe88c9e9dc" - ] - ], - "timestamp": "2024-02-09T19:10:05.231995851" - } -} \ No newline at end of file diff --git a/modules/nf-core/famsa/align/tests/tags.yml b/modules/nf-core/famsa/align/tests/tags.yml deleted file mode 100644 index d010f3b7..00000000 --- a/modules/nf-core/famsa/align/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -famsa/align: - - 
"modules/nf-core/famsa/align/**" diff --git a/modules/nf-core/famsa/guidetree/environment.yml b/modules/nf-core/famsa/guidetree/environment.yml deleted file mode 100644 index 28be1c7f..00000000 --- a/modules/nf-core/famsa/guidetree/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: famsa_guidetree -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::famsa=2.2.2 diff --git a/modules/nf-core/famsa/guidetree/main.nf b/modules/nf-core/famsa/guidetree/main.nf deleted file mode 100644 index 7d8f46cd..00000000 --- a/modules/nf-core/famsa/guidetree/main.nf +++ /dev/null @@ -1,49 +0,0 @@ - -process FAMSA_GUIDETREE { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/famsa:2.2.2--h9f5acd7_0': - 'biocontainers/famsa:2.2.2--h9f5acd7_0' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("*.dnd"), emit: tree - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - famsa -gt_export \\ - $args \\ - -t ${task.cpus} \\ - ${fasta} \\ - ${prefix}.dnd - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.dnd - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - famsa: \$( famsa -help 2>&1 | head -n 2 | tail -n 1 | sed 's/ version //g' ) - END_VERSIONS - """ -} - diff --git a/modules/nf-core/famsa/guidetree/meta.yml b/modules/nf-core/famsa/guidetree/meta.yml deleted file mode 100644 index 2bd4e798..00000000 --- a/modules/nf-core/famsa/guidetree/meta.yml +++ /dev/null @@ -1,46 +0,0 @@ ---- 
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: "famsa_guidetree" -description: Renders a guidetree in famsa -keywords: - - guide tree - - msa - - newick -tools: - - "famsa": - description: "Algorithm for large-scale multiple sequence alignments" - homepage: "https://github.com/refresh-bio/FAMSA" - documentation: "https://github.com/refresh-bio/FAMSA" - tool_dev_url: "https://github.com/refresh-bio/FAMSA" - doi: "10.1038/srep33964" - licence: ["GPL v3"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - fasta: - type: file - description: Input sequences in FASTA format - pattern: "*.{fa,fasta}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - tree: - type: file - description: Guide tree file in Newick format - pattern: "*.{dnd}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@luisas" - - "@JoseEspinosa" -maintainers: - - "@luisas" - - "@JoseEspinosa" diff --git a/modules/nf-core/kalign/align/environment.yml b/modules/nf-core/kalign/align/environment.yml deleted file mode 100644 index 93563eae..00000000 --- a/modules/nf-core/kalign/align/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: kalign_align -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::kalign3=3.4.0 - - conda-forge::pigz=2.8 diff --git a/modules/nf-core/kalign/align/main.nf b/modules/nf-core/kalign/align/main.nf deleted file mode 100644 index e1601e15..00000000 --- a/modules/nf-core/kalign/align/main.nf +++ /dev/null @@ -1,50 +0,0 @@ -process KALIGN_ALIGN { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-5cd0277547c6b33133225c8ce14c0cf2a4396ea2:0a70b6d89a3e06fbdc4a735461e8b98ff32ee5de-0': - 'biocontainers/mulled-v2-5cd0277547c6b33133225c8ce14c0cf2a4396ea2:0a70b6d89a3e06fbdc4a735461e8b98ff32ee5de-0' }" - - input: - tuple val(meta), path(fasta) - val(compress) - - output: - tuple val(meta), path("*.aln{.gz,}"), emit: alignment - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def write_output = compress ? ">(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "${prefix}.aln" - """ - unpigz -cdf $fasta | \\ - kalign \\ - $args \\ - -o ${write_output} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - kalign: \$(echo \$(kalign -v) | sed 's/kalign //g' ) - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.aln${compress ? '.gz' : ''} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - kalign : \$(echo \$(kalign -v) | sed 's/kalign //g' ) - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ -} diff --git a/modules/nf-core/kalign/align/meta.yml b/modules/nf-core/kalign/align/meta.yml deleted file mode 100644 index 187f6bc3..00000000 --- a/modules/nf-core/kalign/align/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: "kalign_align" -description: "Aligns sequences using kalign" -keywords: - - alignment - - MSA - - genomics -tools: - - "kalign": - description: "Kalign is a fast and accurate multiple sequence alignment algorithm." 
- homepage: "https://msa.sbc.su.se/cgi-bin/msa.cgi" - documentation: "https://github.com/TimoLassmann/kalign" - tool_dev_url: "https://github.com/TimoLassmann/kalign" - doi: "10.1093/bioinformatics/btz795" - licence: ["GPL v3"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - fasta: - type: file - description: Input sequences in FASTA format. May be gzipped or uncompressed. - pattern: "*.{fa,fasta}{.gz,}" - - compress: - type: boolean - description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - alignment: - type: file - description: Alignment file. May be gzipped or uncompressed, depending on if `compress` is set to `true` or `false`. - pattern: "*.{aln}{.gz,}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@luisas" - - "@JoseEspinosa" -maintainers: - - "@luisas" - - "@JoseEspinosa" diff --git a/modules/nf-core/kalign/align/tests/main.nf.test b/modules/nf-core/kalign/align/tests/main.nf.test deleted file mode 100644 index d90ad9b0..00000000 --- a/modules/nf-core/kalign/align/tests/main.nf.test +++ /dev/null @@ -1,54 +0,0 @@ -// nf-core modules test kalign/align -nextflow_process { - - name "Test Process KALIGN_ALIGN" - script "../main.nf" - process "KALIGN_ALIGN" - - tag "modules" - tag "modules_nfcore" - tag "kalign" - tag "kalign/align" - - test("sarscov2 - fasta - uncompressed") { - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] - input[1] = false - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - 
uncompressed")}, - ) - } - } - - test("sarscov2 - fasta - compressed") { - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] - input[1] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - compressed")}, - ) - } - } -} \ No newline at end of file diff --git a/modules/nf-core/kalign/align/tests/main.nf.test.snap b/modules/nf-core/kalign/align/tests/main.nf.test.snap deleted file mode 100644 index da6fc94c..00000000 --- a/modules/nf-core/kalign/align/tests/main.nf.test.snap +++ /dev/null @@ -1,60 +0,0 @@ -{ - "SARS-CoV-2 scaffolds fasta - uncompressed": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.aln:md5,c165ecf48fb89862cc2a991cc3cadb2d" - ] - ], - "1": [ - "versions.yml:md5,0764ff5c30fd8befd86baa9026493ffe" - ], - "alignment": [ - [ - { - "id": "test" - }, - "test.aln:md5,c165ecf48fb89862cc2a991cc3cadb2d" - ] - ], - "versions": [ - "versions.yml:md5,0764ff5c30fd8befd86baa9026493ffe" - ] - } - ], - "timestamp": "2024-03-22T16:42:01.934768" - }, - "SARS-CoV-2 scaffolds fasta - compressed": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,c165ecf48fb89862cc2a991cc3cadb2d" - ] - ], - "1": [ - "versions.yml:md5,0764ff5c30fd8befd86baa9026493ffe" - ], - "alignment": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,c165ecf48fb89862cc2a991cc3cadb2d" - ] - ], - "versions": [ - "versions.yml:md5,0764ff5c30fd8befd86baa9026493ffe" - ] - } - ], - "timestamp": "2024-03-22T16:42:07.734293" - } -} \ No newline at end of file diff --git a/modules/nf-core/kalign/align/tests/tags.yml b/modules/nf-core/kalign/align/tests/tags.yml deleted file mode 100644 index fa93d172..00000000 --- a/modules/nf-core/kalign/align/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -kalign/align: - - "modules/nf-core/kalign/align/**" diff --git 
a/modules/nf-core/learnmsa/align/environment.yml b/modules/nf-core/learnmsa/align/environment.yml deleted file mode 100644 index 124b8d84..00000000 --- a/modules/nf-core/learnmsa/align/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: learnmsa_align -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::learnmsa=2.0.1 - - conda-forge::pigz=2.8 diff --git a/modules/nf-core/learnmsa/align/main.nf b/modules/nf-core/learnmsa/align/main.nf deleted file mode 100644 index 304fb07c..00000000 --- a/modules/nf-core/learnmsa/align/main.nf +++ /dev/null @@ -1,50 +0,0 @@ -process LEARNMSA_ALIGN { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-741e0da5cf2d6d964f559672e2908c2111cbb46b:4930edd009376542543bfd2e20008bb1ae58f841-0' : - 'biocontainers/mulled-v2-741e0da5cf2d6d964f559672e2908c2111cbb46b:4930edd009376542543bfd2e20008bb1ae58f841-0' }" - - input: - tuple val(meta), path(fasta) - val(compress) - - output: - tuple val(meta), path("*.aln{.gz,}"), emit: alignment - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def write_output = compress ? ">(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "${prefix}.aln" - """ - learnMSA \\ - $args \\ - -i <(unpigz -cdf $fasta) \\ - -o $write_output - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - learnmsa: \$(learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g') - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.aln${compress ? 
'.gz' : ''} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - learnmsa: \$(learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g') - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ -} diff --git a/modules/nf-core/learnmsa/align/meta.yml b/modules/nf-core/learnmsa/align/meta.yml deleted file mode 100644 index 66a9f7d1..00000000 --- a/modules/nf-core/learnmsa/align/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: "learnmsa_align" -description: Align sequences using learnMSA -keywords: - - alignment - - MSA - - genomics -tools: - - "learnmsa": - description: "learnMSA: Learning and Aligning large Protein Families" - homepage: "https://github.com/Gaius-Augustus/learnMSA" - documentation: "https://github.com/Gaius-Augustus/learnMSA" - tool_dev_url: "https://github.com/Gaius-Augustus/learnMSA" - doi: "10.1093/gigascience/giac104" - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - fasta: - type: file - description: Input sequences in FASTA format. May be gz-compressed or uncompressed. - pattern: "*.{fa,fasta}{.gz,}" - - compress: - type: boolean - description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - alignment: - type: file - description: Alignment file, in FASTA format. May be gzipped or uncompressed. 
- pattern: "*.aln{.gz,}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@luisas" - - "@JoseEspinosa" -maintainers: - - "@luisas" - - "@JoseEspinosa" diff --git a/modules/nf-core/learnmsa/align/tests/main.nf.test b/modules/nf-core/learnmsa/align/tests/main.nf.test deleted file mode 100644 index 8459ead3..00000000 --- a/modules/nf-core/learnmsa/align/tests/main.nf.test +++ /dev/null @@ -1,59 +0,0 @@ -// nf-core modules test learnmsa/align -nextflow_process { - - name "Test Process LEARNMSA_ALIGN" - script "../main.nf" - process "LEARNMSA_ALIGN" - - tag "modules" - tag "modules_nfcore" - tag "learnmsa" - tag "learnmsa/align" - - test("sarscov2 - fasta - uncompressed") { - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) - ] - input[1] = false - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert path(process.out.alignment.get(0).get(1)).getText().contains(">sample1") }, - { assert snapshot(process.out.versions).match("versions") } - ) - } - - } - - test("sarscov2 - fasta - compressed") { - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) - ] - input[1] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert path(process.out.alignment.get(0).get(1)).getTextGzip().contains(">sample1") }, - { assert snapshot(process.out.versions).match("versions1") } - ) - } - - } - -} \ No newline at end of file diff --git a/modules/nf-core/learnmsa/align/tests/main.nf.test.snap b/modules/nf-core/learnmsa/align/tests/main.nf.test.snap deleted file mode 100644 index 981738a2..00000000 --- a/modules/nf-core/learnmsa/align/tests/main.nf.test.snap +++ /dev/null @@ -1,26 +0,0 @@ -{ - "versions": { - "content": [ - [ - 
"versions.yml:md5,85322b0f038aa768f202fd0d748d6c7c" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T16:06:48.867020809" - }, - "versions1": { - "content": [ - [ - "versions.yml:md5,85322b0f038aa768f202fd0d748d6c7c" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T16:12:13.921813607" - } -} \ No newline at end of file diff --git a/modules/nf-core/learnmsa/align/tests/tags.yml b/modules/nf-core/learnmsa/align/tests/tags.yml deleted file mode 100644 index 127b6282..00000000 --- a/modules/nf-core/learnmsa/align/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -learnmsa/align: - - "modules/nf-core/learnmsa/align/**" diff --git a/modules/nf-core/mafft/environment.yml b/modules/nf-core/mafft/environment.yml deleted file mode 100644 index 595252e0..00000000 --- a/modules/nf-core/mafft/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: mafft -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::mafft=7.520 - - conda-forge::pigz=2.8 diff --git a/modules/nf-core/mafft/main.nf b/modules/nf-core/mafft/main.nf deleted file mode 100644 index f09a0c96..00000000 --- a/modules/nf-core/mafft/main.nf +++ /dev/null @@ -1,75 +0,0 @@ -process MAFFT { - tag "$meta.id" - label 'process_high' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-12eba4a074f913c639117640936668f5a6a01da6:425707898cf4f85051b77848be253b88f1d2298a-0': - 'biocontainers/mulled-v2-12eba4a074f913c639117640936668f5a6a01da6:425707898cf4f85051b77848be253b88f1d2298a-0' }" - - input: - tuple val(meta) , path(fasta) - tuple val(meta2), path(add) - tuple val(meta3), path(addfragments) - tuple val(meta4), path(addfull) - tuple val(meta5), path(addprofile) - tuple val(meta6), path(addlong) - val(compress) - - output: - tuple val(meta), path("*.fas{.gz,}"), emit: fas - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def add = add ? "--add <(unpigz -cdf ${add})" : '' - def addfragments = addfragments ? "--addfragments <(unpigz -cdf ${addfragments})" : '' - def addfull = addfull ? "--addfull <(unpigz -cdf ${addfull})" : '' - def addprofile = addprofile ? "--addprofile <(unpigz -cdf ${addprofile})" : '' - def addlong = addlong ? "--addlong <(unpigz -cdf ${addlong})" : '' - def write_output = compress ? " | pigz -cp ${task.cpus} > ${prefix}.fas.gz" : "> ${prefix}.fas" - // this will not preserve MAFFTs return value, but mafft crashes when it receives a process substitution - if ("$fasta" == "${prefix}.fas" ) error "Input and output names are the same, set prefix in module configuration to disambiguate!" - """ - mafft \\ - --thread ${task.cpus} \\ - ${add} \\ - ${addfragments} \\ - ${addfull} \\ - ${addprofile} \\ - ${addlong} \\ - ${args} \\ - ${fasta} \\ - ${write_output} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mafft: \$(mafft --version 2>&1 | sed 's/^v//' | sed 's/ (.*)//') - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def add = add ? "--add ${add}" : '' - def addfragments = addfragments ? 
"--addfragments ${addfragments}" : '' - def addfull = addfull ? "--addfull ${addfull}" : '' - def addprofile = addprofile ? "--addprofile ${addprofile}" : '' - def addlong = addlong ? "--addlong ${addlong}" : '' - if ("$fasta" == "${prefix}.fas" ) error "Input and output names are the same, set prefix in module configuration to disambiguate!" - """ - touch ${prefix}.fas${compress ? '.gz' : ''} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mafft: \$(mafft --version 2>&1 | sed 's/^v//' | sed 's/ (.*)//') - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ - -} diff --git a/modules/nf-core/mafft/meta.yml b/modules/nf-core/mafft/meta.yml deleted file mode 100644 index 90b9ed39..00000000 --- a/modules/nf-core/mafft/meta.yml +++ /dev/null @@ -1,95 +0,0 @@ -name: mafft -description: Multiple sequence alignment using MAFFT -keywords: - - fasta - - msa - - multiple sequence alignment -tools: - - "mafft": - description: Multiple alignment program for amino acid or nucleotide sequences based on fast Fourier transform - homepage: https://mafft.cbrc.jp/alignment/software/ - documentation: https://mafft.cbrc.jp/alignment/software/manual/manual.html - tool_dev_url: https://mafft.cbrc.jp/alignment/software/source.html - doi: "10.1093/nar/gkf436" - licence: ["BSD"] - - "pigz": - description: "Parallel implementation of the gzip algorithm." - homepage: "https://zlib.net/pigz/" - documentation: "https://zlib.net/pigz/pigz.pdf" -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: FASTA file containing the sequences to align. May be gzipped or uncompressed. - pattern: "*.{fa,fasta}{.gz,}" - - meta2: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - add: - type: file - description: FASTA file containing sequences to align to the sequences in `fasta` using `--add`. May be gzipped or uncompressed. - pattern: "*.{fa,fasta}{.gz,}" - - meta3: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - addfragments: - type: file - description: FASTA file containing sequences to align to the sequences in `fasta` using `--addfragments`. May be gzipped or uncompressed. - pattern: "*.{fa,fasta}{.gz,}" - - meta4: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - addfull: - type: file - description: FASTA file containing sequences to align to the sequences in `fasta` using `--addfull`. May be gzipped or uncompressed. - pattern: "*.{fa,fasta}{.gz,}" - - meta5: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - addprofile: - type: file - description: FASTA file containing sequences to align to the sequences in `fasta` using `--addprofile`. May be gzipped or uncompressed. - pattern: "*.{fa,fasta}{.gz,}" - - meta6: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - addlong: - type: file - description: FASTA file containing sequences to align to the sequences in `fasta` using `--addlong`. May be gzipped or uncompressed. - pattern: "*.{fa,fasta}{.gz,}" - - compress: - type: boolean - description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - fas: - type: file - description: Aligned sequences in FASTA format. May be gzipped or uncompressed. - pattern: "*.fas{.gz,}" -authors: - - "@MillironX" -maintainers: - - "@MillironX" - - "@Joon-Klaps" diff --git a/modules/nf-core/mafft/tests/main.nf.test b/modules/nf-core/mafft/tests/main.nf.test deleted file mode 100644 index f57ab496..00000000 --- a/modules/nf-core/mafft/tests/main.nf.test +++ /dev/null @@ -1,248 +0,0 @@ -nextflow_process { - - name "Test Process MAFFT" - script "../main.nf" - process "MAFFT" - tag "modules" - tag "modules_nfcore" - tag "mafft" - - test("SARS-CoV-2 scaffolds fasta - uncompressed") { - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['scaffolds_fasta'], checkIfExists: true) - ] - input[1] = [[:], []] - input[2] = [[:], []] - input[3] = [[:], []] - input[4] = [[:], []] - input[5] = [[:], []] - input[6] = false - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - uncompressed")} - ) - } - - } - - test("SARS-CoV-2 scaffolds fasta - compressed") { - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['scaffolds_fasta'], checkIfExists: true) - ] - input[1] = [[:], []] - input[2] = [[:], []] - input[3] = [[:], []] - input[4] = [[:], []] - input[5] = [[:], []] - input[6] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - compressed")} - ) - } - - } - - test("SARS-CoV-2 scaffolds fasta - add informative sites fasta normal") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - 
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - ] - input[1] = [[ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) - ] - input[2] = [[:], []] - input[3] = [[:], []] - input[4] = [[:], []] - input[5] = [[:], []] - input[6] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - add informative sites fasta normal") } - ) - } - } - - test("SARS-CoV-2 scaffolds fasta - add informative sites fasta fragments") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - ] - input[1] = [[:], []] - input[2] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) - ] - input[3] = [[:], []] - input[4] = [[:], []] - input[5] = [[:], []] - input[6] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - add informative sites fasta fragments") } - ) - } - } - - test("SARS-CoV-2 scaffolds fasta - add informative sites fasta full") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - ] - input[1] = [[:], []] - input[2] = [[:], []] - input[3] = [[ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) - ] - input[4] = [[:], []] - input[5] = [[:], []] - input[6] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - add informative sites fasta full") } - ) - } - - } - - test("SARS-CoV-2 scaffolds 
fasta - add informative sites fasta profile") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - ] - input[1] = [[:], []] - input[2] = [[:], []] - input[3] = [[:], []] - input[4] = [[ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) - ] - input[5] = [[:], []] - input[6] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - add informative sites fasta profile") } - ) - } - - } - - test("SARS-CoV-2 scaffolds fasta - add informative sites fasta long") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - ] - input[1] = [[:], []] - input[2] = [[:], []] - input[3] = [[:], []] - input[4] = [[:], []] - input[5] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) - ] - input[6] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - add informative sites fasta long") } - ) - } - - } - - test("SARS-CoV-2 scaffolds fasta - add informative sites all sites fasta multiple") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - ] - input[1] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['all_sites_fas'], checkIfExists: true) - ] - input[2] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) - ] - input[3] = [[:], []] - 
input[4] = [[:], []] - input[5] = [[:], []] - input[6] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - add informative sites fasta multiple") } - ) - } - - } - -} \ No newline at end of file diff --git a/modules/nf-core/mafft/tests/main.nf.test.snap b/modules/nf-core/mafft/tests/main.nf.test.snap deleted file mode 100644 index c14ad086..00000000 --- a/modules/nf-core/mafft/tests/main.nf.test.snap +++ /dev/null @@ -1,250 +0,0 @@ -{ - "SARS-CoV-2 scaffolds fasta - uncompressed": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas:md5,23426611f4a0df532b6708f072bd445b" - ] - ], - "1": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ], - "fas": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas:md5,23426611f4a0df532b6708f072bd445b" - ] - ], - "versions": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ] - } - ], - "timestamp": "2024-02-09T19:08:41.735774847" - }, - "SARS-CoV-2 scaffolds fasta - add informative sites fasta multiple": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" - ] - ], - "1": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ], - "fas": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" - ] - ], - "versions": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ] - } - ], - "timestamp": "2024-02-09T19:10:38.940555785" - }, - "SARS-CoV-2 scaffolds fasta - add informative sites fasta normal": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,a57a34f1c566dea114dc1b13416536d4" - ] - ], - "1": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ], - "fas": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,a57a34f1c566dea114dc1b13416536d4" - ] - ], - 
"versions": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ] - } - ], - "timestamp": "2024-02-09T19:09:35.656248409" - }, - "SARS-CoV-2 scaffolds fasta - add informative sites fasta long": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,e8868da70d1f3050a8daaee0e53b2fd9" - ] - ], - "1": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ], - "fas": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,e8868da70d1f3050a8daaee0e53b2fd9" - ] - ], - "versions": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ] - } - ], - "timestamp": "2024-02-09T19:10:26.372655394" - }, - "SARS-CoV-2 scaffolds fasta - add informative sites fasta profile": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,c2b5caf39beff4473878e6aa4036ad43" - ] - ], - "1": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ], - "fas": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,c2b5caf39beff4473878e6aa4036ad43" - ] - ], - "versions": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ] - } - ], - "timestamp": "2024-02-09T19:10:14.039053212" - }, - "SARS-CoV-2 scaffolds fasta - add informative sites fasta fragments": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" - ] - ], - "1": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ], - "fas": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" - ] - ], - "versions": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ] - } - ], - "timestamp": "2024-02-09T19:09:49.737364197" - }, - "SARS-CoV-2 scaffolds fasta - add informative sites fasta full": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,611cb0a65195a282f110f7f56e310c66" - ] - ], - "1": [ - 
"versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ], - "fas": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,611cb0a65195a282f110f7f56e310c66" - ] - ], - "versions": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ] - } - ], - "timestamp": "2024-02-09T19:10:02.952480822" - }, - "SARS-CoV-2 scaffolds fasta - compressed": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,23426611f4a0df532b6708f072bd445b" - ] - ], - "1": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ], - "fas": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas.gz:md5,23426611f4a0df532b6708f072bd445b" - ] - ], - "versions": [ - "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" - ] - } - ], - "timestamp": "2024-02-09T19:09:21.096197597" - } -} \ No newline at end of file diff --git a/modules/nf-core/mafft/tests/tags.yml b/modules/nf-core/mafft/tests/tags.yml deleted file mode 100644 index caddc3cd..00000000 --- a/modules/nf-core/mafft/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -mafft: - - modules/nf-core/mafft/** diff --git a/modules/nf-core/magus/align/environment.yml b/modules/nf-core/magus/align/environment.yml deleted file mode 100644 index 685f5a87..00000000 --- a/modules/nf-core/magus/align/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: magus_align -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::magus-msa=0.2.0 - - conda-forge::pigz=2.8 diff --git a/modules/nf-core/magus/align/main.nf b/modules/nf-core/magus/align/main.nf deleted file mode 100644 index 8c077e26..00000000 --- a/modules/nf-core/magus/align/main.nf +++ /dev/null @@ -1,58 +0,0 @@ -process MAGUS_ALIGN { - tag "$meta.id" - label 'process_high' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-ae4ea1182e75371808710b6c081bef8b228c4815:10b41722a6b9471a0945fe6baeb9aff444d8eb1d-0': - 'biocontainers/mulled-v2-ae4ea1182e75371808710b6c081bef8b228c4815:10b41722a6b9471a0945fe6baeb9aff444d8eb1d-0' }" - - input: - tuple val(meta) , path(fasta) - tuple val(meta2), path(tree) - val(compress) - - output: - tuple val(meta), path("*.aln{.gz,}"), emit: alignment - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def loadtree = tree ? "-t $tree" : '' - def write_output = compress ? "--overwrite -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "-o ${prefix}.aln" - // using >() is necessary to preserve the return value, - // so nextflow knows to display an error when it failed - // using --overwrite is necessary, as the file descriptor generated by the named file will already exist - """ - magus \\ - -np $task.cpus \\ - -i $fasta \\ - -d ./ \\ - $write_output \\ - $loadtree \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - MAGUS: \$(magus --version) - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - echo "" | gzip > ${prefix}.aln${compress ? 
'.gz' : ''} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - MAGUS: \$(magus --version) - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ -} diff --git a/modules/nf-core/magus/align/meta.yml b/modules/nf-core/magus/align/meta.yml deleted file mode 100644 index f9ada96a..00000000 --- a/modules/nf-core/magus/align/meta.yml +++ /dev/null @@ -1,67 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: "magus_align" -description: Multiple Sequence Alignment using Graph Clustering -keywords: - - MSA - - alignment - - genomics - - graph -tools: - - "magus": - description: "Multiple Sequence Alignment using Graph Clustering" - homepage: "https://github.com/vlasmirnov/MAGUS" - documentation: "https://github.com/vlasmirnov/MAGUS" - tool_dev_url: "https://github.com/vlasmirnov/MAGUS" - doi: "10.1093/bioinformatics/btaa992" - licence: ["MIT"] - -input: - - meta: - type: map - description: | - Groovy Map containing the fasta meta information - e.g. `[ id:'test', single_end:false ]` - - - meta2: - type: map - description: | - Groovy Map containing sample information for the specified guide tree (if supplied) - e.g. `[ id:'test', single_end:false ]` - - - fasta: - type: file - description: Input sequences in FASTA format. - pattern: "*.{fa,fna,fasta}" - - - tree: - type: file - description: Optional path to a file containing a guide tree in newick format to use as input. - If empty, or overwritten by passing `-t [fasttree|fasttree-noml|clustal|parttree]`, MAGUS will construct its own guide tree. If empty, `fasttree` is used as a default. - pattern: "*.{dnd,tree}" - - - compress: - type: boolean - description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. 
- -output: - - meta: - type: map - description: | - Groovy Map containing sample meta information. - e.g. `[ id:'test', single_end:false ]` - - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - - alignment: - type: file - description: File containing the output alignment, in FASTA format containing gaps. - The sequences may be in a different order than in the input FASTA. - The output file may or may not be gzipped, depending on the value supplied to `compress`. - pattern: "*.aln{.gz,}" - -authors: - - "@lrauschning" diff --git a/modules/nf-core/magus/align/tests/main.nf.test b/modules/nf-core/magus/align/tests/main.nf.test deleted file mode 100644 index 6b603f20..00000000 --- a/modules/nf-core/magus/align/tests/main.nf.test +++ /dev/null @@ -1,113 +0,0 @@ -nextflow_process { - - name "Test Process MAGUS_ALIGN" - script "../main.nf" - process "MAGUS_ALIGN" - - tag "modules" - tag "modules_nfcore" - tag "magus" - tag "magus/align" - tag "magus/guidetree" - - test("setoxin - fasta - uncompressed") { - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] - input[1] = [[:],[]] - input[2] = false - """ - } - } - - then { - assertAll( - { assert process.success }, - // tests seem to be reproducible on a single machine, but not across different machines - // test the correct samples are in there - { assert path(process.out.alignment[0][1]).getText().contains(">1apf") }, - { assert path(process.out.alignment[0][1]).getText().contains(">1ahl") }, - { assert path(process.out.alignment[0][1]).getText().contains(">1atx") }, - { assert path(process.out.alignment[0][1]).getText().contains(">1sh1") }, - { assert path(process.out.alignment[0][1]).getText().contains(">1bds") }, - { assert snapshot(process.out.versions).match("versions0") } - ) - } - - } - - test("setoxin - fasta 
- compressed") { - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] - input[1] = [[:],[]] - input[2] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - // tests seem to be reproducible on a single machine, but not across different machines - // test the correct samples are in there - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1apf") }, - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1atx") }, - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1bds") }, - { assert snapshot(process.out.versions).match("versions1") } - ) - } - - } - test("setoxin - fasta - guide_tree") { - - setup { - run("MAGUS_GUIDETREE") { - script "../../guidetree/main.nf" - process { - """ - input[0] = [ [ id:'test' ], // meta map - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] - """ - } - } - } - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] - input[1] = MAGUS_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} - input[2] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - // tests seem to be reproducible on a single machine, but not across different machines - // test the correct samples are in there - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1apf") }, - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, - { assert 
path(process.out.alignment[0][1]).getTextGzip().contains(">1atx") }, - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1bds") }, - { assert snapshot(process.out.versions).match("with_guide_tree_versions") } - ) - } - } -} \ No newline at end of file diff --git a/modules/nf-core/magus/align/tests/main.nf.test.snap b/modules/nf-core/magus/align/tests/main.nf.test.snap deleted file mode 100644 index 9b4f5f69..00000000 --- a/modules/nf-core/magus/align/tests/main.nf.test.snap +++ /dev/null @@ -1,65 +0,0 @@ -{ - "alignment_uncompressed": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln:md5,36feabc3daa2e02bade367fbbb25998b" - ] - ] - ], - "timestamp": "2024-03-28T18:20:59.519188125" - }, - "versions0": { - "content": [ - [ - "versions.yml:md5,ef9456e058ce51bce10dbc3703da29c7" - ] - ], - "timestamp": "2024-03-28T18:16:37.893176542" - }, - "versions1": { - "content": [ - [ - "versions.yml:md5,ef9456e058ce51bce10dbc3703da29c7" - ] - ], - "timestamp": "2024-03-28T18:17:23.679862847" - }, - "with_guide_tree_alignment": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,f3c292f0d5594cdafa9a3a270be706d7" - ] - ] - ], - "timestamp": "2024-03-28T18:24:31.933699545" - }, - "alignment_compressed": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,0be13ed7dc73a21accf406b7355c360e" - ] - ] - ], - "timestamp": "2024-03-04T18:07:34.889870533" - }, - "with_guide_tree_versions": { - "content": [ - [ - "versions.yml:md5,ef9456e058ce51bce10dbc3703da29c7" - ] - ], - "timestamp": "2024-03-28T18:24:32.0238804" - } -} \ No newline at end of file diff --git a/modules/nf-core/magus/align/tests/tags.yml b/modules/nf-core/magus/align/tests/tags.yml deleted file mode 100644 index f2dfd3a7..00000000 --- a/modules/nf-core/magus/align/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -magus/align: - - "modules/nf-core/magus/align/**" diff --git 
a/modules/nf-core/magus/guidetree/environment.yml b/modules/nf-core/magus/guidetree/environment.yml deleted file mode 100644 index 8e750334..00000000 --- a/modules/nf-core/magus/guidetree/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: magus_guidetree -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::magus-msa=0.2.0 diff --git a/modules/nf-core/magus/guidetree/main.nf b/modules/nf-core/magus/guidetree/main.nf deleted file mode 100644 index eb37fb9d..00000000 --- a/modules/nf-core/magus/guidetree/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process MAGUS_GUIDETREE { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/magus-msa:0.2.0--pyhdfd78af_0': - 'biocontainers/magus-msa:0.2.0--pyhdfd78af_0' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("*.tree"), emit: tree - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - magus \\ - -np $task.cpus \\ - -i $fasta \\ - -o ${prefix}.tree \\ - --onlyguidetree TRUE \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - MAGUS: \$(magus --version) - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.tree - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - MAGUS: \$(magus --version) - END_VERSIONS - """ -} diff --git a/modules/nf-core/magus/guidetree/meta.yml b/modules/nf-core/magus/guidetree/meta.yml deleted file mode 100644 index 7d21391a..00000000 --- a/modules/nf-core/magus/guidetree/meta.yml +++ /dev/null @@ -1,49 +0,0 @@ ---- -# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: "magus_guidetree" -description: Multiple Sequence Alignment using Graph Clustering -keywords: - - MSA - - guidetree - - genomics - - graph -tools: - - "magus": - description: "Multiple Sequence Alignment using Graph Clustering" - homepage: "https://github.com/vlasmirnov/MAGUS" - documentation: "https://github.com/vlasmirnov/MAGUS" - tool_dev_url: "https://github.com/vlasmirnov/MAGUS" - doi: "10.1093/bioinformatics/btaa992" - licence: ["MIT"] - -input: - - meta: - type: map - description: | - Groovy Map containing fasta meta information - e.g. `[ id:'test', single_end:false ]` - - - fasta: - type: file - description: Input sequences in FASTA format. - pattern: "*.{fa,fna,fasta}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test', single_end:false ]` - - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - - tree: - type: file - description: File containing the output guidetree, in newick format. 
- pattern: "*.tree" - -authors: - - "@lrauschning" diff --git a/modules/nf-core/magus/guidetree/tests/main.nf.test b/modules/nf-core/magus/guidetree/tests/main.nf.test deleted file mode 100644 index 953e37e5..00000000 --- a/modules/nf-core/magus/guidetree/tests/main.nf.test +++ /dev/null @@ -1,39 +0,0 @@ -nextflow_process { - - name "Test Process MAGUS_GUIDETREE" - script "../main.nf" - process "MAGUS_GUIDETREE" - - tag "modules" - tag "modules_nfcore" - tag "magus" - tag "magus/guidetree" - - test("setoxin - fasta") { - - when { - process { - """ - input[0] = [ [ id:'test' ], // meta map - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - //{ assert snapshot(process.out.tree).match("tree")}, - // tests seem to be reproducible on a single machine, but not across different machines - // test the correct samples are in there - { assert path(process.out.tree[0][1]).getText().contains("1apf") }, - { assert path(process.out.tree[0][1]).getText().contains("1ahl") }, - { assert path(process.out.tree[0][1]).getText().contains("1atx") }, - { assert path(process.out.tree[0][1]).getText().contains("1sh1") }, - { assert path(process.out.tree[0][1]).getText().contains("1bds") }, - { assert snapshot(process.out.versions).match("versions") } - ) - } - } -} \ No newline at end of file diff --git a/modules/nf-core/magus/guidetree/tests/main.nf.test.snap b/modules/nf-core/magus/guidetree/tests/main.nf.test.snap deleted file mode 100644 index d564be3d..00000000 --- a/modules/nf-core/magus/guidetree/tests/main.nf.test.snap +++ /dev/null @@ -1,23 +0,0 @@ -{ - "versions": { - "content": [ - [ - "versions.yml:md5,5a2ad92c9ea945c4bf4890f02ca2562f" - ] - ], - "timestamp": "2024-03-28T18:25:41.292337485" - }, - "tree": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.tree:md5,c742636229d166322a2824d409595738" - ] - ] - ], - 
"timestamp": "2024-03-28T18:25:41.226027114" - } -} \ No newline at end of file diff --git a/modules/nf-core/magus/guidetree/tests/tags.yml b/modules/nf-core/magus/guidetree/tests/tags.yml deleted file mode 100644 index 12b89af8..00000000 --- a/modules/nf-core/magus/guidetree/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -magus/guidetree: - - "modules/nf-core/magus/guidetree/**" diff --git a/modules/nf-core/mtmalign/align/environment.yml b/modules/nf-core/mtmalign/align/environment.yml deleted file mode 100644 index 59d426bb..00000000 --- a/modules/nf-core/mtmalign/align/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: mtmalign_align -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::mtm-align=20220104 - - conda-forge::pigz=2.8 diff --git a/modules/nf-core/mtmalign/align/main.nf b/modules/nf-core/mtmalign/align/main.nf deleted file mode 100644 index 933d2c74..00000000 --- a/modules/nf-core/mtmalign/align/main.nf +++ /dev/null @@ -1,73 +0,0 @@ - - -process MTMALIGN_ALIGN { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-5bcf71dc66dac33d8e003c5e78043b80f5c7f269:8f0e486d46f7ab38892c1a8f78d2894a549d03b5-0': - 'biocontainers/mulled-v2-5bcf71dc66dac33d8e003c5e78043b80f5c7f269:8f0e486d46f7ab38892c1a8f78d2894a549d03b5-0' }" - - input: - tuple val(meta), path(pdbs) - val(compress) - - output: - tuple val(meta), path("${prefix}.aln${compress ? '.gz' : ''}"), emit: alignment - tuple val(meta), path("${prefix}.pdb${compress ? 
'.gz' : ''}"), emit: structure - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - // mTMalign is not capable of writing to stdout - // if -o /dev/stdout is specified, the output file will be polluted with debug messages emitted by mTMalign - """ - # decompress input files if required - if ls ./*.pdb.gz 2&> /dev/null; then # check if any files are compressed; calling unpigz with an empty arg will cause it to panic - unpigz -d ./*.pdb.gz - fi - - # construct input file for mtmalign - ls *.pdb | sed s/\\ /\\n/ > input_list.txt - - mtm-align -i input_list.txt -o ${prefix}.pdb - # -o does not affect the fasta naming, so move it to the new name - mv ./mTM_result/result.fasta ./mTM_result/${prefix}.aln - # Remove ".pdb" from the ids in the alignment file - sed -i 's/\\.pdb//g' ./mTM_result/${prefix}.aln - - # compress both output files - if ${compress}; then - pigz -p ${task.cpus} ./mTM_result/${prefix}.aln ./mTM_result/${prefix}.pdb - fi - - # move everything in mTM_result to the working directory - mv ./mTM_result/* . - - # mtm-align -v prints the wrong version 20180725, so extract it from the cosmetic output in the help message - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mTM-align: \$( mtm-align -h | grep -e "\\(Version [[:digit:]]*\\)" | grep -oe "[[:digit:]]*" ) - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.aln${compress ? '.gz' : ''} - touch ${prefix}.pdb${compress ? 
'.gz' : ''} - - # mtm-align -v prints the wrong version 20180725, so extract it from the cosmetic output in the help message - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mTM-align: \$( mtm-align -h | grep -e "\\(Version [[:digit:]]*\\)" | grep -oe "[[:digit:]]*" ) - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ -} diff --git a/modules/nf-core/mtmalign/align/meta.yml b/modules/nf-core/mtmalign/align/meta.yml deleted file mode 100644 index 1e444e1c..00000000 --- a/modules/nf-core/mtmalign/align/meta.yml +++ /dev/null @@ -1,57 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: "mtmalign_align" -description: Aligns protein structures using mTM-align -keywords: - - alignment - - MSA - - genomics - - structure -tools: - - "mTM-align": - description: "Algorithm for structural multiple sequence alignments" - homepage: "http://yanglab.nankai.edu.cn/mTM-align/" - documentation: "http://yanglab.nankai.edu.cn/mTM-align/help/" - tool_dev_url: "http://yanglab.nankai.edu.cn/mTM-align/" - doi: "10.1093/bioinformatics/btx828" - licence: ["None"] - - "pigz": - description: "Parallel implementation of the gzip algorithm." - homepage: "https://zlib.net/pigz/" - documentation: "https://zlib.net/pigz/pigz.pdf" -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - pdbs: - type: file - description: Input protein structures in PDB format. Files may be gzipped or uncompressed. - They should contain exactly one chain! - pattern: "*.{pdb}" - - compress: - type: boolean - description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'test']` - - alignment: - type: file - description: Alignment in FASTA format. May be gzipped or uncompressed. - pattern: "*.aln{.gz,}" - - structure: - type: file - description: Overlaid structures in PDB format. May be gzipped or uncompressed. - pattern: "${prefix}.pdb{.gz,}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@lrauschning" -maintainers: - - "@lrauschning" diff --git a/modules/nf-core/mtmalign/align/tests/main.nf.test b/modules/nf-core/mtmalign/align/tests/main.nf.test deleted file mode 100644 index ada32c39..00000000 --- a/modules/nf-core/mtmalign/align/tests/main.nf.test +++ /dev/null @@ -1,90 +0,0 @@ -nextflow_process { - - name "Test Process MTMALIGN_ALIGN" - script "../main.nf" - process "MTMALIGN_ALIGN" - tag "modules" - tag "modules_nfcore" - tag "mtmalign" - tag "mtmalign/align" - tag "untar" - - test("Test on seatoxin dataset - uncompressed") { - setup { - - run("UNTAR") { - script "../../../../../modules/nf-core/untar/main.nf" - process { - """ - archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) - input[0] = Channel.of(tuple([id:'test'], archive)) - """ - } - } - } - - when { - params { - } - process { - """ - input[0] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file(dir).listFiles().collect()]} - input[1] = false - """ - } - } - - then { - assertAll( - { assert process.success }, - // mTMalign may be nondeterministic, just check if the pdbs are all in there - //{ assert snapshot(process.out).match() } - { assert path(process.out.alignment[0][1]).getText().contains(">1ahl") }, - { assert path(process.out.alignment[0][1]).getText().contains(">1apf") }, - { assert path(process.out.alignment[0][1]).getText().contains(">1atx") }, - { assert path(process.out.alignment[0][1]).getText().contains(">1bds") }, - { assert 
path(process.out.alignment[0][1]).getText().contains(">1sh1") }, - { assert snapshot(process.out.versions).match("versions0") } - ) - } - } - - test("Test on seatoxin dataset - compressed") { - setup { - - run("UNTAR") { - script "../../../../../modules/nf-core/untar/main.nf" - process { - """ - archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) - input[0] = Channel.of(tuple([id:'test'], archive)) - """ - } - } - } - - when { - params { - } - process { - """ - input[0] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file(dir).listFiles().collect()]} - input[1] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - // mTMalign may be nondeterministic, just check if the pdbs are all in there - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1apf") }, - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1atx") }, - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1bds") }, - { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, - { assert snapshot(process.out.versions).match("versions1") } - ) - } - } -} \ No newline at end of file diff --git a/modules/nf-core/mtmalign/align/tests/main.nf.test.snap b/modules/nf-core/mtmalign/align/tests/main.nf.test.snap deleted file mode 100644 index 0eefb191..00000000 --- a/modules/nf-core/mtmalign/align/tests/main.nf.test.snap +++ /dev/null @@ -1,26 +0,0 @@ -{ - "versions0": { - "content": [ - [ - "versions.yml:md5,7cbacec15bb9e0c8cbb27610bde74c10" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-03T11:01:13.729263689" - }, - "versions1": { - "content": [ - [ - "versions.yml:md5,7cbacec15bb9e0c8cbb27610bde74c10" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": 
"2024-06-03T11:01:37.28539854" - } -} \ No newline at end of file diff --git a/modules/nf-core/mtmalign/align/tests/tags.yml b/modules/nf-core/mtmalign/align/tests/tags.yml deleted file mode 100644 index 87a2e3bc..00000000 --- a/modules/nf-core/mtmalign/align/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -mtmalign/align: - - modules/nf-core/mtmalign/align/** diff --git a/modules/nf-core/muscle5/super5/environment.yml b/modules/nf-core/muscle5/super5/environment.yml deleted file mode 100644 index fbaf4a2e..00000000 --- a/modules/nf-core/muscle5/super5/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: muscle5_super5 -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::muscle=5.1 - - conda-forge::pigz=2.8 diff --git a/modules/nf-core/muscle5/super5/main.nf b/modules/nf-core/muscle5/super5/main.nf deleted file mode 100644 index 87af149b..00000000 --- a/modules/nf-core/muscle5/super5/main.nf +++ /dev/null @@ -1,63 +0,0 @@ -process MUSCLE5_SUPER5 { - tag "$meta.id" - label 'process_medium' - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-8eb01a3c2755c935d070dd03ff2dee698eeb4466:ceb6e65e00346ed20d0d8078dddf9858a7af0fe2-0': - 'biocontainers/mulled-v2-8eb01a3c2755c935d070dd03ff2dee698eeb4466:ceb6e65e00346ed20d0d8078dddf9858a7af0fe2-0' }" - - input: - tuple val(meta), path(fasta) - val(compress) - - output: - tuple val(meta), path("*.aln{.gz,}"), emit: alignment - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - prefix = args.contains('-perm all') ? "${prefix}@" : "${prefix}" - def write_output = (compress && !args.contains('-perm all')) ? 
" -output >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "-output ${prefix}.aln" - // muscle internally expands the shell pipe to a file descriptor of the form /dev/fd/ - // this causes it to fail, unless -output is left at the end of the call - // see also clustalo/align - // using >() is necessary to preserve the return value, - // so nextflow knows to display an error when it failed - """ - muscle \\ - -super5 ${fasta} \\ - ${args} \\ - -threads ${task.cpus} \\ - $write_output - - - # output may be multiple files if -perm all is set - # compress these individually if set to compress output - if ${args.contains('-perm all') && compress}; then - pigz -p ${task.cpus} *.aln - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - muscle: \$(muscle -version | head -n 1 | cut -d ' ' -f 2 | sed 's/.linux64//') - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.aln${compress ? '.gz' : ''} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - muscle: \$(muscle -version | head -n 1 | cut -d ' ' -f 2 | sed 's/.linux64//') - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ -} diff --git a/modules/nf-core/muscle5/super5/meta.yml b/modules/nf-core/muscle5/super5/meta.yml deleted file mode 100644 index 057128dc..00000000 --- a/modules/nf-core/muscle5/super5/meta.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: "muscle5_super5" -description: Muscle is a program for creating multiple alignments of amino acid or nucleotide sequences. This particular module uses the super5 algorithm for very big alignments. It can permutate the guide tree according to a set of flags. -keywords: - - align - - msa - - multiple sequence alignment -tools: - - muscle -super5: - description: "Muscle v5 is a major re-write of MUSCLE based on new algorithms." 
- homepage: "https://drive5.com/muscle5/" - documentation: "https://drive5.com/muscle5/manual/" - doi: "10.1101/2021.06.20.449169" - licence: ["Public Domain"] - - "pigz": - description: "Parallel implementation of the gzip algorithm." - homepage: "https://zlib.net/pigz/" - documentation: "https://zlib.net/pigz/pigz.pdf" -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input sequences for alignment must be in FASTA format - pattern: "*.{fasta,fa,fna}" - - compress: - type: boolean - description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test', single_end:false ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - alignment: - type: file - description: Multiple sequence alignment produced in gzipped FASTA format. If '-perm all' is passed in ext.args, this will be multiple files per input! 
- pattern: "*.{aln.gz}" -authors: - - "@alessiovignoli" - - "@JoseEspinosa" -maintainers: - - "@alessiovignoli" - - "@JoseEspinosa" - - "@lrauschning" diff --git a/modules/nf-core/muscle5/super5/tests/main.nf.test b/modules/nf-core/muscle5/super5/tests/main.nf.test deleted file mode 100644 index c1541208..00000000 --- a/modules/nf-core/muscle5/super5/tests/main.nf.test +++ /dev/null @@ -1,72 +0,0 @@ -nextflow_process { - - name "Test Process MUSCLE5_SUPER5" - script "../main.nf" - process "MUSCLE5_SUPER5" - config "./nextflow.config" - - tag "modules" - tag "modules_nfcore" - tag "muscle5" - tag "muscle5/super5" - - test("fasta - align_sequence - uncompressed") { - when { - process { - """ - input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] - input[1] = false - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("alignment - uncompressed")}, - ) - } - } - - test("fasta - align_sequence - compressed") { - when { - process { - """ - input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] - input[1] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("alignment - compressed")}, - ) - } - } - test("fasta - align_sequence - compressed - perm_all") { - config "./perm_all.config" - when { - process { - """ - input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] - input[1] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("perm-all")}, - ) - } - } -} \ No newline at end of file diff --git 
a/modules/nf-core/muscle5/super5/tests/main.nf.test.snap b/modules/nf-core/muscle5/super5/tests/main.nf.test.snap deleted file mode 100644 index ce7aadf0..00000000 --- a/modules/nf-core/muscle5/super5/tests/main.nf.test.snap +++ /dev/null @@ -1,46 +0,0 @@ -{ - "alignment - compressed": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" - ] - ] - ], - "timestamp": "2024-02-09T19:08:23.498404397" - }, - "perm-all": { - "content": [ - [ - [ - { - "id": "test" - }, - [ - "testabc.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a", - "testacb.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a", - "testbca.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a", - "testnone.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" - ] - ] - ] - ], - "timestamp": "2024-02-09T19:08:37.386512953" - }, - "alignment - uncompressed": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln:md5,46ba556df08f7aabbe5e1ba31d226b6a" - ] - ] - ], - "timestamp": "2024-02-09T19:16:25.330353817" - } -} \ No newline at end of file diff --git a/modules/nf-core/muscle5/super5/tests/nextflow.config b/modules/nf-core/muscle5/super5/tests/nextflow.config deleted file mode 100644 index e69de29b..00000000 diff --git a/modules/nf-core/muscle5/super5/tests/tags.yml b/modules/nf-core/muscle5/super5/tests/tags.yml deleted file mode 100644 index c915c6a3..00000000 --- a/modules/nf-core/muscle5/super5/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -muscle5/super5: - - "modules/nf-core/muscle5/super5/**" diff --git a/modules/nf-core/tcoffee/align/environment.yml b/modules/nf-core/tcoffee/align/environment.yml deleted file mode 100644 index 28f159fd..00000000 --- a/modules/nf-core/tcoffee/align/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: tcoffee_align -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::t-coffee=13.46.0.919e8c6b - - conda-forge::pigz=2.8 diff --git a/modules/nf-core/tcoffee/align/main.nf 
b/modules/nf-core/tcoffee/align/main.nf deleted file mode 100644 index a14964c9..00000000 --- a/modules/nf-core/tcoffee/align/main.nf +++ /dev/null @@ -1,68 +0,0 @@ -process TCOFFEE_ALIGN { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0': - 'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }" - - input: - tuple val(meta) , path(fasta) - tuple val(meta2), path(tree) - tuple val(meta3), path(template), path(accessory_informations) - val(compress) - - output: - tuple val(meta), path("*.aln{.gz,}"), emit: alignment - // in the args there might be the request to generate a lib file, so the following is an optional output - tuple val(meta), path("*.*lib") , emit: lib, optional : true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def tree_args = tree ? "-usetree $tree" : "" - def template_args = template ? "-template_file $template" : "" - def outfile = compress ? "stdout" : "${prefix}.aln" - def write_output = compress ? 
" | pigz -cp ${task.cpus} > ${prefix}.aln.gz" : "" - """ - export TEMP='./' - t_coffee -seq ${fasta} \ - $tree_args \ - $template_args \ - $args \ - -thread ${task.cpus} \ - -outfile $outfile \ - $write_output - - # If stdout file exist and compress is true, then compress the file - # This is a patch for the current behaviour of the regressive algorithm - # that does not support the stdout redirection - if [ -f stdout ] && [ "$compress" = true ]; then - pigz -cp ${task.cpus} < stdout > ${prefix}.aln.gz - rm stdout - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.aln${compress ? '.gz':''} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') - pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) - END_VERSIONS - """ -} diff --git a/modules/nf-core/tcoffee/align/meta.yml b/modules/nf-core/tcoffee/align/meta.yml deleted file mode 100644 index 4125d1ed..00000000 --- a/modules/nf-core/tcoffee/align/meta.yml +++ /dev/null @@ -1,80 +0,0 @@ -name: "tcoffee_align" -description: Aligns sequences using T_COFFEE -keywords: - - alignment - - MSA - - genomics -tools: - - "tcoffee": - description: "A collection of tools for Computing, Evaluating and Manipulating Multiple Alignments of DNA, RNA, Protein Sequences and Structures." - homepage: "http://www.tcoffee.org/Projects/tcoffee/" - documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" - tool_dev_url: "https://github.com/cbcrg/tcoffee" - doi: "10.1006/jmbi.2000.4042" - licence: ["GPL v3"] - - "pigz": - description: "Parallel implementation of the gzip algorithm." 
- homepage: "https://zlib.net/pigz/" - documentation: "https://zlib.net/pigz/pigz.pdf" -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - fasta: - type: file - description: Input sequences in FASTA format - pattern: "*.{fa,fasta}" - - meta2: - type: map - description: | - Groovy Map containing tree information - e.g. `[ id:'test_tree']` - - tree: - type: file - description: Input guide tree in Newick format - pattern: "*.{dnd}" - - meta3: - type: map - description: | - Groovy Map containing tree information - e.g. `[ id:'test_infos']` - - template: - type: file - description: T_coffee template file that maps sequences to the accessory information files to be used. - pattern: "*" - - accessory_informations: - type: file - description: Accessory files to be used in the alignment. For example, it could be protein structures or secondary structures. - pattern: "*" - - compress: - type: boolean - description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - alignment: - type: file - description: Alignment file in FASTA format. May be gzipped. - pattern: "*.aln{.gz,}" - - lib: - type: file - description: optional output, the library generated from the MSA file. 
- pattern: "*.*lib" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@luisas" - - "@JoseEspinosa" - - "@alessiovignoli" -maintainers: - - "@luisas" - - "@JoseEspinosa" - - "@lrauschning" - - "@alessiovignoli" diff --git a/modules/nf-core/tcoffee/align/tests/lib.config b/modules/nf-core/tcoffee/align/tests/lib.config deleted file mode 100644 index 2fc113ef..00000000 --- a/modules/nf-core/tcoffee/align/tests/lib.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = { "-output fasta_aln -out_lib=sample_lib1.tc_lib" } -} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/align/tests/main.nf.test b/modules/nf-core/tcoffee/align/tests/main.nf.test deleted file mode 100644 index 307534fb..00000000 --- a/modules/nf-core/tcoffee/align/tests/main.nf.test +++ /dev/null @@ -1,177 +0,0 @@ -nextflow_process { - - name "Test Process TCOFFEE_ALIGN" - script "../main.nf" - process "TCOFFEE_ALIGN" - - tag "modules" - tag "modules_nfcore" - tag "tcoffee" - tag "tcoffee/align" - tag "famsa/guidetree" - tag "untar" - - test("fasta - align_sequence") { - - config "./sequence.config" - - when { - process { - """ - input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] - input[1] = [[:],[]] - input[2] = [[:],[],[]] - input[3] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("alignment")}, - { assert snapshot(process.out.versions).match("versions_uncomp") } - ) - } - } - - test("fasta - align_sequence - uncompressed") { - - config "./sequence.config" - - when { - process { - """ - input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] - input[1] = [[:],[]] - input[2] = [[:],[],[]] - input[3] = false - """ - } - } 
- - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("alignment - uncompressed")}, - { assert snapshot(process.out.versions).match("versions_comp") } - ) - } - } - - test("sarscov2 - fasta - align_with_guide_tree") { - - config "./tree.config" - - setup { - - run("FAMSA_GUIDETREE") { - script "../../../famsa/guidetree//main.nf" - process { - """ - input[0] = [ [ id:'test' ], - file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) - ] - - """ - } - } - } - - when { - process { - """ - input[0] = [ [ id:'test' ], - file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) - ] - input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test'], tree]} - input[2] = [ [:], [], [] ] - input[3] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("alignment_guidetree")}, - { assert snapshot(process.out.versions).match("versions_guidetree") } - ) - } - - } - - test("fasta - align_with_structure") { - - config "./structure.config" - - setup { - - run("UNTAR") { - script "../../../untar/main.nf" - process { - """ - input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) - ] - - """ - } - } - } - - when { - process { - """ - input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] - input[1] = [ [:], [] ] - input[2] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], [] ,file(dir).listFiles().collect()]} - input[3] = true - """ - - } - } - - then { - assertAll( - { assert process.success }, - { assert path(process.out.alignment.get(0).get(1)).getTextGzip().contains("1ahl") }, - { assert 
snapshot(process.out.versions).match("versions_structure") } - ) - } - - } - - test("fasta - align_with_lib") { - - config "./lib.config" - - when { - process { - """ - input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] - input[1] = [[:],[]] - input[2] = [[:],[],[]] - input[3] = true - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment).match("alignment - lib") }, - { assert path(process.out.lib.get(0).get(1)).getText().contains("1ahl") }, - { assert snapshot(process.out.versions).match("versions_lib") } - ) - } - } -} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/align/tests/main.nf.test.snap b/modules/nf-core/tcoffee/align/tests/main.nf.test.snap deleted file mode 100644 index dfef40a7..00000000 --- a/modules/nf-core/tcoffee/align/tests/main.nf.test.snap +++ /dev/null @@ -1,130 +0,0 @@ -{ - "versions_structure": { - "content": [ - [ - "versions.yml:md5,fb187c9186b50a8076d08cd3be3c1b70" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" - }, - "timestamp": "2024-02-28T19:00:28.712838" - }, - "versions_lib": { - "content": [ - [ - "versions.yml:md5,fb187c9186b50a8076d08cd3be3c1b70" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-19T14:04:06.031557" - }, - "alignment - uncompressed": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln:md5,bd1db08ad04514cc6d1334598c1a6ef0" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" - }, - "timestamp": "2024-02-28T18:59:54.582504" - }, - "versions_comp": { - "content": [ - [ - "versions.yml:md5,fb187c9186b50a8076d08cd3be3c1b70" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" - }, - "timestamp": "2024-02-28T18:59:54.593312" - }, - "versions_guidetree": { - "content": [ - [ - "versions.yml:md5,fb187c9186b50a8076d08cd3be3c1b70" - ] - 
], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" - }, - "timestamp": "2024-02-28T19:00:10.618213" - }, - "alignment - lib": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,bd1db08ad04514cc6d1334598c1a6ef0" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-19T13:57:39.653762" - }, - "alignment": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,bd1db08ad04514cc6d1334598c1a6ef0" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" - }, - "timestamp": "2024-02-28T18:59:35.169119" - }, - "versions_uncomp": { - "content": [ - [ - "versions.yml:md5,fb187c9186b50a8076d08cd3be3c1b70" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" - }, - "timestamp": "2024-02-28T18:59:35.2062" - }, - "alignment_guidetree": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,93bc8adfcd88f7913718eacc13da8e4a" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" - }, - "timestamp": "2024-02-28T19:00:10.611489" - } -} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/align/tests/sequence.config b/modules/nf-core/tcoffee/align/tests/sequence.config deleted file mode 100644 index 69c6fc17..00000000 --- a/modules/nf-core/tcoffee/align/tests/sequence.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = { "-output fasta_aln" } -} diff --git a/modules/nf-core/tcoffee/align/tests/structure.config b/modules/nf-core/tcoffee/align/tests/structure.config deleted file mode 100644 index 1cbd9c9c..00000000 --- a/modules/nf-core/tcoffee/align/tests/structure.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: "TCOFFEE_ALIGN" { - ext.args = { "-method TMalign_pair -output fasta_aln" } - } -} diff --git a/modules/nf-core/tcoffee/align/tests/tags.yml b/modules/nf-core/tcoffee/align/tests/tags.yml deleted file mode 100644 index b367ce02..00000000 --- a/modules/nf-core/tcoffee/align/tests/tags.yml +++ /dev/null @@ 
-1,2 +0,0 @@ -tcoffee/align: - - "modules/nf-core/tcoffee/align/**" diff --git a/modules/nf-core/tcoffee/align/tests/tree.config b/modules/nf-core/tcoffee/align/tests/tree.config deleted file mode 100644 index d426ed45..00000000 --- a/modules/nf-core/tcoffee/align/tests/tree.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: "TCOFFEE_ALIGN"{ - ext.args = { "-output fasta_aln" } - } -} diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf deleted file mode 100644 index 118d5df6..00000000 --- a/subworkflows/local/align.nf +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Compute trees if needed and run alignment - */ - -// -// Include the subworkflows -// -include { COMPUTE_TREES } from '../../subworkflows/local/compute_trees.nf' - -// Include the nf-core modules -include { CLUSTALO_ALIGN } from '../../modules/nf-core/clustalo/align/main' -include { FAMSA_ALIGN } from '../../modules/nf-core/famsa/align/main' -include { KALIGN_ALIGN } from '../../modules/nf-core/kalign/align/main' -include { LEARNMSA_ALIGN } from '../../modules/nf-core/learnmsa/align/main' -include { MAFFT } from '../../modules/nf-core/mafft/main' -include { MAGUS_ALIGN } from '../../modules/nf-core/magus/align/main' -include { MUSCLE5_SUPER5 } from '../../modules/nf-core/muscle5/super5/main' -include { TCOFFEE_ALIGN } from '../../modules/nf-core/tcoffee/align/main' -include { TCOFFEE_ALIGN as TCOFFEE3D_ALIGN } from '../../modules/nf-core/tcoffee/align/main' -include { TCOFFEE_ALIGN as REGRESSIVE_ALIGN } from '../../modules/nf-core/tcoffee/align/main' -include { MTMALIGN_ALIGN } from '../../modules/nf-core/mtmalign/align/main' - -workflow ALIGN { - take: - ch_fastas // channel: [ val(meta), [ path(fastas) ] ] - ch_tools // channel: [ val(meta_tree), val(meta_aligner) ] - // [[tree:, args_tree:, args_tree_clean: ], [aligner:, args_aligner:, args_aligner_clean:]] - // e.g.[[tree:FAMSA, args_tree:-gt upgma -parttree, args_tree_clean:-gt_upgma_-parttree], [aligner:FAMSA, args_aligner:null, 
args_aligner_clean:null]] - // e.g.[[tree:null, args_tree:null, args_tree_clean:null], [aligner:TCOFFEE, args_aligner:-output fasta_aln, args_aligner_clean:-output_fasta_aln]] - ch_structures // channel: meta, [/path/to/file.pdb,/path/to/file.pdb,/path/to/file.pdb] - compress // boolean: true or false - - main: - - ch_msa = Channel.empty() - ch_versions = Channel.empty() - - // Branch the toolsheet information into two channels - // This way, it can direct the computation of guidetrees - // and aligners separately - ch_tools - .multiMap { - it -> - tree: it[0] - align: it[1] - } - .set { ch_tools_split } - - // ------------------------------------------------ - // Compute the required trees - // ------------------------------------------------ - COMPUTE_TREES ( - ch_fastas, - ch_tools_split.tree.unique() - ) - trees = COMPUTE_TREES.out.trees - ch_versions = ch_versions.mix(COMPUTE_TREES.out.versions) - - ch_fastas.combine(ch_tools) - .map { - metafasta, fasta, metatree, metaalign -> - [ metafasta+metatree , metaalign, fasta ] - } - .set { ch_fasta_tools } - - // ------------------------------------------------ - // Add back trees to the fasta channel - // ------------------------------------------------ - ch_fasta_tools - .join(trees, by: [0], remainder:true ) - .map { - metafasta_tree, metaalign, fasta, tree -> - [ metafasta_tree + metaalign, fasta, tree ] - } - .map { - meta, fasta, tree -> - tree ? 
[ meta,fasta, tree ] : [meta, fasta, [ ] ] - } - .branch { - clustalo: it[0]["aligner"] == "CLUSTALO" - famsa: it[0]["aligner"] == "FAMSA" - kalign: it[0]["aligner"] == "KALIGN" - learnmsa: it[0]["aligner"] == "LEARNMSA" - mafft: it[0]["aligner"] == "MAFFT" - magus: it[0]["aligner"] == "MAGUS" - muscle5: it[0]["aligner"] == "MUSCLE5" - mtmalign: it[0]["aligner"] == "MTMALIGN" - regressive: it[0]["aligner"] == "REGRESSIVE" - tcoffee: it[0]["aligner"] == "TCOFFEE" - tcoffee3d: it[0]["aligner"] == "3DCOFFEE" - } - .set { ch_fasta_trees } - - ch_structures.combine(ch_tools) - .map { - metastruct, template, struct, metatree, metaalign -> - [ metastruct+metatree+metaalign, template, struct ] - } - .branch { - mtmalign: it[0]["aligner"] == "MTMALIGN" - } - .set { ch_structures_tools } - - // ------------------------------------------------ - // Compute the alignments - // ------------------------------------------------ - - // 1. SEQUENCE BASED - - // ----------------- CLUSTALO ------------------ - ch_fasta_trees.clustalo - .multiMap { - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_clustalo } - - CLUSTALO_ALIGN ( - ch_fasta_trees_clustalo.fasta, - ch_fasta_trees_clustalo.tree, - compress - ) - ch_msa = ch_msa.mix(CLUSTALO_ALIGN.out.alignment) - ch_versions = ch_versions.mix(CLUSTALO_ALIGN.out.versions.first()) - - // ----------------- FAMSA --------------------- - ch_fasta_trees.famsa - .multiMap { - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_famsa} - - FAMSA_ALIGN (ch_fasta_trees_famsa.fasta, - ch_fasta_trees_famsa.tree, - compress - ) - ch_msa = ch_msa.mix(FAMSA_ALIGN.out.alignment) - ch_versions = ch_versions.mix(FAMSA_ALIGN.out.versions.first()) - - // ---------------- KALIGN ----------------------- - ch_fasta_trees.kalign - .multiMap { - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - } - .set { ch_fasta_kalign } - - 
KALIGN_ALIGN ( - ch_fasta_kalign.fasta, - compress - ) - ch_msa = ch_msa.mix(KALIGN_ALIGN.out.alignment) - ch_versions = ch_versions.mix(KALIGN_ALIGN.out.versions.first()) - - // ---------------- LEARNMSA ---------------------- - ch_fasta_trees.learnmsa - .multiMap { - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - } - .set { ch_fasta_learnmsa } - - LEARNMSA_ALIGN ( - ch_fasta_learnmsa.fasta, - compress - ) - ch_msa = ch_msa.mix(LEARNMSA_ALIGN.out.alignment) - ch_versions = ch_versions.mix(LEARNMSA_ALIGN.out.versions.first()) - - // ---------------- MAFFT ----------------------- - ch_fasta_trees.mafft - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - } - .set { ch_fasta_mafft } - - MAFFT ( - ch_fasta_mafft.fasta, - [ [:], [] ], - [ [:], [] ], - [ [:], [] ], - [ [:], [] ], - [ [:], [] ], - compress - ) - ch_msa = ch_msa.mix(MAFFT.out.fas) // the MAFFT module calls its output fas instead of alignment - ch_versions = ch_versions.mix(MAFFT.out.versions.first()) - - // ----------------- MAGUS ------------------ - ch_fasta_trees.magus - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_magus } - - MAGUS_ALIGN ( - ch_fasta_trees_magus.fasta, - ch_fasta_trees_magus.tree, - compress - ) - ch_msa = ch_msa.mix(MAGUS_ALIGN.out.alignment) - ch_versions = ch_versions.mix(MAGUS_ALIGN.out.versions.first()) - - // ----------------- MUSCLE5 ------------------ - ch_fasta_trees.muscle5 - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - } - .set { ch_fasta_muscle5 } - - MUSCLE5_SUPER5 ( - ch_fasta_muscle5.fasta, - compress - ) - ch_msa = ch_msa.mix(MUSCLE5_SUPER5.out.alignment.first()) - ch_versions = ch_versions.mix(MUSCLE5_SUPER5.out.versions.first()) - - // ----------------- TCOFFEE ------------------ - ch_fasta_trees.tcoffee - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { 
ch_fasta_trees_tcoffee } - - TCOFFEE_ALIGN ( - ch_fasta_trees_tcoffee.fasta, - ch_fasta_trees_tcoffee.tree, - [ [:], [], [] ], - compress - ) - ch_msa = ch_msa.mix(TCOFFEE_ALIGN.out.alignment) - ch_versions = ch_versions.mix(TCOFFEE_ALIGN.out.versions.first()) - - // ----------------- REGRESSIVE ------------------ - ch_fasta_trees.regressive - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_regressive } - - REGRESSIVE_ALIGN ( - ch_fasta_trees_regressive.fasta, - ch_fasta_trees_regressive.tree, - [ [:], [], [] ], - compress - ) - ch_msa = ch_msa.mix(REGRESSIVE_ALIGN.out.alignment) - ch_versions = ch_versions.mix(REGRESSIVE_ALIGN.out.versions.first()) - - // 2. SEQUENCE + STRUCTURE BASED - - // ----------------- 3DCOFFEE ------------------ - ch_fasta_trees.tcoffee3d - .map{ meta, fasta, tree -> [ meta["id"], meta, fasta, tree ] } - .combine(ch_structures.map{ meta, template, structures -> [ meta["id"], template, structures ] }, by: 0) - .multiMap{ - merging_id, meta, fastafile, treefile, templatefile, structuresfiles -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - structures: [ meta, templatefile, structuresfiles ] - } - .set { ch_fasta_trees_3dcoffee } - - TCOFFEE3D_ALIGN ( - ch_fasta_trees_3dcoffee.fasta, - ch_fasta_trees_3dcoffee.tree, - ch_fasta_trees_3dcoffee.structures, - compress - ) - ch_msa = ch_msa.mix(TCOFFEE3D_ALIGN.out.alignment) - ch_versions = ch_versions.mix(TCOFFEE3D_ALIGN.out.versions.first()) - - // 3. 
STRUCTURE BASED - - // ----------------- MTMALIGN ------------------ - ch_structures_tools.mtmalign - .multiMap { - meta, template, struct -> - pdbs: [ meta, struct ] - } - .set { ch_pdb_mtmalign } - - MTMALIGN_ALIGN ( - ch_pdb_mtmalign.pdbs, - compress - ) - ch_msa = ch_msa.mix(MTMALIGN_ALIGN.out.alignment) - ch_versions = ch_versions.mix(MTMALIGN_ALIGN.out.versions.first()) - - emit: - msa = ch_msa // channel: [ val(meta), path(msa) ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/compute_trees.nf b/subworkflows/local/compute_trees.nf deleted file mode 100644 index 90ae1971..00000000 --- a/subworkflows/local/compute_trees.nf +++ /dev/null @@ -1,43 +0,0 @@ -// -// Compute guide trees either with FAMSA or Clusta Omega -// - -include { FAMSA_GUIDETREE } from '../../modules/nf-core/famsa/guidetree/main' -include { CLUSTALO_GUIDETREE } from '../../modules/nf-core/clustalo/guidetree/main' - -workflow COMPUTE_TREES { - - take: - ch_fastas //channel: [ meta, /path/to/file.fasta ] - tree_tools //channel: [ meta ] ( tools to be run: meta.tree, meta.args_tree ) - - main: - ch_versions = Channel.empty() - - // - // Render the required guide trees - // - ch_fastas - .combine(tree_tools) - .map { - metafasta, fasta, metatree -> - [ metafasta + metatree, fasta ] - } - .branch { - famsa: it[0]["tree"] == "FAMSA" - clustalo: it[0]["tree"] == "CLUSTALO" - } - .set { ch_fastas_fortrees } - - FAMSA_GUIDETREE (ch_fastas_fortrees.famsa) - ch_trees = FAMSA_GUIDETREE.out.tree - ch_versions = ch_versions.mix(FAMSA_GUIDETREE.out.versions.first()) - - CLUSTALO_GUIDETREE (ch_fastas_fortrees.clustalo) - ch_trees = ch_trees.mix(CLUSTALO_GUIDETREE.out.tree) - ch_versions = ch_versions.mix(CLUSTALO_GUIDETREE.out.versions.first()) - - emit: - trees = ch_trees // channel: [ val(meta), path(tree) ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index 
7ac039dd..5df45ef9 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -27,7 +27,6 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mult // SUBWORKFLOW: Local subworkflows // include { STATS } from '../subworkflows/local/stats' -include { ALIGN } from '../subworkflows/local/align' include { EVALUATE } from '../subworkflows/local/evaluate' include { CREATE_TCOFFEETEMPLATE } from '../modules/local/create_tcoffee_template' From 4f6cbbcae2d49a06cc66a0e8ef4ec9bf299b9095 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Thu, 7 Nov 2024 10:59:59 +0100 Subject: [PATCH 04/23] update all subworkflows --- modules.json | 134 ++------ subworkflows/mirpedrol/msa_alignment/main.nf | 79 +++-- subworkflows/mirpedrol/msa_alignment/meta.yml | 80 +++-- .../msa_alignment/tests/main.nf.test | 241 ++++++------- .../msa_alignment/tests/main.nf.test.snap | 321 ++++++++---------- subworkflows/mirpedrol/msa_guidetree/main.nf | 39 ++- subworkflows/mirpedrol/msa_guidetree/meta.yml | 67 ++-- .../msa_guidetree/tests/main.nf.test | 88 ++--- subworkflows/mirpedrol/msa_treealign/main.nf | 44 ++- .../msa_treealign/tests/main.nf.test | 21 +- .../nf-core/utils_nextflow_pipeline/main.nf | 70 ++-- .../tests/main.workflow.nf.test | 10 +- .../tests/nextflow.config | 2 +- .../nf-core/utils_nfcore_pipeline/main.nf | 306 +++++++++-------- .../tests/nextflow.config | 2 +- .../nf-core/utils_nfvalidation_plugin/main.nf | 17 +- .../tests/main.nf.test | 2 +- 17 files changed, 786 insertions(+), 737 deletions(-) diff --git a/modules.json b/modules.json index fc5bfbc0..757e7e38 100644 --- a/modules.json +++ b/modules.json @@ -8,107 +8,77 @@ "clustalo/align": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "clustalo/guidetree": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_guidetree" - ] + 
"installed_by": ["msa_guidetree"] }, "clustalo/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": [ - "msa_treealign" - ] + "installed_by": ["msa_treealign"] }, "famsa/align": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "famsa/guidetree": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_guidetree" - ] + "installed_by": ["msa_guidetree"] }, "famsa/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": [ - "msa_treealign" - ] + "installed_by": ["msa_treealign"] }, "kalign/align": { "branch": "main", "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "learnmsa/align": { "branch": "main", "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "mafft": { "branch": "main", "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "magus/align": { "branch": "main", "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "magus/guidetree": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_guidetree" - ] + "installed_by": ["msa_guidetree"] }, "magus/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": [ - "msa_treealign" - ] + "installed_by": ["msa_treealign"] }, "muscle5/super5": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "tcoffee/align": { "branch": "main", "git_sha": 
"1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "tcoffee/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": [ - "msa_treealign" - ] + "installed_by": ["msa_treealign"] } } }, @@ -116,24 +86,18 @@ "mirpedrol": { "msa_alignment": { "branch": "main", - "git_sha": "da5b7cd83d0a060b0b5343671ab552ee5b7c8aa9", - "installed_by": [ - "subworkflows" - ] + "git_sha": "927094f07130b8fa3ac0b8d1f46fd7d252809418", + "installed_by": ["subworkflows"] }, "msa_guidetree": { "branch": "main", - "git_sha": "da5b7cd83d0a060b0b5343671ab552ee5b7c8aa9", - "installed_by": [ - "subworkflows" - ] + "git_sha": "4748294a96583ecb2c3952e7f81aca426386cc0b", + "installed_by": ["subworkflows"] }, "msa_treealign": { "branch": "main", - "git_sha": "0da6e13e2cade9d530dcf731a3f281998f72b5d1", - "installed_by": [ - "subworkflows" - ] + "git_sha": "0165b5b51bb1fe396a90c2db93c1f4e70b170816", + "installed_by": ["subworkflows"] } } } @@ -144,66 +108,48 @@ "csvtk/concat": { "branch": "master", "git_sha": "cfe2a24902bfdfe8132f11461ffda92d257f9f09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "csvtk/join": { "branch": "master", "git_sha": "614abbf126f287a3068dc86997b2e1b6a93abe20", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/csvtk/join/csvtk-join.diff" }, "pigz/compress": { "branch": "master", "git_sha": "0eab94fc1e48703c1b0a8704bd665f554905c39d", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pigz/uncompress": { "branch": "master", "git_sha": "d7f0de8aae7bf84b080dfdcf4e294bf11a46a51c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/alncompare": { "branch": "master", "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/irmsd": { "branch": "master", "git_sha": 
"faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/seqreformat": { "branch": "master", "git_sha": "32ae618a60a25a870b5fa47ea2060ddcd911ab53", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/tcs": { "branch": "master", "git_sha": "1cacaceabae75b0c3bc393dee52cb6a5020fcb5c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -211,27 +157,21 @@ "nf-core": { "utils_nextflow_pipeline": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "git_sha": "56372688d8979092cafbe0c5c3895b491166ca1c", + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "git_sha": "f533459a222ac53eb4c6bb7a5f574e4069197cdb", + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/subworkflows/mirpedrol/msa_alignment/main.nf b/subworkflows/mirpedrol/msa_alignment/main.nf index 56b9fcce..613c95f7 100644 --- a/subworkflows/mirpedrol/msa_alignment/main.nf +++ b/subworkflows/mirpedrol/msa_alignment/main.nf @@ -1,35 +1,66 @@ -if ( params.aligner == "clustalo/align" ) { - include { CLUSTALO_ALIGN as ALIGNER } from '../../../modules/mirpedrol/clustalo/align/main' -} else if ( params.aligner == "famsa/align" ) { - include { FAMSA_ALIGN as ALIGNER } from '../../../modules/mirpedrol/famsa/align/main' -} else if ( params.aligner == "kalign/align" ) { - include { 
KALIGN_ALIGN as ALIGNER } from '../../../modules/mirpedrol/kalign/align/main' -} else if ( params.aligner == "learnmsa/align" ) { - include { LEARNMSA_ALIGN as ALIGNER } from '../../../modules/mirpedrol/learnmsa/align/main' -} else if ( params.aligner == "mafft" ) { - include { MAFFT as ALIGNER } from '../../../modules/mirpedrol/mafft/main' -} else if ( params.aligner == "magus/align" ) { - include { MAGUS_ALIGN as ALIGNER } from '../../../modules/mirpedrol/magus/align/main' -} else if ( params.aligner == "muscle5/super5" ) { - include { MUSCLE5_SUPER5 as ALIGNER } from '../../../modules/mirpedrol/muscle5/super5/main' -} else if ( params.aligner == "tcoffee/align" ) { - include { TCOFFEE_ALIGN as ALIGNER } from '../../../modules/mirpedrol/tcoffee/align/main' -} +include { MAFFT } from '../../../modules/mirpedrol/mafft/main' +include { KALIGN_ALIGN } from '../../../modules/mirpedrol/kalign/align/main' +include { FAMSA_ALIGN } from '../../../modules/mirpedrol/famsa/align/main' +include { MUSCLE5_SUPER5 } from '../../../modules/mirpedrol/muscle5/super5/main' +include { MAGUS_ALIGN } from '../../../modules/mirpedrol/magus/align/main' +include { CLUSTALO_ALIGN } from '../../../modules/mirpedrol/clustalo/align/main' +include { TCOFFEE_ALIGN } from '../../../modules/mirpedrol/tcoffee/align/main' +include { LEARNMSA_ALIGN } from '../../../modules/mirpedrol/learnmsa/align/main' + workflow MSA_ALIGNMENT { take: - ch_fasta // channel: [ meta, fasta ] + ch_fasta main: + def ch_out_alignment = Channel.empty() + def ch_out_versions = Channel.empty() + if ( params.alignment == "mafft" ) { + MAFFT( ch_fasta ) + ch_out_alignment = ch_out_alignment.mix(MAFFT.out.alignment) + ch_out_versions = ch_out_versions.mix(MAFFT.out.versions) + } + else if ( params.alignment == "kalign/align" ) { + KALIGN_ALIGN( ch_fasta ) + ch_out_alignment = ch_out_alignment.mix(KALIGN_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(KALIGN_ALIGN.out.versions) + } + else if ( params.alignment == 
"famsa/align" ) { + FAMSA_ALIGN( ch_fasta ) + ch_out_alignment = ch_out_alignment.mix(FAMSA_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(FAMSA_ALIGN.out.versions) + } + else if ( params.alignment == "muscle5/super5" ) { + MUSCLE5_SUPER5( ch_fasta ) + ch_out_alignment = ch_out_alignment.mix(MUSCLE5_SUPER5.out.alignment) + ch_out_versions = ch_out_versions.mix(MUSCLE5_SUPER5.out.versions) + } + else if ( params.alignment == "magus/align" ) { + MAGUS_ALIGN( ch_fasta ) + ch_out_alignment = ch_out_alignment.mix(MAGUS_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(MAGUS_ALIGN.out.versions) + } + else if ( params.alignment == "clustalo/align" ) { + CLUSTALO_ALIGN( ch_fasta ) + ch_out_alignment = ch_out_alignment.mix(CLUSTALO_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(CLUSTALO_ALIGN.out.versions) + } + else if ( params.alignment == "tcoffee/align" ) { + TCOFFEE_ALIGN( ch_fasta ) + ch_out_alignment = ch_out_alignment.mix(TCOFFEE_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(TCOFFEE_ALIGN.out.versions) + } + else if ( params.alignment == "learnmsa/align" ) { + LEARNMSA_ALIGN( ch_fasta ) + ch_out_alignment = ch_out_alignment.mix(LEARNMSA_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(LEARNMSA_ALIGN.out.versions) + } - ch_versions = Channel.empty() - - ALIGNER ( ch_fasta ) - ch_versions = ch_versions.mix(ALIGNER.out.versions.first()) emit: - alignment = ALIGNER.out.alignment // channel: [ meta, *.aln.gz ] - versions = ch_versions // channel: [ versions.yml ] + alignment = ch_out_alignment + versions = ch_out_versions + } diff --git a/subworkflows/mirpedrol/msa_alignment/meta.yml b/subworkflows/mirpedrol/msa_alignment/meta.yml index 16f6b0b1..097106aa 100644 --- a/subworkflows/mirpedrol/msa_alignment/meta.yml +++ b/subworkflows/mirpedrol/msa_alignment/meta.yml @@ -1,38 +1,62 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +# 
yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/subworkflows/yaml-schema.json name: "msa_alignment" -description: Perform a multiple sequence alignment with one of the possible aligners -keywords: - - alignment - - MSA - - genomics +description: Perform multiple sequence alignment +keywords: ["alignment", "msa", "align"] components: - - clustalo/align - - famsa/align - - kalign/align - - learnmsa/align - - mafft - - magus/align - - muscle5/super5 - - tcoffee/align + [ + "mafft", + "kalign/align", + "famsa/align", + "muscle5/super5", + "magus/align", + "clustalo/align", + "tcoffee/align", + "learnmsa/align", + ] + input: - ch_fasta: - type: file - description: | - Structure: [ val(meta), path(fasta) ] - meta: Groovy Map containing sample information - fasta: Input sequences in FASTA format (*.{fa,fasta}) + description: "Channel containing: meta, fasta" + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'sample1', single_end:false ]` + + " + type: map + - fasta: + description: Input sequences in FASTA format + ontologies: + - edam: http://edamontology.org/format_1929 + pattern: "*.{fa,fasta}" + type: file + output: - alignment: - type: file - description: | - Structure: [ val(meta), path(alignment) ] - meta: Groovy Map containing sample information - alignment: Alignment file, in gzipped fasta format (*.aln.gz) + description: Output channel alignment + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'test']` + + " + type: map + - "*.aln.gz": + description: Alignment file, in FASTA format. 
+ ontologies: + - edam: http://edamontology.org/format_1984 + pattern: "*.aln.gz" + type: file - versions: - type: file - description: | - File containing software versions - Structure: [ path(versions.yml) ] + description: Output channel versions + structure: + - versions.yml: + description: File containing software versions + pattern: versions.yml + type: file + authors: - "@mirpedrol" maintainers: diff --git a/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test index 827c4de2..edda6e08 100644 --- a/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test +++ b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test @@ -7,187 +7,198 @@ nextflow_workflow { tag "subworkflows" tag "subworkflows_mirpedrol" tag "subworkflows/msa_alignment" - tag "clustalo/align" - tag "famsa/align" - tag "kalign/align" - tag "learnmsa/align" - tag "mafft" - tag "magus/align" - tag "muscle5/super5" - tag "tcoffee/align" - - test("sarscov2 - clustalo/align") { - - when { - params.aligner = "clustalo/align" - workflow { - """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] - """ - } - } + tag "mafft" + tag "kalign/align" + tag "famsa/align" + tag "muscle5/super5" + tag "magus/align" + tag "clustalo/align" + tag "tcoffee/align" + tag "learnmsa/align" - then { - assertAll( - { assert workflow.success}, - { assert snapshot(workflow.out).match()} - ) - } - } - test("sarscov2 - famsa/align") { + test("run mafft") { - when { - params.aligner = "famsa/align" - workflow { + when { + params.alignment = "mafft" + workflow { """ - input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['scaffolds_fasta'], checkIfExists: true) + ] """ - } - } + } + } then { assertAll( - { 
assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("mafft_alignment")}, + { assert snapshot(workflow.out.versions).match("mafft_versions")} ) - } - } + } + } - test("sarscov2 - kalign/align") { + test("run kalign/align") { - when { - params.aligner = "kalign/align" - workflow { + when { + params.alignment = "kalign/align" + workflow { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) - ] + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] """ - } - } + } + } then { assertAll( - { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("kalign_align_alignment")}, + { assert snapshot(workflow.out.versions).match("kalign_align_versions")} ) - } - } + } + } - test("sarscov2 - learnmsa/align") { - tag "mytest" + test("run famsa/align") { - when { - params.aligner = "learnmsa/align" - workflow { + when { + params.alignment = "famsa/align" + workflow { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] """ - } - } + } + } then { assertAll( { assert workflow.success }, - { assert path(workflow.out.alignment.get(0).get(1)).getTextGzip().contains(">sample1")}, - { assert snapshot(workflow.out.versions).match("versions") } + { assert snapshot(workflow.out.alignment).match("famsa_align_alignment")}, + { assert snapshot(workflow.out.versions).match("famsa_align_versions") } ) - } - } + } + } + + test("run muscle5/super5") { - test("sarscov2 - mafft") { + when { + params.alignment = "muscle5/super5" + workflow { + """ + input[0] = [ [ id:'test' ], + 
file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("muscle5_super5_alignment") }, + { assert snapshot(workflow.out.versions).match("muscle5_super5_versions") } + ) + } + } - when { - params.aligner = "mafft" - workflow { + test("run magus/align") { + + when { + params.alignment = "magus/align" + workflow { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) ] """ - } - } + } + } then { assertAll( - { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.success }, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1apf") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1atx") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, + { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1bds") }, + { assert snapshot(workflow.out.versions).match("magus_align_versions") } ) - } - } + } + } - test("sarscov2 - magus/align") { + test("run clustalo/align") { - when { - params.aligner = "magus/align" - workflow { + when { + params.alignment = "clustalo/align" + workflow { """ input[0] = [ [ id:'test' ], // meta map - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] 
""" - } - } + } + } then { assertAll( - { assert workflow.success}, - { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1apf") }, - { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1ahl") }, - { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1atx") }, - { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, - { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1bds") }, - { assert snapshot(workflow.out.versions).match("versions1") } + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("clustalo_align_alignment")}, + { assert snapshot(workflow.out.versions).match("clustalo_align_versions") } ) - } - } + } + } - test("sarscov2 - muscle5/super5") { + test("run tcoffee/align") { - when { - params.aligner = "muscle5/super5" - workflow { + when { + params.alignment = "tcoffee/align" + workflow { """ input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) - ] + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] """ - } - } + } + } then { assertAll( - { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.success }, + { assert snapshot(workflow.out.alignment).match("tcoffee_align_alignment")}, + { assert snapshot(workflow.out.versions).match("tcoffee_align_versions") } ) - } - } + } + } - test("sarscov2 - tcoffee/align") { + test("run learnmsa/align") { - when { - params.aligner = "tcoffee/align" - workflow { + when { + params.alignment = "learnmsa/align" + workflow { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) ] """ - } - } + } + } then { assertAll( 
- { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.success }, + { assert path(workflow.out.alignment.get(0).get(1)).getTextGzip().contains(">sample1") }, + { assert snapshot(workflow.out.versions).match("learnmsa_align_versions") } ) - } - } + } + } + + } diff --git a/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap index a2c6b9af..06e9cf67 100644 --- a/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap +++ b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap @@ -1,224 +1,201 @@ { - "sarscov2 - mafft": { + "kalign_align_alignment": { "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,23426611f4a0df532b6708f072bd445b" - ] - ], - "1": [ - "versions.yml:md5,3376c16fb93c6f92f9f2a1c6c5d7d058" - ], - "alignment": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,23426611f4a0df532b6708f072bd445b" - ] - ], - "versions": [ - "versions.yml:md5,3376c16fb93c6f92f9f2a1c6c5d7d058" + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,c165ecf48fb89862cc2a991cc3cadb2d" ] - } + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:09:44.903862641" + }, + "learnmsa_align_versions": { + "content": [ + [ + "versions.yml:md5,475f61f05c9729887f723221b87de01d" + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-12T12:28:57.275899399" + "timestamp": "2024-11-07T09:16:10.257788333" }, - "sarscov2 - famsa/align": { + "clustalo_align_alignment": { "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" - ] - ], - "1": [ - "versions.yml:md5,aec272178993715fd8d3e1ce192fe7d3" - ], - "alignment": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" - ] - ], - "versions": [ - "versions.yml:md5,aec272178993715fd8d3e1ce192fe7d3" + [ + [ + 
{ + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" ] - } + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-12T12:26:09.158593884" + "timestamp": "2024-11-07T09:11:39.016384316" }, - "versions": { + "kalign_align_versions": { "content": [ [ - "versions.yml:md5,1a266e903df6779d66f9e85f51b04240" + "versions.yml:md5,e7d33c95bb5d69e8573c8ad4eb2aca2c" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-12T17:54:03.355114155" + "timestamp": "2024-11-07T09:09:44.918523969" }, - "versions1": { + "clustalo_align_versions": { "content": [ [ - "versions.yml:md5,45270687addf1e651298ad01be9858b9" + "versions.yml:md5,75356bf56559adcb33a9c93aba830309" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-15T15:57:35.581187151" + "timestamp": "2024-11-07T09:11:39.033642445" }, - "sarscov2 - kalign/align": { + "tcoffee_align_versions": { "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,c165ecf48fb89862cc2a991cc3cadb2d" - ] - ], - "1": [ - "versions.yml:md5,98ce16aea87f74ab4e08b2b96c98b3e8" - ], - "alignment": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,c165ecf48fb89862cc2a991cc3cadb2d" - ] - ], - "versions": [ - "versions.yml:md5,98ce16aea87f74ab4e08b2b96c98b3e8" + [ + "versions.yml:md5,c5208e86b43e8c973c39c2bd8ca2932a" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:12:46.142334018" + }, + "mafft_versions": { + "content": [ + [ + "versions.yml:md5,ac417224510279b05e804f041d82304d" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:09:24.697891518" + }, + "tcoffee_align_alignment": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,bd1db08ad04514cc6d1334598c1a6ef0" ] 
- } + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-12T12:26:27.100719984" + "timestamp": "2024-11-07T09:12:46.12691554" }, - "sarscov2 - muscle5/super5": { + "famsa_align_alignment": { "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" - ] - ], - "1": [ - "versions.yml:md5,892b294cb7d1f3fc16beee1d4d023165" - ], - "alignment": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" - ] - ], - "versions": [ - "versions.yml:md5,892b294cb7d1f3fc16beee1d4d023165" + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" ] - } + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:10:17.412909358" + }, + "famsa_align_versions": { + "content": [ + [ + "versions.yml:md5,c74ca8b91c442fc4ea29219ee1b724fd" + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-12T14:41:45.676732121" + "timestamp": "2024-11-07T09:10:17.423586217" }, - "sarscov2 - tcoffee/align": { + "muscle5_super5_alignment": { "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,f9feb411ffabb1603473c8a60c06187d" - ] - ], - "1": [ - "versions.yml:md5,5b9b99bdeef801de9eb0157b8240f9a2" - ], - "alignment": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,f9feb411ffabb1603473c8a60c06187d" - ] - ], - "versions": [ - "versions.yml:md5,5b9b99bdeef801de9eb0157b8240f9a2" + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" ] - } + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-12T14:51:25.769451346" + "timestamp": "2024-11-07T09:10:32.694344" }, - "sarscov2 - clustalo/align": { + "magus_align_versions": { "content": [ - { - "0": [ - [ - { - "id": "test" - }, - 
"test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" - ] - ], - "1": [ - "versions.yml:md5,b876f18fb1dfcb122933043312a64bae" - ], - "alignment": [ - [ - { - "id": "test" - }, - "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" - ] - ], - "versions": [ - "versions.yml:md5,b876f18fb1dfcb122933043312a64bae" + [ + "versions.yml:md5,9258e7c6deb7c3d816ba75cf111e09a8" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:11:08.349010115" + }, + "mafft_alignment": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.aln.gz:md5,23426611f4a0df532b6708f072bd445b" ] - } + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T09:09:24.67999956" + }, + "muscle5_super5_versions": { + "content": [ + [ + "versions.yml:md5,48ec1b7cf99109e8495f3bc00d67a1eb" + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-12T12:25:28.903889766" + "timestamp": "2024-11-07T09:10:32.708162989" } } \ No newline at end of file diff --git a/subworkflows/mirpedrol/msa_guidetree/main.nf b/subworkflows/mirpedrol/msa_guidetree/main.nf index ead8f637..ceba00ca 100644 --- a/subworkflows/mirpedrol/msa_guidetree/main.nf +++ b/subworkflows/mirpedrol/msa_guidetree/main.nf @@ -1,25 +1,36 @@ -if ( params.guidetree == "clustalo/guidetree" ) { - include { CLUSTALO_GUIDETREE as GUIDETREE } from '../../../modules/mirpedrol/clustalo/guidetree/main' -} else if ( params.guidetree == "famsa/guidetree" ) { - include { FAMSA_GUIDETREE as GUIDETREE } from '../../../modules/mirpedrol/famsa/guidetree/main' -} else if ( params.guidetree == "magus/guidetree" ) { - include { MAGUS_GUIDETREE as GUIDETREE } from '../../../modules/mirpedrol/magus/guidetree/main' -} +include { FAMSA_GUIDETREE } from '../../../modules/mirpedrol/famsa/guidetree/main' +include { MAGUS_GUIDETREE } from '../../../modules/mirpedrol/magus/guidetree/main' 
+include { CLUSTALO_GUIDETREE } from '../../../modules/mirpedrol/clustalo/guidetree/main' + workflow MSA_GUIDETREE { take: - ch_fasta // channel: [ meta, fasta ] + ch_fasta main: + def ch_out_tree = Channel.empty() + def ch_out_versions = Channel.empty() + if ( params.guidetree == "famsa/guidetree" ) { + FAMSA_GUIDETREE( ch_fasta ) + ch_out_tree = ch_out_tree.mix(FAMSA_GUIDETREE.out.tree) + ch_out_versions = ch_out_versions.mix(FAMSA_GUIDETREE.out.versions) + } + else if ( params.guidetree == "magus/guidetree" ) { + MAGUS_GUIDETREE( ch_fasta ) + ch_out_tree = ch_out_tree.mix(MAGUS_GUIDETREE.out.tree) + ch_out_versions = ch_out_versions.mix(MAGUS_GUIDETREE.out.versions) + } + else if ( params.guidetree == "clustalo/guidetree" ) { + CLUSTALO_GUIDETREE( ch_fasta ) + ch_out_tree = ch_out_tree.mix(CLUSTALO_GUIDETREE.out.tree) + ch_out_versions = ch_out_versions.mix(CLUSTALO_GUIDETREE.out.versions) + } - ch_versions = Channel.empty() - - GUIDETREE ( ch_fasta ) - ch_versions = ch_versions.mix(GUIDETREE.out.versions) emit: - guidetree = GUIDETREE.out.tree // channel: [ meta, *.dnd ] - versions = ch_versions // channel: [ versions.yml ] + tree = ch_out_tree + versions = ch_out_versions + } diff --git a/subworkflows/mirpedrol/msa_guidetree/meta.yml b/subworkflows/mirpedrol/msa_guidetree/meta.yml index 4991038e..b5ec1411 100644 --- a/subworkflows/mirpedrol/msa_guidetree/meta.yml +++ b/subworkflows/mirpedrol/msa_guidetree/meta.yml @@ -1,33 +1,50 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/subworkflows/yaml-schema.json name: "msa_guidetree" -description: Compute the guide tree of a multiple sequence alignment with one of the possible tools -keywords: - - guide tree - - MSA - - genomics -components: - - clustalo/guidetree - - famsa/guidetree - - magus/guidetree +description: Compute a guide tree for 
multiple sequence alignment +keywords: ["align", "guide tree", "guidetree", "msa"] +components: ["famsa/guidetree", "magus/guidetree", "clustalo/guidetree"] + input: - ch_fasta: - type: file - description: | - Structure: [ val(meta), path(fasta) ] - meta: Groovy Map containing sample information - fasta: Input sequences in FASTA format (*.{fa,fasta}) + description: "Channel containing: meta, fasta" + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'test']` + + " + type: map + - fasta: + description: Input sequences in FASTA format + ontologies: + - edam: http://edamontology.org/format_1929 + pattern: "*.{fa,fasta}" + type: file + output: - - guidetree: - type: file - description: | - Structure: [ val(meta), path(guidetree) ] - meta: Groovy Map containing sample information - guidetree: Tree file, in gzipped newick format (*.dnd) + - tree: + description: Output channel tree + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. 
`[ id:'test']` + + " + type: map + - "*.dnd": + description: Guide tree file in Newick format + pattern: "*.{dnd}" + type: file - versions: - type: file - description: | - File containing software versions - Structure: [ path(versions.yml) ] + description: Output channel versions + structure: + - versions.yml: + description: File containing software versions + pattern: versions.yml + type: file + authors: - "@mirpedrol" maintainers: diff --git a/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test index 31a1f2f9..55dc5522 100644 --- a/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test +++ b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test @@ -7,75 +7,83 @@ nextflow_workflow { tag "subworkflows" tag "subworkflows_mirpedrol" tag "subworkflows/msa_guidetree" - tag "clustalo/guidetree" - tag "famsa/guidetree" - tag "magus/guidetree" + tag "famsa/guidetree" + tag "magus/guidetree" + tag "clustalo/guidetree" - test("sarscov2 - clustalo/guidetree") { - when { - params.guidetree = "clustalo/guidetree" - workflow { + test("run famsa/guidetree") { + + when { + params.guidetree = "famsa/guidetree" + workflow { """ input[0] = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] """ - } - } + } + } then { assertAll( - { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.success }, + { assert snapshot(workflow.out.tree).match("famsa_guidetree_tree")}, + { assert snapshot(workflow.out.versions).match("famsa_guidetree_versions") } ) - } - } + } + } - test("sarscov2 - famsa/guidetree") { + test("run magus/guidetree") { - when { - params.guidetree = "famsa/guidetree" - workflow { + when { + params.guidetree = "magus/guidetree" + workflow { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + 
file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) ] """ - } - } + } + } then { assertAll( - { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.success }, + //{ assert snapshot(workflow.out.tree).match("magus_guidetree_tree")}, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(workflow.out.tree[0][1]).getText().contains("1apf") }, + { assert path(workflow.out.tree[0][1]).getText().contains("1ahl") }, + { assert path(workflow.out.tree[0][1]).getText().contains("1atx") }, + { assert path(workflow.out.tree[0][1]).getText().contains("1sh1") }, + { assert path(workflow.out.tree[0][1]).getText().contains("1bds") }, + { assert snapshot(workflow.out.versions).match("magus_guidetree_versions") } ) - } - } + } + } - test("sarscov2 - magus/guidetree") { + test("run clustalo/guidetree") { - when { - params.guidetree = "magus/guidetree" - workflow { + when { + params.guidetree = "clustalo/guidetree" + workflow { """ input[0] = [ [ id:'test' ], // meta map - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] """ - } - } + } + } then { assertAll( - { assert workflow.success}, - { assert path(workflow.out.guidetree[0][1]).getText().contains("1apf") }, - { assert path(workflow.out.guidetree[0][1]).getText().contains("1ahl") }, - { assert path(workflow.out.guidetree[0][1]).getText().contains("1atx") }, - { assert path(workflow.out.guidetree[0][1]).getText().contains("1sh1") }, - { assert path(workflow.out.guidetree[0][1]).getText().contains("1bds") }, - { assert snapshot(workflow.out.versions).match("versions1") } + { assert workflow.success }, + { assert 
snapshot(workflow.out.tree).match("clustalo_guidetree_tree")}, + { assert snapshot(workflow.out.versions).match("clustalo_guidetree_versions") } ) - } - } + } + } + + } diff --git a/subworkflows/mirpedrol/msa_treealign/main.nf b/subworkflows/mirpedrol/msa_treealign/main.nf index 45acabac..0369a4b5 100644 --- a/subworkflows/mirpedrol/msa_treealign/main.nf +++ b/subworkflows/mirpedrol/msa_treealign/main.nf @@ -1,12 +1,8 @@ -if ( params.treealign == "famsa/treealign" ) { - include { FAMSA_TREEALIGN as TREEALIGN } from '../../../modules/mirpedrol/famsa/treealign/main' -} else if ( params.treealign == "magus/treealign" ) { - include { MAGUS_TREEALIGN as TREEALIGN } from '../../../modules/mirpedrol/magus/treealign/main' -} else if ( params.treealign == "clustalo/treealign" ) { - include { CLUSTALO_TREEALIGN as TREEALIGN } from '../../../modules/mirpedrol/clustalo/treealign/main' -} else if ( params.treealign == "tcoffee/treealign" ) { - include { TCOFFEE_TREEALIGN as TREEALIGN } from '../../../modules/mirpedrol/tcoffee/treealign/main' -} +include { FAMSA_TREEALIGN } from '../../../modules/mirpedrol/famsa/treealign/main' +include { MAGUS_TREEALIGN } from '../../../modules/mirpedrol/magus/treealign/main' +include { CLUSTALO_TREEALIGN } from '../../../modules/mirpedrol/clustalo/treealign/main' +include { TCOFFEE_TREEALIGN } from '../../../modules/mirpedrol/tcoffee/treealign/main' + workflow MSA_TREEALIGN { @@ -15,15 +11,33 @@ workflow MSA_TREEALIGN { ch_tree main: + def ch_out_alignment = Channel.empty() + def ch_out_versions = Channel.empty() + if ( params.treealign == "famsa/treealign" ) { + FAMSA_TREEALIGN( ch_fasta, ch_tree ) + ch_out_alignment = ch_out_alignment.mix(FAMSA_TREEALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(FAMSA_TREEALIGN.out.versions) + } + else if ( params.treealign == "magus/treealign" ) { + MAGUS_TREEALIGN( ch_fasta, ch_tree ) + ch_out_alignment = ch_out_alignment.mix(MAGUS_TREEALIGN.out.alignment) + ch_out_versions = 
ch_out_versions.mix(MAGUS_TREEALIGN.out.versions) + } + else if ( params.treealign == "clustalo/treealign" ) { + CLUSTALO_TREEALIGN( ch_fasta, ch_tree ) + ch_out_alignment = ch_out_alignment.mix(CLUSTALO_TREEALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(CLUSTALO_TREEALIGN.out.versions) + } + else if ( params.treealign == "tcoffee/treealign" ) { + TCOFFEE_TREEALIGN( ch_fasta, ch_tree ) + ch_out_alignment = ch_out_alignment.mix(TCOFFEE_TREEALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(TCOFFEE_TREEALIGN.out.versions) + } - ch_versions = Channel.empty() - - TREEALIGN ( ch_fasta, ch_tree ) - ch_versions = ch_versions.mix(TREEALIGN.out.versions) emit: - alignment = TREEALIGN.out.alignment - versions = ch_versions + alignment = ch_out_alignment + versions = ch_out_versions } diff --git a/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test index f8356e93..21d80ade 100644 --- a/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test +++ b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test @@ -11,9 +11,9 @@ nextflow_workflow { tag "magus/treealign" tag "clustalo/treealign" tag "tcoffee/treealign" - tag "clustalo/guidetree" - tag "famsa/guidetree" - tag "magus/guidetree" + tag "famsa/guidetree" + tag "magus/guidetree" + tag "clustalo/guidetree" test("run famsa/treealign") { @@ -46,8 +46,8 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(workflow.out.alignment).match("famsa_alignment")}, - { assert snapshot(workflow.out.versions).match("famsa_versions") } + { assert snapshot(workflow.out.alignment).match("famsa_treealign_alignment")}, + { assert snapshot(workflow.out.versions).match("famsa_treealign_versions") } ) } } @@ -89,7 +89,7 @@ nextflow_workflow { { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1atx") }, { assert path(workflow.out.alignment[0][1]).getTextGzip().contains(">1sh1") }, { assert 
path(workflow.out.alignment[0][1]).getTextGzip().contains(">1bds") }, - { assert snapshot(workflow.out.versions).match("magus_versions") } + { assert snapshot(workflow.out.versions).match("magus_treealign_versions") } ) } } @@ -125,13 +125,14 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(workflow.out.alignment).match("clustalo_alignment")}, - { assert snapshot(workflow.out.versions).match("clustalo_versions") } + { assert snapshot(workflow.out.alignment).match("clustalo_treealign_alignment")}, + { assert snapshot(workflow.out.versions).match("clustalo_treealign_versions") } ) } } test("run tcoffee/treealign") { + setup { run("FAMSA_GUIDETREE") { @@ -162,8 +163,8 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(workflow.out.alignment).match("tcoffee_alignment")}, - { assert snapshot(workflow.out.versions).match("tcoffee_versions") } + { assert snapshot(workflow.out.alignment).match("tcoffee_treealign_alignment")}, + { assert snapshot(workflow.out.versions).match("tcoffee_treealign_versions") } ) } } diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf index ac31f28f..0fcbf7b3 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -2,18 +2,13 @@ // Subworkflow with functionality that may be useful for any Nextflow pipeline // -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput -import nextflow.extension.FilesEx - /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW DEFINITION -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow UTILS_NEXTFLOW_PIPELINE { - 
take: print_version // boolean: print version dump_parameters // boolean: dump parameters @@ -26,7 +21,7 @@ workflow UTILS_NEXTFLOW_PIPELINE { // Print workflow version and exit on --version // if (print_version) { - log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + log.info("${workflow.manifest.name} ${getWorkflowVersion()}") System.exit(0) } @@ -49,16 +44,16 @@ workflow UTILS_NEXTFLOW_PIPELINE { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FUNCTIONS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // // Generate version string // def getWorkflowVersion() { - String version_string = "" + def version_string = "" as String if (workflow.manifest.version) { def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' version_string += "${prefix_v}${workflow.manifest.version}" @@ -76,13 +71,13 @@ def getWorkflowVersion() { // Dump pipeline parameters to a JSON file // def dumpParametersToJSON(outdir) { - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def filename = "params_${timestamp}.json" - def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") - def jsonStr = JsonOutput.toJson(params) - temp_pf.text = JsonOutput.prettyPrint(jsonStr) + def timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) - FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") temp_pf.delete() } @@ -90,37 +85,40 @@ def dumpParametersToJSON(outdir) { // When running with -profile conda, warn if channels have not been set-up appropriately // def checkCondaChannels() { - Yaml parser = new Yaml() + def parser = new org.yaml.snakeyaml.Yaml() def channels = [] try { def config = parser.load("conda config --show channels".execute().text) channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return + } + catch (NullPointerException e) { + log.warn("Could not verify conda channel configuration.") + return null + } + catch (IOException e) { + log.warn("Could not verify conda channel configuration.") + return null } // Check that all channels are present // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def required_channels_in_order = ['conda-forge', 'bioconda'] def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } + def channel_priority_violation = required_channels_in_order != channels.findAll { ch -> ch in required_channels_in_order } if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + log.warn """\ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + There is a problem with your Conda configuration! + You will need to set-up the conda-forge and bioconda channels correctly. 
+ Please refer to https://bioconda.github.io/ + The observed channel order is + ${channels} + but the following channel order is required: + ${required_channels_in_order} + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + """.stripIndent(true) } } diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test index ca964ce8..02dbf094 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -52,10 +52,12 @@ nextflow_workflow { } then { - assertAll( - { assert workflow.success }, - { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } - ) + expect { + with(workflow) { + assert success + assert "nextflow_workflow v9.9.9" in stdout + } + } } } diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config index 4f2dbd48..a09572e5 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -3,7 +3,7 @@ manifest { author = """nf-core""" homePage = 'https://127.0.0.1' description = """Dummy pipeline""" - nextflowVersion = '!>=24.04.1' + nextflowVersion = '!>=23.04.0' version = '9.9.9' doi = 'https://doi.org/10.5281/zenodo.5070524' } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index 14558c39..5cb7bafe 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -2,17 +2,13 @@ // Subworkflow with utility functions specific to the nf-core pipeline template // -import org.yaml.snakeyaml.Yaml -import nextflow.extension.FilesEx - /* 
-======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW DEFINITION -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow UTILS_NFCORE_PIPELINE { - take: nextflow_cli_args @@ -25,23 +21,20 @@ workflow UTILS_NFCORE_PIPELINE { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FUNCTIONS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // // Warn if a -profile or Nextflow config has not been provided to run the pipeline // def checkConfigProvided() { - valid_config = true + def valid_config = true as Boolean if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " + log.warn( + "[${workflow.manifest.name}] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " + ) valid_config = false } return valid_config @@ -52,12 +45,14 @@ def checkConfigProvided() { // def checkProfileProvided(nextflow_cli_args) { if (workflow.profile.endsWith(',')) { - error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + - "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + error( + "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) } if (nextflow_cli_args[0]) { - log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + - "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + log.warn( + "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. 
`-profile test, docker`.\n" + ) } } @@ -66,25 +61,21 @@ def checkProfileProvided(nextflow_cli_args) { // def workflowCitation() { def temp_doi_ref = "" - String[] manifest_doi = workflow.manifest.doi.tokenize(",") - // Using a loop to handle multiple DOIs + def manifest_doi = workflow.manifest.doi.tokenize(",") + // Handling multiple DOIs // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers // Removing ` ` since the manifest.doi is a string and not a proper list - for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - temp_doi_ref + "\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" + manifest_doi.each { doi_ref -> + temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + } + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } // // Generate workflow version string // def getWorkflowVersion() { - String version_string = "" + def version_string = "" as String if (workflow.manifest.version) { def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' version_string += "${prefix_v}${workflow.manifest.version}" @@ -102,8 +93,8 @@ def getWorkflowVersion() { // Get software versions for pipeline // def processVersionsFromYAML(yaml_file) { - Yaml yaml = new Yaml() - versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] } return yaml.dumpAsMap(versions).trim() } @@ -113,8 +104,8 @@ def processVersionsFromYAML(yaml_file) { def workflowVersionToYAML() { return """ Workflow: - $workflow.manifest.name: ${getWorkflowVersion()} - Nextflow: $workflow.nextflow.version + ${workflow.manifest.name}: ${getWorkflowVersion()} + Nextflow: ${workflow.nextflow.version} """.stripIndent().trim() } @@ -122,11 +113,7 @@ def workflowVersionToYAML() { // Get channel of software versions used in pipeline in YAML format // def softwareVersionsToYAML(ch_versions) { - return ch_versions - .unique() - .map { processVersionsFromYAML(it) } - .unique() - .mix(Channel.of(workflowVersionToYAML())) + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) } // @@ -134,25 +121,31 @@ def softwareVersionsToYAML(ch_versions) { // def paramsSummaryMultiqc(summary_params) { def summary_section = '' - for (group in summary_params.keySet()) { - def group_params = summary_params.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

$group

\n" - summary_section += "
\n" - for (param in group_params.keySet()) { - summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" + summary_params + .keySet() + .each { group -> + def group_params = summary_params.get(group) + // This gets the parameters of that particular group + if (group_params) { + summary_section += "

${group}

\n" + summary_section += "
\n" + group_params + .keySet() + .sort() + .each { param -> + summary_section += "
${param}
${group_params.get(param) ?: 'N/A'}
\n" + } + summary_section += "
\n" } - summary_section += "
\n" } - } - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" + def yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" as String + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" return yaml_file_text } @@ -161,7 +154,7 @@ def paramsSummaryMultiqc(summary_params) { // nf-core logo // def nfCoreLogo(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) + def colors = logColours(monochrome_logs) as Map String.format( """\n ${dashedLine(monochrome_logs)} @@ -180,7 +173,7 @@ def nfCoreLogo(monochrome_logs=true) { // Return dashed line // def dashedLine(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) + def colors = logColours(monochrome_logs) as Map return "-${colors.dim}----------------------------------------------------${colors.reset}-" } @@ -188,7 +181,7 @@ def dashedLine(monochrome_logs=true) { // ANSII colours used for terminal logging // def logColours(monochrome_logs=true) { - Map colorcodes = [:] + def colorcodes = [:] as Map // Reset / Meta colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" @@ -200,54 +193,54 @@ def logColours(monochrome_logs=true) { colorcodes['hidden'] = monochrome_logs ? 
'' : "\033[8m" // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? 
'' : "\033[1;37m" // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? 
'' : "\033[0;97m" // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" return colorcodes } @@ -262,14 +255,15 @@ def attachMultiqcReport(multiqc_report) { mqc_report = multiqc_report.getVal() if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") } mqc_report = mqc_report[0] } } - } catch (all) { + } + catch (Exception all) { if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + log.warn("[${workflow.manifest.name}] Could not attach MultiQC report to summary email") } } return mqc_report @@ -281,26 +275,35 @@ def attachMultiqcReport(multiqc_report) { def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + def subject = "[${workflow.manifest.name}] Successful: ${workflow.runName}" if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + subject = "[${workflow.manifest.name}] FAILED: ${workflow.runName}" } def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } def misc_fields = [:] misc_fields['Date Started'] = workflow.start misc_fields['Date Completed'] = workflow.complete misc_fields['Pipeline script file path'] = workflow.scriptFile misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - 
misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build + if (workflow.repository) { + misc_fields['Pipeline repository Git URL'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['Pipeline repository Git Commit'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['Pipeline Git branch/tag'] = workflow.revision + } + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp def email_fields = [:] @@ -338,39 +341,41 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi // Render the sendmail template def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) def sendmail_html = sendmail_template.toString() // Send the HTML e-mail - Map colors = logColours(monochrome_logs) + def colors = logColours(monochrome_logs) as Map if (email_address) { try { - if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + if (plaintext_email) { +new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") sendmail_tf.withWriter { w 
-> w << sendmail_html } - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { + ['sendmail', '-t'].execute() << sendmail_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (sendmail)-") + } + catch (Exception all) { // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + def mail_cmd = ['mail', '-s', subject, '--content-type=text/html', email_address] mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (mail)-") } } // Write summary e-mail HTML to a file def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + nextflow.extension.FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html") output_hf.delete() // Write summary e-mail TXT to a file def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } - FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + nextflow.extension.FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt") output_tf.delete() } @@ -378,15 +383,17 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi // Print pipeline summary on completion // def completionSummary(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) + def colors = logColours(monochrome_logs) as Map if (workflow.success) { if (workflow.stats.ignoredCount == 0) { 
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Pipeline completed successfully${colors.reset}-") + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-") } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.red} Pipeline completed with errors${colors.reset}-") } } @@ -395,21 +402,30 @@ def completionSummary(monochrome_logs=true) { // def imNotification(summary_params, hook_url) { def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) { + misc_fields['repository'] = workflow.repository 
+ } + if (workflow.commitId) { + misc_fields['commitid'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['revision'] = workflow.revision + } + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp def msg_fields = [:] msg_fields['version'] = getWorkflowVersion() @@ -434,13 +450,13 @@ def imNotification(summary_params, hook_url) { def json_message = json_template.toString() // POST - def post = new URL(hook_url).openConnection(); + def post = new URL(hook_url).openConnection() post.setRequestMethod("POST") post.setDoOutput(true) post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); + post.getOutputStream().write(json_message.getBytes("UTF-8")) + def postRC = post.getResponseCode() + if (!postRC.equals(200)) { + log.warn(post.getErrorStream().getText()) } } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config index 4f2dbd48..d0a926bf 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -3,7 +3,7 @@ manifest { author = """nf-core""" homePage = 'https://127.0.0.1' description = """Dummy pipeline""" - nextflowVersion = '!>=24.04.1' + nextflowVersion = '!>=23.04.0' version = '9.9.9' doi = 'https://doi.org/10.5281/zenodo.5070524' } diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf index 2585b65d..2398c620 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -3,9 +3,9 @@ // /* 
-======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-VALIDATION PLUGIN -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ include { paramsHelp } from 'plugin/nf-validation' @@ -13,13 +13,12 @@ include { paramsSummaryLog } from 'plugin/nf-validation' include { validateParameters } from 'plugin/nf-validation' /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW DEFINITION -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow UTILS_NFVALIDATION_PLUGIN { - take: print_help // boolean: print help workflow_command // string: default commmand used to run pipeline @@ -30,7 +29,7 @@ workflow UTILS_NFVALIDATION_PLUGIN { main: - log.debug "Using schema file: ${schema_filename}" + log.debug("Using schema file: ${schema_filename}") // Default values for strings pre_help_text = pre_help_text ?: '' @@ -41,19 +40,19 @@ workflow UTILS_NFVALIDATION_PLUGIN { // Print help message if needed // if (print_help) { - log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + log.info(pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text) System.exit(0) } // // Print parameter summary to stdout // - log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + log.info(pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text) // // Validate parameters relative to the parameter 
JSON schema // - if (validate_params){ + if (validate_params) { validateParameters(parameters_schema: schema_filename) } diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test index 5784a33f..c50b1fb5 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -169,7 +169,7 @@ nextflow_workflow { params { monochrome_logs = true test_data = '' - outdir = 1 + outdir = false } workflow { """ From 57c4e82ad9ba980f896c9e406bdb82ba32323375 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Thu, 7 Nov 2024 10:48:30 +0100 Subject: [PATCH 05/23] move evaluation subworkflow to an independent workflow --- main.nf | 48 +++++++++++-- nextflow.config | 1 + workflows/evaluatemsa.nf | 111 +++++++++++++++++++++++++++++ workflows/multiplesequencealign.nf | 52 +------------- 4 files changed, 154 insertions(+), 58 deletions(-) create mode 100644 workflows/evaluatemsa.nf diff --git a/main.nf b/main.nf index 2c234566..06ca4af9 100644 --- a/main.nf +++ b/main.nf @@ -18,6 +18,7 @@ nextflow.enable.dsl = 2 */ include { MULTIPLESEQUENCEALIGN } from './workflows/multiplesequencealign' +include { EVALUATEMSA } from './workflows/evaluatemsa' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_multiplesequencealign_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_multiplesequencealign_pipeline' @@ -52,6 +53,29 @@ workflow NFCORE_MULTIPLESEQUENCEALIGN { multiqc_report = MULTIPLESEQUENCEALIGN.out.multiqc } + +workflow NFCORE_EVALUATEMSA { + + take: + msa_alignment // channel: [ meta, /path/to/file.aln ] + ch_refs // channel: [ meta, /path/to/file.aln ] + ch_structures_template // channel: [ meta, /path/to/file.pdb ] + stats_summary // channel: [ meta, /path/to/file.csv ] + + main: + ch_versions = Channel.empty() + + // + // WORKFLOW: Run evaluation pipelines + // 
+ EVALUATEMSA ( + msa_alignment, + ch_refs, + ch_structures_template, + stats_summary, + ch_versions + ) +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -76,13 +100,23 @@ workflow { params.tools ) - // - // WORKFLOW: Run main workflow - // - NFCORE_MULTIPLESEQUENCEALIGN ( - PIPELINE_INITIALISATION.out.samplesheet, - PIPELINE_INITIALISATION.out.tools - ) + if (params.evaluate) { + // WORKFLOW: Run evaluation workflow + NFCORE_EVALUATEMSA ( + PIPELINE_INITIALISATION.out.msa_alignment, + PIPELINE_INITIALISATION.out.ch_refs, + PIPELINE_INITIALISATION.out.ch_structures_template, + PIPELINE_INITIALISATION.out.stats_summary + ) + } else { + // + // WORKFLOW: Run main workflow + // + NFCORE_MULTIPLESEQUENCEALIGN ( + PIPELINE_INITIALISATION.out.samplesheet, + PIPELINE_INITIALISATION.out.tools + ) + } // // SUBWORKFLOW: Run completion tasks diff --git a/nextflow.config b/nextflow.config index 4c94d84f..c54fcd84 100644 --- a/nextflow.config +++ b/nextflow.config @@ -29,6 +29,7 @@ params { extract_plddt = false // Evaluation + evaluate = false skip_eval = false calc_sp = true calc_tc = true diff --git a/workflows/evaluatemsa.nf b/workflows/evaluatemsa.nf new file mode 100644 index 00000000..a8615626 --- /dev/null +++ b/workflows/evaluatemsa.nf @@ -0,0 +1,111 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// MODULES +include { MULTIQC } from '../modules/local/multiqc' +include { PREPARE_MULTIQC } from '../modules/local/prepare_multiqc' +include { PREPARE_SHINY } from '../modules/local/prepare_shiny' + +//SUBWORKFLOWS +include { EVALUATE } from '../subworkflows/local/evaluate' + +// FUNCTIONS +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from 
'../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' + +workflow EVALUATEMSA { + + take: + msa_alignment + ch_refs + ch_structures_template + stats_summary + ch_versions + + main: + evaluation_summary = Channel.empty() + stats_and_evaluation_summary = Channel.empty() + ch_multiqc_table = Channel.empty() + + // + // Evaluate the quality of the alignment + // + if (!params.skip_eval) { + EVALUATE (msa_alignment, ch_refs, ch_structures_template) + ch_versions = ch_versions.mix(EVALUATE.out.versions) + evaluation_summary = evaluation_summary.mix(EVALUATE.out.eval_summary) + } + + // + // Combine stats and evaluation reports into a single CSV + // + if (!params.skip_stats || !params.skip_eval) { + stats_summary_csv = stats_summary.map{ meta, csv -> csv } + eval_summary_csv = evaluation_summary.map{ meta, csv -> csv } + stats_summary_csv.mix(eval_summary_csv) + .collect() + .map { + csvs -> + [ [ id:"summary_stats_eval" ], csvs ] + } + .set { stats_and_evaluation } + MERGE_STATS_EVAL (stats_and_evaluation) + stats_and_evaluation_summary = MERGE_STATS_EVAL.out.csv + ch_versions = ch_versions.mix(MERGE_STATS_EVAL.out.versions) + } + + // + // MODULE: Shiny + // + if (!params.skip_shiny) { + shiny_app = Channel.fromPath(params.shiny_app) + PREPARE_SHINY (stats_and_evaluation_summary, shiny_app) + ch_shiny_stats = PREPARE_SHINY.out.data.toList() + ch_versions = ch_versions.mix(PREPARE_SHINY.out.versions) + } + + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_pipeline_software_mqc_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } + + // + // MODULE: MultiQC + // + multiqc_out = Channel.empty() + if (!params.skip_multiqc && (!params.skip_stats || !params.skip_eval)) { + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = 
params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + + PREPARE_MULTIQC (stats_and_evaluation_summary) + ch_multiqc_table = ch_multiqc_table.mix(PREPARE_MULTIQC.out.multiqc_table.collect{it[1]}.ifEmpty([])) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), + ch_multiqc_table + ) + multiqc_out = MULTIQC.out.report.toList() + } + + emit: + versions = ch_versions // channel: [ path(versions.yml) ] + multiqc = multiqc_out // channel: [ path(multiqc_report.html) ] +} diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index 5df45ef9..b3b909ef 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -27,15 +27,8 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mult // SUBWORKFLOW: Local subworkflows // include { STATS } from '../subworkflows/local/stats' -include { EVALUATE } from '../subworkflows/local/evaluate' include { 
CREATE_TCOFFEETEMPLATE } from '../modules/local/create_tcoffee_template' -// -// MODULE: local modules -// -include { PREPARE_MULTIQC } from '../modules/local/prepare_multiqc' -include { PREPARE_SHINY } from '../modules/local/prepare_shiny' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -74,10 +67,7 @@ workflow MULTIPLESEQUENCEALIGN { main: ch_multiqc_files = Channel.empty() - ch_multiqc_table = Channel.empty() - evaluation_summary = Channel.empty() stats_summary = Channel.empty() - stats_and_evaluation_summary = Channel.empty() ch_shiny_stats = Channel.empty() ch_input @@ -210,43 +200,6 @@ workflow MULTIPLESEQUENCEALIGN { msa_alignment.mix(MSA_ALIGNMENT.out.alignment) } - // - // Evaluate the quality of the alignment - // - if (!params.skip_eval) { - EVALUATE (msa_alignment, ch_refs, ch_structures_template) - ch_versions = ch_versions.mix(EVALUATE.out.versions) - evaluation_summary = evaluation_summary.mix(EVALUATE.out.eval_summary) - } - - // - // Combine stats and evaluation reports into a single CSV - // - if (!params.skip_stats || !params.skip_eval) { - stats_summary_csv = stats_summary.map{ meta, csv -> csv } - eval_summary_csv = evaluation_summary.map{ meta, csv -> csv } - stats_summary_csv.mix(eval_summary_csv) - .collect() - .map { - csvs -> - [ [ id:"summary_stats_eval" ], csvs ] - } - .set { stats_and_evaluation } - MERGE_STATS_EVAL (stats_and_evaluation) - stats_and_evaluation_summary = MERGE_STATS_EVAL.out.csv - ch_versions = ch_versions.mix(MERGE_STATS_EVAL.out.versions) - } - - // - // MODULE: Shiny - // - if (!params.skip_shiny) { - shiny_app = Channel.fromPath(params.shiny_app) - PREPARE_SHINY (stats_and_evaluation_summary, shiny_app) - ch_shiny_stats = PREPARE_SHINY.out.data.toList() - ch_versions = ch_versions.mix(PREPARE_SHINY.out.versions) - } - softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info", @@ -272,15 +225,12 @@ 
workflow MULTIPLESEQUENCEALIGN { ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) - PREPARE_MULTIQC (stats_and_evaluation_summary) - ch_multiqc_table = ch_multiqc_table.mix(PREPARE_MULTIQC.out.multiqc_table.collect{it[1]}.ifEmpty([])) - MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), - ch_multiqc_table + [] ) multiqc_out = MULTIQC.out.report.toList() } From e2b92c129003f353c37799d77939d918bfba9719 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 11 Nov 2024 12:29:11 +0100 Subject: [PATCH 06/23] nextflow language server fixes and channel combination fixes --- conf/modules.config | 12 ++- conf/test.config | 2 +- main.nf | 10 +-- nextflow.config | 90 +++++++++---------- nextflow_schema.json | 7 +- subworkflows/local/evaluate.nf | 36 ++++---- .../main.nf | 3 +- workflows/multiplesequencealign.nf | 61 +++++++------ 8 files changed, 117 insertions(+), 104 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 3e66b604..64df547e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -112,13 +112,11 @@ tag = { "${meta.id} tree:${meta.tree} argstree:${args_tree} args:${meta.args_aligner}" } ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } ext.args = { "${meta.args_aligner}" == "null" ? '' : "${meta.args_aligner}" } - if(params.skip_compression){ - publishDir = [ - path: { "${params.outdir}/alignment/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + publishDir = [ + path: { "${params.outdir}/alignment/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] } withName: "MTMALIGN_ALIGN"{ diff --git a/conf/test.config b/conf/test.config index 79a961fc..0f09f3fe 100644 --- a/conf/test.config +++ b/conf/test.config @@ -33,7 +33,7 @@ params { // Input data input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv' tools = params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_full.csv' - aligner = 'clustalo/align' + alignment = 'clustalo/align' guidetree = 'clustalo/guidetree' treealign = 'clustalo/treealign' } diff --git a/main.nf b/main.nf index 06ca4af9..4f52d279 100644 --- a/main.nf +++ b/main.nf @@ -35,17 +35,15 @@ workflow NFCORE_MULTIPLESEQUENCEALIGN { take: samplesheet // channel: samplesheet read in from --input - tools // channel: toolsheet read in from --tools main: - ch_versions = Channel.empty() + def ch_versions = Channel.empty() // // WORKFLOW: Run pipeline // MULTIPLESEQUENCEALIGN ( samplesheet, - tools, ch_versions ) @@ -63,7 +61,7 @@ workflow NFCORE_EVALUATEMSA { stats_summary // channel: [ meta, /path/to/file.csv ] main: - ch_versions = Channel.empty() + def ch_versions = Channel.empty() // // WORKFLOW: Run evaluation pipelines @@ -114,7 +112,6 @@ workflow { // NFCORE_MULTIPLESEQUENCEALIGN ( PIPELINE_INITIALISATION.out.samplesheet, - PIPELINE_INITIALISATION.out.tools ) } @@ -131,7 +128,8 @@ workflow { NFCORE_MULTIPLESEQUENCEALIGN.out.multiqc_report, "${params.outdir}/shiny_app", "${params.outdir}/pipeline_info", - params.shiny_trace_mode + params.shiny_trace_mode, + params.evaluate ) } diff --git a/nextflow.config b/nextflow.config index c54fcd84..c13e04e8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,69 +14,69 @@ plugins { params { // Input options - input = null - tools = null + input = null + tools = null // Tool selectors - aligner = '' - guidetree = '' - treealign = '' + alignment = null + guidetree = null + treealign = null // Stats - skip_stats = false - calc_sim = false - 
calc_seq_stats = true - extract_plddt = false + skip_stats = false + calc_sim = false + calc_seq_stats = true + extract_plddt = false // Evaluation - evaluate = false - skip_eval = false - calc_sp = true - calc_tc = true - calc_irmsd = false - calc_gaps = true - calc_tcs = false + evaluate = false + skip_eval = false + calc_sp = true + calc_tc = true + calc_irmsd = false + calc_gaps = true + calc_tcs = false - skip_compression = false + skip_compression = false // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null - skip_multiqc = false + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null + skip_multiqc = false // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // Shiny options - shiny_app = "${projectDir}/bin/shiny_app" - skip_shiny = false - shiny_trace_mode = "latest" // all, latest + shiny_app = "${projectDir}/bin/shiny_app" + skip_shiny = false + shiny_trace_mode = "latest" // all, latest // Config options - config_profile_name = null - config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + config_profile_name = null + config_profile_description = null + 
custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Max resource options // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' // Schema validation default options validationFailUnrecognisedParams = false diff --git a/nextflow_schema.json b/nextflow_schema.json index bcb3d08f..80e0c06d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -61,7 +61,7 @@ "default": "", "fa_icon": "fas fa-tools", "properties": { - "aligner": { + "alignment": { "type": "string", "description": "Which aligner tool to use" }, @@ -116,6 +116,11 @@ "fa_icon": "fas fa-terminal", "description": "Define wether to run stats.", "properties": { + "evaluate": { + "type": "boolean", + "fa_icon": "fas fa-magnifying-glass", + "description": "Run the evaluations computation on the msa." 
+ }, "skip_eval": { "type": "boolean", "fa_icon": "fas fa-fast-forward", diff --git a/subworkflows/local/evaluate.nf b/subworkflows/local/evaluate.nf index c34356ca..583605a8 100644 --- a/subworkflows/local/evaluate.nf +++ b/subworkflows/local/evaluate.nf @@ -25,13 +25,13 @@ workflow EVALUATE { main: - ch_versions = Channel.empty() - sp_csv = Channel.empty() - tc_csv = Channel.empty() - irmsd_csv = Channel.empty() - tcs_csv = Channel.empty() - gaps_csv = Channel.empty() - ch_eval_summary = Channel.empty() + def ch_versions = Channel.empty() + def sp_csv = Channel.empty() + def tc_csv = Channel.empty() + def irmsd_csv = Channel.empty() + def tcs_csv = Channel.empty() + def gaps_csv = Channel.empty() + def ch_eval_summary = Channel.empty() // -------------------------- @@ -79,7 +79,7 @@ workflow EVALUATE { // Total column score if( params.calc_tc ) { TCOFFEE_ALNCOMPARE_TC (alignment_and_ref) - tc_scores = TCOFFEE_ALNCOMPARE_TC.out.scores + def tc_scores = TCOFFEE_ALNCOMPARE_TC.out.scores ch_versions = ch_versions.mix(TCOFFEE_ALNCOMPARE_TC.out.versions.first()) tc_scores @@ -106,7 +106,7 @@ workflow EVALUATE { // number of gaps if ( params.calc_gaps ) { CALC_GAPS (ch_msa) - gaps_scores = CALC_GAPS.out.gaps + def gaps_scores = CALC_GAPS.out.gaps ch_versions = ch_versions.mix(CALC_GAPS.out.versions) gaps_scores @@ -153,10 +153,10 @@ workflow EVALUATE { msa_str.msa, msa_str.structures ) - tcoffee_irmsd_scores = TCOFFEE_IRMSD.out.irmsd + def tcoffee_irmsd_scores = TCOFFEE_IRMSD.out.irmsd ch_versions = ch_versions.mix(TCOFFEE_IRMSD.out.versions.first()) PARSE_IRMSD (tcoffee_irmsd_scores) - tcoffee_irmsd_scores_tot = PARSE_IRMSD.out.irmsd_tot + def tcoffee_irmsd_scores_tot = PARSE_IRMSD.out.irmsd_tot ch_versions = ch_versions.mix(PARSE_IRMSD.out.versions) ch_irmsd_summary = tcoffee_irmsd_scores_tot.map{ @@ -166,7 +166,7 @@ workflow EVALUATE { } CONCAT_IRMSD(ch_irmsd_summary, "csv", "csv") irmsd_csv = CONCAT_IRMSD.out.csv - versions = 
ch_versions.mix(CONCAT_IRMSD.out.versions) + def versions = ch_versions.mix(CONCAT_IRMSD.out.versions) } @@ -178,7 +178,7 @@ workflow EVALUATE { if( params.calc_tcs ){ // the second argument is empty but a lib file can be fed to it TCOFFEE_TCS (ch_msa, [[:], []]) - tcs_scores = TCOFFEE_TCS.out.scores + def tcs_scores = TCOFFEE_TCS.out.scores ch_versions = ch_versions.mix(TCOFFEE_TCS.out.versions.first()) tcs_scores @@ -203,11 +203,11 @@ workflow EVALUATE { // MERGE ALL STATS // ------------------------------------------- - sp = sp_csv.map { meta, csv -> csv } - tc = tc_csv.map { meta, csv -> csv } - irmsd = irmsd_csv.map { meta, csv -> csv } - gaps = gaps_csv.map { meta, csv -> csv } - tcs = tcs_csv.map { meta, csv -> csv } + def sp = sp_csv.map { meta, csv -> csv } + def tc = tc_csv.map { meta, csv -> csv } + def irmsd = irmsd_csv.map { meta, csv -> csv } + def gaps = gaps_csv.map { meta, csv -> csv } + def tcs = tcs_csv.map { meta, csv -> csv } def number_of_evals = [ params.calc_sp, diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index c9cb1811..b1811c2e 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -121,6 +121,7 @@ workflow PIPELINE_COMPLETION { shiny_dir_path // string: Path to shiny stats file trace_dir_path // string: Path to trace file shiny_trace_mode // string: Mode to use for shiny trace file (default: "latest", options: "latest", "all") + evaluate // boolean: Evaluate the results main: @@ -140,7 +141,7 @@ workflow PIPELINE_COMPLETION { imNotification(summary_params, hook_url) } - if (shiny_trace_mode) { + if (shiny_trace_mode && evaluate) { getTraceForShiny(trace_dir_path, shiny_dir_path, shiny_trace_mode) } diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index b3b909ef..92bb83c9 100644 --- 
a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -62,13 +62,11 @@ workflow MULTIPLESEQUENCEALIGN { take: ch_input // channel: [ meta, path(sequence.fasta), path(reference.fasta), path(pdb_structures.tar.gz), path(templates.txt) ] - ch_tools // channel: [ val(guide_tree_tool), val(args_guide_tree_tool), val(alignment_tool), val(args_alignment_tool) ] ch_versions // channel: [ path(versions.yml) ] main: - ch_multiqc_files = Channel.empty() - stats_summary = Channel.empty() - ch_shiny_stats = Channel.empty() + def ch_multiqc_files = Channel.empty() + def stats_summary = Channel.empty() ch_input .map { @@ -78,7 +76,7 @@ workflow MULTIPLESEQUENCEALIGN { .set { ch_seqs } ch_input - .filter { it[2].size() > 0} + .filter { input -> input[2].size() > 0} .map { meta, fasta, ref, str, template -> [ meta, file(ref) ] @@ -86,7 +84,7 @@ workflow MULTIPLESEQUENCEALIGN { .set { ch_refs } ch_input - .filter { it[4].size() > 0} + .filter { input -> input[4].size() > 0} .map { meta, fasta, ref, str, template -> [ meta, file(template) ] @@ -98,7 +96,7 @@ workflow MULTIPLESEQUENCEALIGN { meta, fasta, ref, str, template -> [ meta, str ] } - .filter { it[1].size() > 0 } + .filter { input -> input[1].size() > 0 } .set { ch_structures } // ---------------- @@ -107,8 +105,8 @@ workflow MULTIPLESEQUENCEALIGN { // Structures are taken from a directory of PDB files. // If the directory is compressed, it is uncompressed first. ch_structures - .branch { - compressed: it[1].endsWith('.tar.gz') + .branch { structures -> + compressed: structures[1].endsWith('.tar.gz') uncompressed: true } .set { ch_structures } @@ -126,9 +124,11 @@ workflow MULTIPLESEQUENCEALIGN { // TEMPLATES // ---------------- // If a family does not present a template but structures are provided, create one. 
- ch_structures_template = ch_structures.join(ch_templates, by:0, remainder:true) + ch_structures + .join(ch_templates, by:0, remainder:true) + .set { ch_structures_template } ch_structures_template - .branch { + .branch { it -> template: it[2] != null no_template: true } @@ -142,7 +142,7 @@ workflow MULTIPLESEQUENCEALIGN { [ meta, structures ] } ) - new_templates = CREATE_TCOFFEETEMPLATE.out.template + def new_templates = CREATE_TCOFFEETEMPLATE.out.template ch_structures_branched.template .map { meta,structures,template -> @@ -150,7 +150,7 @@ workflow MULTIPLESEQUENCEALIGN { } .set { forced_templates } - ch_templates_merged = forced_templates.mix(new_templates) + def ch_templates_merged = forced_templates.mix(new_templates) // Merge the structures and templates channels, ready for the alignment ch_structures_template = ch_templates_merged.combine(ch_structures, by:0) @@ -167,7 +167,7 @@ workflow MULTIPLESEQUENCEALIGN { stats_summary = stats_summary.mix(STATS.out.stats_summary) } - msa_alignment = Channel.empty() + def msa_alignment = Channel.empty() if (params.guidetree && params.treealign) { // @@ -178,18 +178,29 @@ workflow MULTIPLESEQUENCEALIGN { ) ch_versions = ch_versions.mix(MSA_GUIDETREE.out.versions) + // Prepare channels for treealign to make sure the correct tree is used for the respective alignment + ch_seqs + .combine(MSA_GUIDETREE.out.tree, by:0) + .set { ch_seqs_trees } + ch_seqs_trees + .multiMap { meta, seq, tree -> + sequences: [meta, seq] + trees: [meta, tree] + } + .set { ch_seqs_trees_multi } + // // Align with a given tree // MSA_TREEALIGN ( - ch_seqs, - MSA_GUIDETREE.out.guidetree + ch_seqs_trees_multi.sequences, + ch_seqs_trees_multi.trees ) ch_versions = ch_versions.mix(MSA_TREEALIGN.out.versions) msa_alignment.mix(MSA_TREEALIGN.out.alignment) } - if (params.aligner) { + if (params.alignment) { // // Align // @@ -211,16 +222,16 @@ workflow MULTIPLESEQUENCEALIGN { // // MODULE: MultiQC // - multiqc_out = Channel.empty() + def multiqc_out = 
Channel.empty() if (!params.skip_multiqc && (!params.skip_stats || !params.skip_eval)) { - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + def ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + def ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + def ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + def summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + def ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + def ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + def ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) From 3476d1f6c55931905f33c4f4e1728265bd417e96 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 11 Nov 2024 12:35:04 +0100 Subject: [PATCH 07/23] remove toolsheet --- README.md | 19 --------- assets/schema_tools.json | 31 --------------- conf/test.config | 1 - conf/test_full.config | 1 - conf/test_parameters.config | 1 - conf/test_pdb.config | 1 - docs/usage.md | 39 +------------------ main.nf | 1 - nextflow.config | 1 - nextflow_schema.json | 11 ------ .../main.nf | 19 --------- tests/main.nf.test | 1 - 12 files changed, 1 insertion(+), 125 deletions(-) delete mode 100644 assets/schema_tools.json diff --git a/README.md b/README.md index 1fcf18d1..8f97256b 100644 --- a/README.md +++ b/README.md @@ -54,24 +54,6 @@ Each row represents a set of sequences (in this case the seatoxin and toxin prot > [!NOTE] > The only required input is the id column and either fasta or structures. -#### 2. TOOLSHEET - -Each line of the toolsheet defines a combination of guide tree and multiple sequence aligner to run with the respective arguments to be used. - -It should look at follows: - -`toolsheet.csv`: - -```csv -tree,args_tree,aligner,args_aligner, -FAMSA, -gt upgma -medoidtree, FAMSA, -, ,TCOFFEE, -FAMSA,,REGRESSIVE, -``` - -> [!NOTE] -> The only required input is aligner. - #### 3. 
RUN THE PIPELINE Now, you can run the pipeline using: @@ -80,7 +62,6 @@ Now, you can run the pipeline using: nextflow run nf-core/multiplesequencealign \ -profile test \ --input samplesheet.csv \ - --tools toolsheet.csv \ --outdir outdir ``` diff --git a/assets/schema_tools.json b/assets/schema_tools.json deleted file mode 100644 index 96572948..00000000 --- a/assets/schema_tools.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/msa/master/assets/schema_tools.json", - "title": "nf-core/msa pipeline - params.tools schema", - "description": "Schema for the file provided with params.tools", - "type": "array", - "items": { - "type": "object", - "properties": { - "tree": { - "type": "string", - "errorMessage": "tree name cannot contain spaces", - "meta": ["tree"] - }, - "args_tree": { - "type": "string", - "meta": ["args_tree"] - }, - "aligner": { - "type": "string", - "meta": ["aligner"], - "errorMessage": "align name must be provided and cannot contain spaces" - }, - "args_aligner": { - "type": "string", - "meta": ["args_aligner"] - } - }, - "required": ["aligner"] - } -} diff --git a/conf/test.config b/conf/test.config index 0f09f3fe..5922211c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -32,7 +32,6 @@ params { // Input data input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv' - tools = params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_full.csv' alignment = 'clustalo/align' guidetree = 'clustalo/guidetree' treealign = 'clustalo/treealign' diff --git a/conf/test_full.config b/conf/test_full.config index 75dbea2a..7783953d 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -27,5 +27,4 @@ params { // Input data for full size test input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_full.csv' - tools = 
params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_full.csv' } diff --git a/conf/test_parameters.config b/conf/test_parameters.config index 4867a336..3486c056 100644 --- a/conf/test_parameters.config +++ b/conf/test_parameters.config @@ -25,5 +25,4 @@ params { // Input data input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv' - tools = params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_full.csv' } diff --git a/conf/test_pdb.config b/conf/test_pdb.config index 5c519c7a..75d3e066 100644 --- a/conf/test_pdb.config +++ b/conf/test_pdb.config @@ -28,5 +28,4 @@ params { // Input data input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test.csv' - tools = params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_structural.csv' } diff --git a/docs/usage.md b/docs/usage.md index dd701550..9e5a1cf3 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -116,48 +116,12 @@ Each row represents a set of sequences (in this case the seatoxin and toxin prot > [!NOTE] > You can have some samples with structures and/or references and some without. The pipeline will run the modules requiring structures/references only on the samples for which you have provided the required information and the others will be just skipped. -## Toolsheet input - -We provide a toolsheet as input to facilitate testing multiple arguments per tool when using the pipeline as a benchmarking framework. This, enables having multiple entries in the toolsheet, each corresponding to different arguments to be tested for the same tool. - -Each line of the toolsheet defines a combination of guide tree and multiple sequence aligner to run with the respective arguments to be used. 
- -A typical toolsheet should look at follows: - -```csv title="toolsheet.csv" -tree,args_tree,aligner,args_aligner, -FAMSA, -gt upgma -medoidtree, FAMSA, -, ,TCOFFEE, -FAMSA,,REGRESSIVE, -``` - -> [!NOTE] -> Each of the trees and aligners are available as standalones. You can leave `args_tree` and `args_aligner` empty if you are cool with the default settings of each method. Alternatively, you can leave `args_tree` empty to use the default guide tree with each aligner. - -> [!NOTE] -> use the exact spelling as listed above in [align](#3-align) and [guide trees](#2-guide-trees)! - -`tree` is the tool used to build the tree (optional). - -Arguments to the tree tool can be provided using `args_tree`. Please refer to each tool's documentation (optional). - -The `aligner` column contains the tool to run the alignment (optional). - -Finally, the arguments to the aligner tool can be set by using the `args_aligner` column (optional). - -| Column | Description | -| -------------- | -------------------------------------------------------------------------------- | -| `tree` | Optional. Tool used to build the tree. | -| `args_tree` | Optional. Arguments to the tree tool. Please refer to each tool's documentation. | -| `aligner` | Required. Tool to run the alignment. Available options listed above. | -| `args_aligner` | Optional. Arguments to the alignment tool. | - ## Running the pipeline The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/multiplesequencealign --input ./samplesheet.csv --tools ./toolsheet.csv --outdir ./results -profile docker +nextflow run nf-core/multiplesequencealign --input ./samplesheet.csv --outdir ./results -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. 
@@ -189,7 +153,6 @@ with `params.yaml` containing: ```yaml input: './samplesheet.csv' -tools: "./toolsheet.csv" outdir: './results/' <...> ``` diff --git a/main.nf b/main.nf index 4f52d279..0a512d98 100644 --- a/main.nf +++ b/main.nf @@ -95,7 +95,6 @@ workflow { args, params.outdir, params.input, - params.tools ) if (params.evaluate) { diff --git a/nextflow.config b/nextflow.config index c13e04e8..4adea4b0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,7 +15,6 @@ params { // Input options input = null - tools = null // Tool selectors alignment = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 80e0c06d..1973f9c4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -23,17 +23,6 @@ "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/multiplesequencealign/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, - "tools": { - "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/csv", - "schema": "assets/schema_tools.json", - "pattern": "^\\S+\\.csv$", - "description": "Path to comma-separated file containing information about the tools to be used in the experiment.", - "help_text": "You will need to create a design file with information about the tools to be used before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/msa/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" - }, "outdir": { "type": "string", "format": "directory-path", diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index b1811c2e..3de50a8b 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -36,7 +36,6 @@ workflow PIPELINE_INITIALISATION { nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved input // string: Path to input samplesheet - tools // string: Path to input tools samplesheet main: @@ -78,27 +77,9 @@ workflow PIPELINE_INITIALISATION { // Create channel from input file provided through params.input // ch_input = Channel.fromSamplesheet('input') - ch_tools = Channel.fromSamplesheet('tools') - .map { - meta -> - def meta_clone = meta[0].clone() - def tree_map = [:] - def align_map = [:] - - tree_map["tree"] = meta_clone["tree"] - tree_map["args_tree"] = meta_clone["args_tree"] - tree_map["args_tree_clean"] = Utils.cleanArgs(meta_clone.args_tree) - - align_map["aligner"] = meta_clone["aligner"] - align_map["args_aligner"] = Utils.check_required_args(meta_clone["aligner"], meta_clone["args_aligner"]) - align_map["args_aligner_clean"] = Utils.cleanArgs(align_map["args_aligner"]) - - [ tree_map, align_map ] - }.unique() emit: samplesheet = ch_input - tools = ch_tools versions = ch_versions } diff --git a/tests/main.nf.test b/tests/main.nf.test index 12277da7..9a01d983 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -10,7 +10,6 @@ nextflow_pipeline { when { params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/samplesheet/v1.0/samplesheet_test.csv' - tools = 
'https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/toolsheet/v1.0/toolsheet.csv' // Output directory outdir = "./outdir/" From e4dd23f235070aa55ea80906252ad8b2023fa9f6 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 12 Nov 2024 16:01:35 +0100 Subject: [PATCH 08/23] add parameters to provide tool arguments and save results per tool+args folder with the file name being the sample --- conf/modules.config | 40 +++++++++++++++------------------------- nextflow.config | 6 ++++++ nextflow_schema.json | 24 ++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 64df547e..7705fd8a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -64,37 +64,28 @@ } // - // Tree building + // Tree building (guidetree) // - withName: "FAMSA_GUIDETREE"{ - tag = { "${meta.id} args:${meta.args_tree}" } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}" } - ext.args = { "${meta.args_tree}" == "null" ? '' : "${meta.args_tree}" } + withName: "FAMSA_GUIDETREE|CLUSTALO_GUIDETREE|MAGUS_GUIDETREE"{ + ext.args = { params.guidetree_args } + tag = { "${meta.id} args:${ext.args}" } publishDir = [ - path: { "${params.outdir}/trees/${meta.id}" }, + path: { "${params.outdir}/trees/${task.process.split(":")[-1].replace("_", "-")}_${params.guidetree_args.trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: "CLUSTALO_GUIDETREE"{ - tag = { "${meta.id} args:${meta.args_tree}" } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}" } - ext.args = { "${meta.args_tree}" == "null" ? '' : "${meta.args_tree}" } - publishDir = [ - path: { "${params.outdir}/trees/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } + // + // Alignment from a tree (treealign) + // - withName: "MAGUS_GUIDETREE"{ - tag = { "${meta.id} args:${meta.args_tree}" } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}" } - ext.args = { "${meta.args_tree}" == "null" ? '' : "${meta.args_tree}" } + withName: "CLUSTALO_TREEALIGN|FAMSA_TREEALIGN|MAGUS_TREEALIGN|TCOFFEE_TREEALIGN"{ + ext.args = { params.treealign_args } + tag = { "${meta.id} args:${ext.args}" } publishDir = [ - path: { "${params.outdir}/trees/${meta.id}" }, + path: { "${params.outdir}/alignment/${task.process.split(":")[-1].replace("_", "-")}_${params.treealign_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${params.guidetree.replace("/", "-")}_${params.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -109,11 +100,10 @@ } withName: "CLUSTALO_ALIGN|FAMSA_ALIGN|LEARNMSA_ALIGN|MAFFT|MAGUS_ALIGN|MUSCLE5_SUPER5|REGRESSIVE|TCOFFEE_ALIGN|TCOFFEE3D_ALIGN"{ - tag = { "${meta.id} tree:${meta.tree} argstree:${args_tree} args:${meta.args_aligner}" } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } - ext.args = { "${meta.args_aligner}" == "null" ? '' : "${meta.args_aligner}" } + ext.args = { params.alignment_args } + tag = { "${meta.id} args:${params.alignment_args}" } publishDir = [ - path: { "${params.outdir}/alignment/${meta.id}" }, + path: { "${params.outdir}/alignment/${task.process.split(":")[-1].replace("_", "-")}_${params.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/nextflow.config b/nextflow.config index 4adea4b0..094e02d7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,6 +21,12 @@ params { guidetree = null treealign = null + // Tool args + alignment_args = '' + guidetree_args = '' + treealign_args = '' + + // Stats skip_stats = false calc_sim = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 1973f9c4..1f6f6400 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -64,6 +64,30 @@ } } }, + "tool_arguments": { + "title": "Tool arguments", + "type": "object", + "description": "Parameters to provide arguments for the selected tools to use", + "default": "", + "fa_icon": "fas fa-tools", + "properties": { + "alignment_args": { + "type": "string", + "description": "Arguments to use for the selected alignment tool.", + "help": "If the provided arguments contain a double dash, make sure to provide them with an assignment operator, e.g. `--alignment_args='--arg1 --arg2'`." + }, + "guidetree_args": { + "type": "string", + "description": "Arguments to use for the selected guidetree tool.", + "help": "If the provided arguments contain a double dash, make sure to provide them with an assignment operator, e.g. `--guidetree_args='--arg1 --arg2'`." + }, + "treealign_args": { + "type": "string", + "description": "Arguments to use for the selected treealign tool.", + "help": "If the provided arguments contain a double dash, make sure to provide them with an assignment operator, e.g. `--treealign_args='--arg1 --arg2'`." 
+ } + } + } "stats_options": { "title": "Stats options", "type": "object", From 8969f587438790c43cb99c558d3251a9786a665b Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 12 Nov 2024 16:09:10 +0100 Subject: [PATCH 09/23] update methodsDescriptionText() according to the v3 template --- .../utils_nfcore_multiplesequencealign_pipeline/main.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index 3de50a8b..587cba3a 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -207,14 +207,17 @@ def methodsDescriptionText(mqc_methods_yaml) { meta.workflow = workflow.toMap() meta["manifest_map"] = workflow.manifest.toMap() + // Pipeline DOI // Pipeline DOI if (meta.manifest_map.doi) { // Using a loop to handle multiple DOIs // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers // Removing ` ` since the manifest.doi is a string and not a proper list def temp_doi_ref = "" - String[] manifest_doi = meta.manifest_map.doi.tokenize(",") - for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + def manifest_doi = meta.manifest_map.doi.tokenize(",") + manifest_doi.each { doi_ref -> + temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + } meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) } else meta["doi_text"] = "" meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " From e6534ccb5f1dd8b8e0885011a72c67881e84c4d2 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 18 Nov 2024 12:24:40 +0100 Subject: [PATCH 10/23] fix Nf language server errors and make evaluation workflow work --- assets/schema_evaluate.json | 32 ++++++++ main.nf | 26 +++---- nextflow.config | 1 + nextflow_schema.json | 2 +- .../generate_downstream_samplesheet/main.nf | 78 +++++++++++++++++++ .../main.nf | 2 - workflows/evaluatemsa.nf | 41 ++++++---- workflows/multiplesequencealign.nf | 30 ++++--- 8 files changed, 173 insertions(+), 39 deletions(-) create mode 100644 assets/schema_evaluate.json create mode 100644 subworkflows/local/generate_downstream_samplesheet/main.nf diff --git a/assets/schema_evaluate.json b/assets/schema_evaluate.json new file mode 100644 index 00000000..f891430d --- /dev/null +++ b/assets/schema_evaluate.json @@ -0,0 +1,32 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/multiplesequencealign/master/assets/schema_input.json", + "title": "nf-core/multiplesequencealign pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] + }, + "msa": { + "type": "string", + "pattern": "^\\S+\\.aln$", + "description": "aln file containing the MSA", + "errorMessage": "Must end with .aln" + }, + "reference": { + "type": "string" + }, + "structures": { + "type": "string" + } + }, + "required": ["id"], + "anyOf": [{ "required": ["msa"] }, { "required": ["structures"] }] + } +} diff --git a/main.nf b/main.nf index 0a512d98..400b96cc 100644 --- a/main.nf +++ b/main.nf @@ -35,6 +35,7 @@ workflow NFCORE_MULTIPLESEQUENCEALIGN { take: samplesheet // channel: samplesheet read in from --input + outdir main: def ch_versions = Channel.empty() @@ 
-44,7 +45,8 @@ workflow NFCORE_MULTIPLESEQUENCEALIGN { // MULTIPLESEQUENCEALIGN ( samplesheet, - ch_versions + ch_versions, + outdir ) emit: @@ -55,10 +57,9 @@ workflow NFCORE_MULTIPLESEQUENCEALIGN { workflow NFCORE_EVALUATEMSA { take: - msa_alignment // channel: [ meta, /path/to/file.aln ] - ch_refs // channel: [ meta, /path/to/file.aln ] - ch_structures_template // channel: [ meta, /path/to/file.pdb ] - stats_summary // channel: [ meta, /path/to/file.csv ] + evaluate_samplesheet // channel: [ /path/to/file.csv ] + stats_summary // channel: [ meta, /path/to/file.csv ] + outdir main: def ch_versions = Channel.empty() @@ -67,11 +68,10 @@ workflow NFCORE_EVALUATEMSA { // WORKFLOW: Run evaluation pipelines // EVALUATEMSA ( - msa_alignment, - ch_refs, - ch_structures_template, + evaluate_samplesheet, stats_summary, - ch_versions + ch_versions, + outdir ) } /* @@ -100,10 +100,9 @@ workflow { if (params.evaluate) { // WORKFLOW: Run evaluation workflow NFCORE_EVALUATEMSA ( - PIPELINE_INITIALISATION.out.msa_alignment, - PIPELINE_INITIALISATION.out.ch_refs, - PIPELINE_INITIALISATION.out.ch_structures_template, - PIPELINE_INITIALISATION.out.stats_summary + "${params.outdir}/downstream_samplesheets/evaluation.csv", + "${params.outdir}/downstream_samplesheets/stats.csv", + params.outdir ) } else { // @@ -111,6 +110,7 @@ workflow { // NFCORE_MULTIPLESEQUENCEALIGN ( PIPELINE_INITIALISATION.out.samplesheet, + params.outdir ) } diff --git a/nextflow.config b/nextflow.config index 094e02d7..cc2b9ca3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -224,6 +224,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.2.0' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 1f6f6400..ba14c76d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -87,7 +87,7 @@ "help": "If the provided arguments contain a double 
dash, make sure to provide them with an assignment operator, e.g. `--treealign_args='--arg1 --arg2'`." } } - } + }, "stats_options": { "title": "Stats options", "type": "object", diff --git a/subworkflows/local/generate_downstream_samplesheet/main.nf b/subworkflows/local/generate_downstream_samplesheet/main.nf new file mode 100644 index 00000000..ff288f70 --- /dev/null +++ b/subworkflows/local/generate_downstream_samplesheet/main.nf @@ -0,0 +1,78 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW: GENERATE A DOWNSTREAM SAMPLESHEET FOR EVALUATION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow SAMPLESHEET_EVALUATION { + take: + ch_msa // channel: [ meta, /path/to/file.aln ] + ch_references // channel: [ meta, /path/to/file.aln ] + ch_structures // channel: [ meta, /path/to/file.pdb ] + outdir // params.outdir + + main: + def ch_list_for_samplesheet = ch_msa + .join(ch_references, by: 0, remainder: true) + .join(ch_structures, by: 0, remainder: true) + .map { meta, msa, reference, structure -> + def sample = meta.id + [id: sample, msa: msa, reference: reference, structure: structure] + } + + channelToSamplesheet(ch_list_for_samplesheet, "${outdir}/downstream_samplesheets/evaluation") +} + +workflow SAMPLESHEET_STATS { + take: + stats_summary // channel: [ meta, /path/to/file.csv ] + outdir + + main: + def ch_list_for_samplesheet = stats_summary + .map { meta, csv -> + [stats: csv] + } + + channelToSamplesheet(ch_list_for_samplesheet, "${outdir}/downstream_samplesheets/stats") +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW CALLING SPECIFIC SAMPLESHEET GENERATION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { + take: + evaluation_msa // channel: [ meta, /path/to/file.aln ] + evaluation_references 
// channel: [ meta, /path/to/file.aln ] + evaluation_structures // channel: [ meta, /path/to/file.pdb ] + stats_summary // channel: [ meta, /path/to/file.csv ] + outdir // params.outdir + + main: + SAMPLESHEET_EVALUATION( + evaluation_msa, + evaluation_references, + evaluation_structures, + outdir + ) + SAMPLESHEET_STATS( + stats_summary, + outdir + ) +} + +// Input can be any channel with a dictionary +def channelToSamplesheet(ch_list_for_samplesheet, path) { + ch_list_for_samplesheet + .first() + .map { it -> it.keySet().join(",") } + .concat(ch_list_for_samplesheet.map { it -> it.values().join(",").replace("null", "") }) + .collectFile( + name: "${path}.csv", + newLine: true, + sort: false + ) +} diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index 587cba3a..f1fb1139 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -335,8 +335,6 @@ import groovy.text.SimpleTemplateEngine class Utils { - - public static cleanArgs(argString) { def cleanArgs = argString.toString().trim().replace(" ", " ").replace(" ", "_").replaceAll("==", "_").replaceAll("\\s+", "") // if clearnArgs is empty, return "" diff --git a/workflows/evaluatemsa.nf b/workflows/evaluatemsa.nf index a8615626..d58c9a50 100644 --- a/workflows/evaluatemsa.nf +++ b/workflows/evaluatemsa.nf @@ -5,9 +5,10 @@ */ // MODULES -include { MULTIQC } from '../modules/local/multiqc' -include { PREPARE_MULTIQC } from '../modules/local/prepare_multiqc' -include { PREPARE_SHINY } from '../modules/local/prepare_shiny' +include { MULTIQC } from '../modules/local/multiqc' +include { PREPARE_MULTIQC } from '../modules/local/prepare_multiqc' +include { PREPARE_SHINY } from '../modules/local/prepare_shiny' +include { CSVTK_JOIN as MERGE_STATS_EVAL } from '../modules/nf-core/csvtk/join/main.nf' //SUBWORKFLOWS 
include { EVALUATE } from '../subworkflows/local/evaluate' @@ -16,26 +17,38 @@ include { EVALUATE } from '../subworkflows/local/evaluate' include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { samplesheetToList } from 'plugin/nf-schema' workflow EVALUATEMSA { take: - msa_alignment - ch_refs - ch_structures_template + evaluate_samplesheet stats_summary ch_versions + outdir main: - evaluation_summary = Channel.empty() - stats_and_evaluation_summary = Channel.empty() - ch_multiqc_table = Channel.empty() + def evaluation_summary = Channel.empty() + def stats_and_evaluation_summary = Channel.empty() + def ch_multiqc_table = Channel.empty() + + // + // Read evaluate samplesheet and create channels + // + ch_input = Channel.fromList(samplesheetToList(evaluate_samplesheet, "${projectDir}/assets/schema_evaluate.json")) + ch_input + .multiMap { meta, msa, reference, structure -> + msa: [meta, msa] + reference: [meta, reference] + structure: [meta, structure] + } + .set { ch_input_multi } // // Evaluate the quality of the alignment // if (!params.skip_eval) { - EVALUATE (msa_alignment, ch_refs, ch_structures_template) + EVALUATE (ch_input_multi.msa, ch_input_multi.reference, ch_input_multi.structure) ch_versions = ch_versions.mix(EVALUATE.out.versions) evaluation_summary = evaluation_summary.mix(EVALUATE.out.eval_summary) } @@ -44,8 +57,8 @@ workflow EVALUATEMSA { // Combine stats and evaluation reports into a single CSV // if (!params.skip_stats || !params.skip_eval) { - stats_summary_csv = stats_summary.map{ meta, csv -> csv } - eval_summary_csv = evaluation_summary.map{ meta, csv -> csv } + def stats_summary_csv = stats_summary.map{ meta, csv -> csv } + def eval_summary_csv = evaluation_summary.map{ meta, csv -> csv } stats_summary_csv.mix(eval_summary_csv) .collect() .map { @@ -70,7 +83,7 
@@ workflow EVALUATEMSA { softwareVersionsToYAML(ch_versions) .collectFile( - storeDir: "${params.outdir}/pipeline_info", + storeDir: "${outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true @@ -79,7 +92,7 @@ workflow EVALUATEMSA { // // MODULE: MultiQC // - multiqc_out = Channel.empty() + def multiqc_out = Channel.empty() if (!params.skip_multiqc && (!params.skip_stats || !params.skip_eval)) { ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index 92bb83c9..50165c13 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -26,8 +26,9 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mult // // SUBWORKFLOW: Local subworkflows // -include { STATS } from '../subworkflows/local/stats' -include { CREATE_TCOFFEETEMPLATE } from '../modules/local/create_tcoffee_template' +include { STATS } from '../subworkflows/local/stats' +include { CREATE_TCOFFEETEMPLATE } from '../modules/local/create_tcoffee_template' +include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheet/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -39,7 +40,6 @@ include { CREATE_TCOFFEETEMPLATE } from '../modules/local/create_tcoffee_templat // MODULE: Installed directly from nf-core/modules // include { UNTAR } from '../modules/nf-core/untar/main' -include { CSVTK_JOIN as MERGE_STATS_EVAL } from '../modules/nf-core/csvtk/join/main.nf' include { PIGZ_COMPRESS } from '../modules/nf-core/pigz/compress/main' /* @@ -63,6 +63,7 @@ workflow MULTIPLESEQUENCEALIGN { take: ch_input // channel: [ meta, path(sequence.fasta), path(reference.fasta), 
path(pdb_structures.tar.gz), path(templates.txt) ] ch_versions // channel: [ path(versions.yml) ] + outdir // params.outdir main: def ch_multiqc_files = Channel.empty() @@ -197,7 +198,7 @@ workflow MULTIPLESEQUENCEALIGN { ch_seqs_trees_multi.trees ) ch_versions = ch_versions.mix(MSA_TREEALIGN.out.versions) - msa_alignment.mix(MSA_TREEALIGN.out.alignment) + msa_alignment = msa_alignment.mix(MSA_TREEALIGN.out.alignment) } if (params.alignment) { @@ -208,17 +209,28 @@ workflow MULTIPLESEQUENCEALIGN { ch_seqs ) ch_versions = ch_versions.mix(MSA_ALIGNMENT.out.versions) - msa_alignment.mix(MSA_ALIGNMENT.out.alignment) + msa_alignment = msa_alignment.mix(MSA_ALIGNMENT.out.alignment) } softwareVersionsToYAML(ch_versions) .collectFile( - storeDir: "${params.outdir}/pipeline_info", + storeDir: "${outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true ).set { ch_collated_versions } + // + // SUBWORKFLOW: Generate samplesheets for downstream workflows + // + GENERATE_DOWNSTREAM_SAMPLESHEETS ( + msa_alignment, + ch_refs, + ch_structures, + stats_summary, + outdir + ) + // // MODULE: MultiQC // @@ -232,9 +244,9 @@ workflow MULTIPLESEQUENCEALIGN { def ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) def ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) def ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) MULTIQC ( ch_multiqc_files.collect(), From 056d141421f2774f4bcc341679f8dd405afcecc0 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 18 Nov 2024 13:05:37 +0100 Subject: [PATCH 11/23] read previous downstream samplesheets and start updating schemas --- assets/schema_evaluate.json | 8 +- assets/schema_input.json | 2 +- assets/schema_stats.json | 25 +++ nextflow_schema.json | 182 ++++++------------ .../generate_downstream_samplesheet/main.nf | 35 +++- .../main.nf | 4 +- 6 files changed, 125 insertions(+), 131 deletions(-) create mode 100644 assets/schema_stats.json diff --git a/assets/schema_evaluate.json b/assets/schema_evaluate.json index f891430d..44664d8e 100644 --- a/assets/schema_evaluate.json +++ b/assets/schema_evaluate.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/multiplesequencealign/master/assets/schema_input.json", "title": "nf-core/multiplesequencealign pipeline - params.input schema", "description": "Schema for the file provided with params.input", @@ -14,16 +14,16 @@ 
"meta": ["id"] }, "msa": { - "type": "string", + "type": "file", "pattern": "^\\S+\\.aln$", "description": "aln file containing the MSA", "errorMessage": "Must end with .aln" }, "reference": { - "type": "string" + "type": "file" }, "structures": { - "type": "string" + "type": "file" } }, "required": ["id"], diff --git a/assets/schema_input.json b/assets/schema_input.json index a2770af7..b5a39749 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/multiplesequencealign/master/assets/schema_input.json", "title": "nf-core/multiplesequencealign pipeline - params.input schema", "description": "Schema for the file provided with params.input", diff --git a/assets/schema_stats.json b/assets/schema_stats.json new file mode 100644 index 00000000..06516bcd --- /dev/null +++ b/assets/schema_stats.json @@ -0,0 +1,25 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/multiplesequencealign/master/assets/schema_input.json", + "title": "nf-core/multiplesequencealign pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] + }, + "stats": { + "type": "file", + "pattern": "^\\S+\\.csv$", + "description": "dsv file containing the stats of the input sequences.", + "errorMessage": "Must end with .csv" + } + }, + "required": ["id", "stats"] + } +} diff --git a/nextflow_schema.json b/nextflow_schema.json index ba14c76d..b2f88187 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "https://json-schema.org/draft-07/schema", + 
"$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/multiplesequencealign/master/nextflow_schema.json", "title": "nf-core/multiplesequencealign pipeline parameters", "description": "Pipeline to run and benchmark multiple sequence alignment tools.", "type": "object", - "definitions": { + "$defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -23,6 +23,23 @@ "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/multiplesequencealign/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, + "tools": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/csv", + "schema": "assets/schema_tools.json", + "pattern": "^\\S+\\.csv$", + "description": "Path to comma-separated file containing information about the tools to be used in the experiment.", + "help_text": "You will need to create a design file with information about the tools to be used before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/msa/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv" + }, + "templates_suffix": { + "type": "string", + "description": "Suffix of the dependencies templates.", + "help_text": "Suffix of the files in the accessory files directory (e.g. 
pdb for pdb files).", + "fa_icon": "fas fa-file-signature" + }, "outdir": { "type": "string", "format": "directory-path", @@ -43,48 +60,47 @@ } } }, - "tool_selectors": { - "title": "Tool selectors", + "reference_genome_options": { + "title": "Reference genome options", "type": "object", - "description": "Parameters to select which tools to use", - "default": "", - "fa_icon": "fas fa-tools", + "fa_icon": "fas fa-dna", + "description": "Reference genome related files and options required for the workflow.", "properties": { - "alignment": { + "genome": { "type": "string", - "description": "Which aligner tool to use" + "description": "Name of iGenomes reference.", + "fa_icon": "fas fa-book", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, - "guidetree": { + "fasta": { "type": "string", - "description": "Which aligner tool to use to generate a guide tree" + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "description": "Path to FASTA genome file.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "fa_icon": "far fa-file-code" }, - "treealign": { - "type": "string", - "description": "Which aligner tool to use to align providing a generated guide tree" + "igenomes_ignore": { + "type": "boolean", + "description": "Do not load the iGenomes reference config.", + "fa_icon": "fas fa-ban", + "hidden": true, + "help_text": "Do not load `igenomes.config` when running the pipeline. 
You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." } } }, - "tool_arguments": { - "title": "Tool arguments", + "align_options": { + "title": "Align options", "type": "object", - "description": "Parameters to provide arguments for the selected tools to use", - "default": "", - "fa_icon": "fas fa-tools", + "fa_icon": "fas fa-terminal", + "description": "Define extra alignment options.", "properties": { - "alignment_args": { - "type": "string", - "description": "Arguments to use for the selected alignment tool.", - "help": "If the provided arguments contain a double dash, make sure to provide them with an assignment operator, e.g. `--alignment_args='--arg1 --arg2'`." - }, - "guidetree_args": { - "type": "string", - "description": "Arguments to use for the selected guidetree tool.", - "help": "If the provided arguments contain a double dash, make sure to provide them with an assignment operator, e.g. `--guidetree_args='--arg1 --arg2'`." - }, - "treealign_args": { - "type": "string", - "description": "Arguments to use for the selected treealign tool.", - "help": "If the provided arguments contain a double dash, make sure to provide them with an assignment operator, e.g. `--treealign_args='--arg1 --arg2'`." + "build_consensus": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Build consensus alignment with M-COFFEE." } } }, @@ -107,8 +123,7 @@ "calc_seq_stats": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Calculate general statistics on input files.", - "default": true + "description": "Calculate general statistics on input files."
}, "extract_plddt": { "type": "boolean", @@ -118,8 +133,7 @@ "calc_gaps": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Extract total number of gaps and average number of gaps of the alignment.", - "default": true + "description": "Extract total number of gaps and average number of gaps of the alignment." } } }, @@ -129,11 +143,6 @@ "fa_icon": "fas fa-terminal", "description": "Define wether to run stats.", "properties": { - "evaluate": { - "type": "boolean", - "fa_icon": "fas fa-magnifying-glass", - "description": "Run the evaluations computation on the msa." - }, "skip_eval": { "type": "boolean", "fa_icon": "fas fa-fast-forward", @@ -142,14 +151,12 @@ "calc_sp": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Calculate the Sum of Pairs of alignment.", - "default": true + "description": "Calculate the Sum of Pairs of alignment." }, "calc_tc": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Calculate the Total Column Score of alignment.", - "default": true + "description": "Calculate the Total Column Score of alignment." }, "calc_irmsd": { "type": "boolean", @@ -187,8 +194,7 @@ }, "shiny_trace_mode": { "type": "string", - "description": "variable containing the shiny_trace mode to be used.", - "default": "latest" + "description": "variable containing the shiny_trace mode to be used." } } }, @@ -252,41 +258,6 @@ } } }, - "max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. 
These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" - } - } - }, "generic_options": { "title": "Generic options", "type": "object", @@ -294,12 +265,6 @@ "description": "Less common options for the pipeline, typically set in a config file.", "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, "version": { "type": "boolean", "description": "Display version and exit.", @@ -375,27 +340,6 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "validationShowHiddenParams": { - "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." - }, - "validationFailUnrecognisedParams": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters fails when an unrecognised parameter is found.", - "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." - }, - "validationLenientMode": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters in lenient more.", - "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
- }, "pipelines_testdata_base_path": { "type": "string", "fa_icon": "far fa-check-circle", @@ -408,31 +352,31 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/definitions/tool_selectors" + "$ref": "#/$defs/align_options" }, { - "$ref": "#/definitions/stats_options" + "$ref": "#/$defs/stats_options" }, { - "$ref": "#/definitions/eval_options" + "$ref": "#/$defs/eval_options" }, { - "$ref": "#/definitions/reports_options" + "$ref": "#/$defs/reports_options" }, { - "$ref": "#/definitions/compression" + "$ref": "#/$defs/compression" }, { - "$ref": "#/definitions/institutional_config_options" + "$ref": "#/$defs/reference_genome_options" }, { - "$ref": "#/definitions/max_job_request_options" + "$ref": "#/$defs/institutional_config_options" }, { - "$ref": "#/definitions/generic_options" + "$ref": "#/$defs/generic_options" } ] } diff --git a/subworkflows/local/generate_downstream_samplesheet/main.nf b/subworkflows/local/generate_downstream_samplesheet/main.nf index ff288f70..07d77fbc 100644 --- a/subworkflows/local/generate_downstream_samplesheet/main.nf +++ b/subworkflows/local/generate_downstream_samplesheet/main.nf @@ -4,6 +4,8 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { samplesheetToList } from 'plugin/nf-schema' + workflow SAMPLESHEET_EVALUATION { take: ch_msa // channel: [ meta, /path/to/file.aln ] @@ -12,13 +14,24 @@ workflow SAMPLESHEET_EVALUATION { outdir // params.outdir main: - def ch_list_for_samplesheet = ch_msa + // Try reading an existing samplesheet + def samplesheet = file("${outdir}/downstream_samplesheets/evaluation.csv") + def ch_existing_samplesheet = Channel.empty() + if (samplesheet.exists()) { + ch_existing_samplesheet = Channel.fromList(samplesheetToList(samplesheet, "${projectDir}/assets/schema_evaluate.json")) + } + // Create a channel with the new values for the samplesheet + def ch_info_for_samplesheet = 
ch_msa .join(ch_references, by: 0, remainder: true) .join(ch_structures, by: 0, remainder: true) .map { meta, msa, reference, structure -> - def sample = meta.id - [id: sample, msa: msa, reference: reference, structure: structure] + [id: meta.id, msa: msa, reference: reference, structure: structure] } + // Join both channels + ch_existing_samplesheet + .mix(ch_info_for_samplesheet) + .unique() + .set { ch_list_for_samplesheet } channelToSamplesheet(ch_list_for_samplesheet, "${outdir}/downstream_samplesheets/evaluation") } @@ -29,10 +42,22 @@ workflow SAMPLESHEET_STATS { outdir main: - def ch_list_for_samplesheet = stats_summary + // Try reading an existing samplesheet + def samplesheet = file("${outdir}/downstream_samplesheets/stats.csv") + def ch_existing_samplesheet = Channel.empty() + if (samplesheet.exists()) { + ch_existing_samplesheet = Channel.fromList(samplesheetToList(samplesheet, "${projectDir}/assets/schema_stats.json")) + } + // Create a channel with the new values for the samplesheet + def ch_info_for_samplesheet = stats_summary .map { meta, csv -> - [stats: csv] + [id: meta.id, stats: csv] } + // Join both channels + ch_existing_samplesheet + .mix(ch_info_for_samplesheet) + .unique() + .set { ch_list_for_samplesheet } channelToSamplesheet(ch_list_for_samplesheet, "${outdir}/downstream_samplesheets/stats") } diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index f1fb1139..b6e1e853 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -10,7 +10,7 @@ include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' include { paramsSummaryMap } from 'plugin/nf-validation' -include { fromSamplesheet } from 'plugin/nf-validation' +include { samplesheetToList } from 'plugin/nf-schema' include { UTILS_NEXTFLOW_PIPELINE } from 
'../../nf-core/utils_nextflow_pipeline' include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' @@ -76,7 +76,7 @@ workflow PIPELINE_INITIALISATION { // // Create channel from input file provided through params.input // - ch_input = Channel.fromSamplesheet('input') + ch_input = Channel.fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) emit: samplesheet = ch_input From be314b48e7e9cad6724e2a80e62a21aefc89d1f6 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Mon, 18 Nov 2024 15:38:35 +0100 Subject: [PATCH 12/23] update to nf-schema and read existing samplesheet to generate downstream samplesheet --- assets/schema_evaluate.json | 25 ++- assets/schema_input.json | 16 +- assets/schema_stats.json | 12 +- conf/base.config | 34 +-- conf/test.config | 13 +- main.nf | 1 - modules.json | 4 +- nextflow.config | 92 +++----- .../generate_downstream_samplesheet/main.nf | 9 +- .../main.nf | 17 +- .../nf-core/utils_nfschema_plugin/main.nf | 46 ++++ .../nf-core/utils_nfschema_plugin/meta.yml | 35 +++ .../utils_nfschema_plugin/tests/main.nf.test | 117 ++++++++++ .../tests/nextflow.config | 8 + .../tests/nextflow_schema.json | 8 +- .../nf-core/utils_nfvalidation_plugin/main.nf | 61 ------ .../utils_nfvalidation_plugin/meta.yml | 44 ---- .../tests/main.nf.test | 200 ------------------ .../utils_nfvalidation_plugin/tests/tags.yml | 2 - workflows/evaluatemsa.nf | 2 +- workflows/multiplesequencealign.nf | 2 +- 21 files changed, 318 insertions(+), 430 deletions(-) create mode 100644 subworkflows/nf-core/utils_nfschema_plugin/main.nf create mode 100644 subworkflows/nf-core/utils_nfschema_plugin/meta.yml create mode 100644 subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test create mode 100644 subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config rename subworkflows/nf-core/{utils_nfvalidation_plugin => utils_nfschema_plugin}/tests/nextflow_schema.json 
(95%) delete mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/main.nf delete mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml delete mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test delete mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml diff --git a/assets/schema_evaluate.json b/assets/schema_evaluate.json index 44664d8e..098f3fef 100644 --- a/assets/schema_evaluate.json +++ b/assets/schema_evaluate.json @@ -1,8 +1,8 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/multiplesequencealign/master/assets/schema_input.json", - "title": "nf-core/multiplesequencealign pipeline - params.input schema", - "description": "Schema for the file provided with params.input", + "$id": "https://raw.githubusercontent.com/nf-core/multiplesequencealign/master/assets/schema_evaluate.json", + "title": "nf-core/multiplesequencealign pipeline - schema for the evaluation workflow", + "description": "Schema for the evaluation workflow", "type": "array", "items": { "type": "object", @@ -14,19 +14,24 @@ "meta": ["id"] }, "msa": { - "type": "file", - "pattern": "^\\S+\\.aln$", + "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.aln(\\.gz)?$", "description": "aln file containing the MSA", - "errorMessage": "Must end with .aln" + "errorMessage": "Must end with .aln or .aln.gz", + "meta": ["msa"] }, "reference": { - "type": "file" + "type": "string", + "format": "file-path", + "meta": ["reference"] }, "structures": { - "type": "file" + "type": "string", + "format": "file-path", + "meta": ["structures"] } }, - "required": ["id"], - "anyOf": [{ "required": ["msa"] }, { "required": ["structures"] }] + "anyOf": [{ "required": ["id", "msa"] }, { "required": ["id", "structures"] }] } } diff --git a/assets/schema_input.json b/assets/schema_input.json index b5a39749..224a93a6 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -16,19
+16,23 @@ "fasta": { "type": "string", "pattern": "^\\S+\\.f(ast)?a$", - "errorMessage": "fasta file. Must end with .fa or .fasta" + "errorMessage": "fasta file. Must end with .fa or .fasta", + "default": "" }, "reference": { - "type": "string" + "type": "string", + "default": "" }, - "structures": { - "type": "string" + "dependencies": { + "type": "string", + "default": "" }, "template": { - "type": "string" + "type": "string", + "default": "" } }, "required": ["id"], - "anyOf": [{ "required": ["fasta"] }, { "required": ["structures"] }] + "anyOf": [{ "required": ["fasta"] }, { "required": ["dependencies"] }] } } diff --git a/assets/schema_stats.json b/assets/schema_stats.json index 06516bcd..a6da6228 100644 --- a/assets/schema_stats.json +++ b/assets/schema_stats.json @@ -1,8 +1,8 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/multiplesequencealign/master/assets/schema_input.json", - "title": "nf-core/multiplesequencealign pipeline - params.input schema", - "description": "Schema for the file provided with params.input", + "$id": "https://raw.githubusercontent.com/nf-core/multiplesequencealign/master/assets/schema_stats.json", + "title": "nf-core/multiplesequencealign pipeline - schema", + "description": "Schema for the stats file", "type": "array", "items": { "type": "object", @@ -14,10 +14,12 @@ "meta": ["id"] }, "stats": { - "type": "file", + "type": "string", + "format": "file-path", "pattern": "^\\S+\\.csv$", "description": "dsv file containing the stats of the input sequences.", - "errorMessage": "Must end with .csv" + "errorMessage": "Must end with .csv", + "meta": ["stats"] } }, "required": ["id", "stats"] diff --git a/conf/base.config b/conf/base.config index 03a21cd2..e6a41504 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,9 +10,9 @@ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 
4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB } + time = { 4.h } errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 @@ -25,30 +25,30 @@ process { // adding in your local modules too. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB } + time = { 4.h } } withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 2 } + memory = { 12.GB } + time = { 4.h } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 } + memory = { 36.GB } + time = { 8.h } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { 12 } + memory = { 72.GB } + time = { 16.h } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { 20.h } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { 200.GB } } withLabel:error_ignore { errorStrategy = 'ignore' diff --git a/conf/test.config b/conf/test.config index 5922211c..ca07b152 100644 --- a/conf/test.config +++ b/conf/test.config @@ -9,16 +9,17 @@ ---------------------------------------------------------------------------------------- */ - +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' 
- // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - skip_stats = false calc_sim = true calc_seq_stats = true diff --git a/main.nf b/main.nf index 400b96cc..77e03cc6 100644 --- a/main.nf +++ b/main.nf @@ -89,7 +89,6 @@ workflow { // PIPELINE_INITIALISATION ( params.version, - params.help, params.validate_params, params.monochrome_logs, args, diff --git a/modules.json b/modules.json index 757e7e38..7787c818 100644 --- a/modules.json +++ b/modules.json @@ -165,9 +165,9 @@ "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", "installed_by": ["subworkflows"] }, - "utils_nfvalidation_plugin": { + "utils_nfschema_plugin": { "branch": "master", - "git_sha": "f533459a222ac53eb4c6bb7a5f574e4069197cdb", + "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", "installed_by": ["subworkflows"] } } diff --git a/nextflow.config b/nextflow.config index cc2b9ca3..97e6b792 100644 --- a/nextflow.config +++ b/nextflow.config @@ -6,10 +6,6 @@ ---------------------------------------------------------------------------------------- */ -plugins { - id 'nf-validation@0.3.1' -} - // Global default params, used in configs params { @@ -76,20 +72,6 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null - - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' - - // Schema validation default options - validationFailUnrecognisedParams = false - validationLenientMode = false - validationSchemaIgnoreParams = '' - validationShowHiddenParams = false - validate_params = true - } // Load base.config by default for all pipelines @@ -221,13 +203,6 @@ docker.registry = 'quay.io' podman.registry = 'quay.io' singularity.registry = 'quay.io' -// Nextflow plugins -plugins { - id 'nf-validation@1.1.3' // Validation of pipeline parameters and 
creation of an input channel from a sample sheet - id 'nf-schema@2.2.0' -} - - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -275,38 +250,41 @@ manifest { doi = '' } -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +// Nextflow plugins +plugins { + id 'nf-schema@2.2.0' +} -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" - return obj - } +validation { + defaultIgnoreParams = ["genomes", "fasta", "igenomes_base", "genome"] + help { + enabled = true + command = "nextflow run $manifest.name -profile --input samplesheet.csv --outdir " + fullParameter = "help_full" + showHiddenParameter = "show_hidden" + beforeText = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m ${manifest.name} ${manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/${manifest.name}/blob/master/CITATIONS.md +""" + } + summary { + beforeText = validation.help.beforeText + afterText = validation.help.afterText } } + +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' diff --git a/subworkflows/local/generate_downstream_samplesheet/main.nf b/subworkflows/local/generate_downstream_samplesheet/main.nf index 07d77fbc..4fa587fd 100644 --- a/subworkflows/local/generate_downstream_samplesheet/main.nf +++ b/subworkflows/local/generate_downstream_samplesheet/main.nf @@ -19,15 +19,19 @@ workflow SAMPLESHEET_EVALUATION { def ch_existing_samplesheet = Channel.empty() if (samplesheet.exists()) { ch_existing_samplesheet = Channel.fromList(samplesheetToList(samplesheet, "${projectDir}/assets/schema_evaluate.json")) + .flatten() } // Create a channel with the new values for the samplesheet def 
ch_info_for_samplesheet = ch_msa + .view() .join(ch_references, by: 0, remainder: true) .join(ch_structures, by: 0, remainder: true) .map { meta, msa, reference, structure -> - [id: meta.id, msa: msa, reference: reference, structure: structure] + [id: meta.id, msa: msa, reference: reference, structures: structure] } // Join both channels + ch_existing_samplesheet.dump(tag: "existing") + ch_info_for_samplesheet.dump(tag: "new") ch_existing_samplesheet .mix(ch_info_for_samplesheet) .unique() @@ -47,6 +51,7 @@ workflow SAMPLESHEET_STATS { def ch_existing_samplesheet = Channel.empty() if (samplesheet.exists()) { ch_existing_samplesheet = Channel.fromList(samplesheetToList(samplesheet, "${projectDir}/assets/schema_stats.json")) + .flatten() } // Create a channel with the new values for the samplesheet def ch_info_for_samplesheet = stats_summary @@ -54,6 +59,8 @@ workflow SAMPLESHEET_STATS { [id: meta.id, stats: csv] } // Join both channels + ch_existing_samplesheet.dump(tag: "existing") + ch_info_for_samplesheet.dump(tag: "new") ch_existing_samplesheet .mix(ch_info_for_samplesheet) .unique() diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index b6e1e853..bfbe4eea 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -8,8 +8,8 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' -include { paramsSummaryMap } from 'plugin/nf-validation' +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' include { samplesheetToList } from 'plugin/nf-schema' include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' include { completionEmail } from 
'../../nf-core/utils_nfcore_pipeline' @@ -30,7 +30,6 @@ workflow PIPELINE_INITIALISATION { take: version // boolean: Display version and exit - help // boolean: Display help text validate_params // boolean: Boolean whether to validate parameters against the schema at runtime monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args @@ -54,16 +53,10 @@ workflow PIPELINE_INITIALISATION { // // Validate parameters and generate parameter summary to stdout // - pre_help_text = nfCoreLogo(monochrome_logs) - post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) - def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " - UTILS_NFVALIDATION_PLUGIN ( - help, - workflow_command, - pre_help_text, - post_help_text, + UTILS_NFSCHEMA_PLUGIN ( + workflow, validate_params, - "nextflow_schema.json" + null ) // diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf new file mode 100644 index 00000000..4994303e --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -0,0 +1,46 @@ +// +// Subworkflow that uses the nf-schema plugin to validate parameters and render the parameter summary +// + +include { paramsSummaryLog } from 'plugin/nf-schema' +include { validateParameters } from 'plugin/nf-schema' + +workflow UTILS_NFSCHEMA_PLUGIN { + + take: + input_workflow // workflow: the workflow object used by nf-schema to get metadata from the workflow + validate_params // boolean: validate the parameters + parameters_schema // string: path to the parameters JSON schema. + // this has to be the same as the schema given to `validation.parametersSchema` + // when this input is empty it will automatically use the configured schema or + // "${projectDir}/nextflow_schema.json" as default. 
This input should not be empty + // for meta pipelines + + main: + + // + // Print parameter summary to stdout. This will display the parameters + // that differ from the default given in the JSON schema + // + if(parameters_schema) { + log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) + } else { + log.info paramsSummaryLog(input_workflow) + } + + // + // Validate the parameters using nextflow_schema.json or the schema + // given via the validation.parametersSchema configuration option + // + if(validate_params) { + if(parameters_schema) { + validateParameters(parameters_schema:parameters_schema) + } else { + validateParameters() + } + } + + emit: + dummy_emit = true +} + diff --git a/subworkflows/nf-core/utils_nfschema_plugin/meta.yml b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml new file mode 100644 index 00000000..f7d9f028 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml @@ -0,0 +1,35 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "utils_nfschema_plugin" +description: Run nf-schema to validate parameters and create a summary of changed parameters +keywords: + - validation + - JSON schema + - plugin + - parameters + - summary +components: [] +input: + - input_workflow: + type: object + description: | + The workflow object of the used pipeline. + This object contains meta data used to create the params summary log + - validate_params: + type: boolean + description: Validate the parameters and error if invalid. + - parameters_schema: + type: string + description: | + Path to the parameters JSON schema. + This has to be the same as the schema given to the `validation.parametersSchema` config + option. When this input is empty it will automatically use the configured schema or + "${projectDir}/nextflow_schema.json" as default. The schema should not be given in this way + for meta pipelines. 
+output: + - dummy_emit: + type: boolean + description: Dummy emit to make nf-core subworkflows lint happy +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test new file mode 100644 index 00000000..8fb30164 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -0,0 +1,117 @@ +nextflow_workflow { + + name "Test Subworkflow UTILS_NFSCHEMA_PLUGIN" + script "../main.nf" + workflow "UTILS_NFSCHEMA_PLUGIN" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/utils_nfschema_plugin" + tag "plugin/nf-schema" + + config "./nextflow.config" + + test("Should run nothing") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } + + test("Should run nothing - custom schema") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params - custom schema") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = 
"${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config new file mode 100644 index 00000000..0907ac58 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -0,0 +1,8 @@ +plugins { + id "nf-schema@2.1.0" +} + +validation { + parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + monochromeLogs = true +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json similarity index 95% rename from subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json rename to subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json index 7626c1c9..331e0d2f 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", "title": ". 
pipeline parameters", "description": "", "type": "object", - "definitions": { + "$defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -87,10 +87,10 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/definitions/generic_options" + "$ref": "#/$defs/generic_options" } ] } diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf deleted file mode 100644 index 2398c620..00000000 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf +++ /dev/null @@ -1,61 +0,0 @@ -// -// Subworkflow that uses the nf-validation plugin to render help text and parameter summary -// - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-VALIDATION PLUGIN -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { paramsHelp } from 'plugin/nf-validation' -include { paramsSummaryLog } from 'plugin/nf-validation' -include { validateParameters } from 'plugin/nf-validation' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUBWORKFLOW DEFINITION -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow UTILS_NFVALIDATION_PLUGIN { - take: - print_help // boolean: print help - workflow_command // string: default commmand used to run pipeline - pre_help_text // string: string to be printed before help text and summary log - post_help_text // string: string to be printed after help text and summary log - validate_params // boolean: validate parameters - schema_filename // path: JSON schema file, null to use default value - - main: - - log.debug("Using schema file: ${schema_filename}") - - // Default values for strings - pre_help_text = pre_help_text ?: '' - post_help_text = post_help_text ?: '' - workflow_command = 
workflow_command ?: '' - - // - // Print help message if needed - // - if (print_help) { - log.info(pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text) - System.exit(0) - } - - // - // Print parameter summary to stdout - // - log.info(pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text) - - // - // Validate parameters relative to the parameter JSON schema - // - if (validate_params) { - validateParameters(parameters_schema: schema_filename) - } - - emit: - dummy_emit = true -} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml deleted file mode 100644 index 3d4a6b04..00000000 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml +++ /dev/null @@ -1,44 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "UTILS_NFVALIDATION_PLUGIN" -description: Use nf-validation to initiate and validate a pipeline -keywords: - - utility - - pipeline - - initialise - - validation -components: [] -input: - - print_help: - type: boolean - description: | - Print help message and exit - - workflow_command: - type: string - description: | - The command to run the workflow e.g. "nextflow run main.nf" - - pre_help_text: - type: string - description: | - Text to print before the help message - - post_help_text: - type: string - description: | - Text to print after the help message - - validate_params: - type: boolean - description: | - Validate the parameters and error if invalid. - - schema_filename: - type: string - description: | - The filename of the schema to validate against. 
-output: - - dummy_emit: - type: boolean - description: | - Dummy emit to make nf-core subworkflows lint happy -authors: - - "@adamrtalbot" -maintainers: - - "@adamrtalbot" - - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test deleted file mode 100644 index c50b1fb5..00000000 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test +++ /dev/null @@ -1,200 +0,0 @@ -nextflow_workflow { - - name "Test Workflow UTILS_NFVALIDATION_PLUGIN" - script "../main.nf" - workflow "UTILS_NFVALIDATION_PLUGIN" - tag "subworkflows" - tag "subworkflows_nfcore" - tag "plugin/nf-validation" - tag "'plugin/nf-validation'" - tag "utils_nfvalidation_plugin" - tag "subworkflows/utils_nfvalidation_plugin" - - test("Should run nothing") { - - when { - - params { - monochrome_logs = true - test_data = '' - } - - workflow { - """ - help = false - workflow_command = null - pre_help_text = null - post_help_text = null - validate_params = false - schema_filename = "$moduleTestDir/nextflow_schema.json" - - input[0] = help - input[1] = workflow_command - input[2] = pre_help_text - input[3] = post_help_text - input[4] = validate_params - input[5] = schema_filename - """ - } - } - - then { - assertAll( - { assert workflow.success } - ) - } - } - - test("Should run help") { - - - when { - - params { - monochrome_logs = true - test_data = '' - } - workflow { - """ - help = true - workflow_command = null - pre_help_text = null - post_help_text = null - validate_params = false - schema_filename = "$moduleTestDir/nextflow_schema.json" - - input[0] = help - input[1] = workflow_command - input[2] = pre_help_text - input[3] = post_help_text - input[4] = validate_params - input[5] = schema_filename - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert workflow.exitStatus == 0 }, - { assert workflow.stdout.any { it.contains('Input/output options') } }, - { 
assert workflow.stdout.any { it.contains('--outdir') } } - ) - } - } - - test("Should run help with command") { - - when { - - params { - monochrome_logs = true - test_data = '' - } - workflow { - """ - help = true - workflow_command = "nextflow run noorg/doesntexist" - pre_help_text = null - post_help_text = null - validate_params = false - schema_filename = "$moduleTestDir/nextflow_schema.json" - - input[0] = help - input[1] = workflow_command - input[2] = pre_help_text - input[3] = post_help_text - input[4] = validate_params - input[5] = schema_filename - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert workflow.exitStatus == 0 }, - { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, - { assert workflow.stdout.any { it.contains('Input/output options') } }, - { assert workflow.stdout.any { it.contains('--outdir') } } - ) - } - } - - test("Should run help with extra text") { - - - when { - - params { - monochrome_logs = true - test_data = '' - } - workflow { - """ - help = true - workflow_command = "nextflow run noorg/doesntexist" - pre_help_text = "pre-help-text" - post_help_text = "post-help-text" - validate_params = false - schema_filename = "$moduleTestDir/nextflow_schema.json" - - input[0] = help - input[1] = workflow_command - input[2] = pre_help_text - input[3] = post_help_text - input[4] = validate_params - input[5] = schema_filename - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert workflow.exitStatus == 0 }, - { assert workflow.stdout.any { it.contains('pre-help-text') } }, - { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, - { assert workflow.stdout.any { it.contains('Input/output options') } }, - { assert workflow.stdout.any { it.contains('--outdir') } }, - { assert workflow.stdout.any { it.contains('post-help-text') } } - ) - } - } - - test("Should validate params") { - - when { - - params { - monochrome_logs = true - test_data = 
'' - outdir = false - } - workflow { - """ - help = false - workflow_command = null - pre_help_text = null - post_help_text = null - validate_params = true - schema_filename = "$moduleTestDir/nextflow_schema.json" - - input[0] = help - input[1] = workflow_command - input[2] = pre_help_text - input[3] = post_help_text - input[4] = validate_params - input[5] = schema_filename - """ - } - } - - then { - assertAll( - { assert workflow.failed }, - { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } - ) - } - } -} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml deleted file mode 100644 index 60b1cfff..00000000 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/utils_nfvalidation_plugin: - - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/workflows/evaluatemsa.nf b/workflows/evaluatemsa.nf index d58c9a50..5a5611e5 100644 --- a/workflows/evaluatemsa.nf +++ b/workflows/evaluatemsa.nf @@ -14,7 +14,7 @@ include { CSVTK_JOIN as MERGE_STATS_EVAL } from '../modules/nf-core/csvtk/join/m include { EVALUATE } from '../subworkflows/local/evaluate' // FUNCTIONS -include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { samplesheetToList } from 'plugin/nf-schema' diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index 50165c13..bd636c00 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -12,7 +12,7 @@ include { MULTIQC } from '../modules/local/multiqc' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { paramsSummaryMap } from 
'plugin/nf-validation' +include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_multiplesequencealign_pipeline' From 79a714fa02d4d82e9e8f4ac0ac7a928cd736b1d6 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 19 Nov 2024 12:31:55 +0100 Subject: [PATCH 13/23] more fixes to make downstream samplesheet work + start adding params to meta map --- assets/schema_evaluate.json | 30 ++++++++++++++ assets/schema_input.json | 10 ++--- conf/modules.config | 18 ++++---- main.nf | 33 +++++++++------ .../generate_downstream_samplesheet/main.nf | 41 +++++++++++++++---- .../main.nf | 2 +- workflows/evaluatemsa.nf | 26 +++++++++--- workflows/multiplesequencealign.nf | 2 +- 8 files changed, 119 insertions(+), 43 deletions(-) diff --git a/assets/schema_evaluate.json b/assets/schema_evaluate.json index 098f3fef..e528cb97 100644 --- a/assets/schema_evaluate.json +++ b/assets/schema_evaluate.json @@ -13,6 +13,36 @@ "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["id"] }, + "alignment": { + "type": "string", + "description": "the alignment tool from params.alignment", + "meta": ["alignment"] + }, + "alignment_args": { + "type": "string", + "description": "the alignment arguments from params.alignment_args", + "meta": ["alignment_args"] + }, + "guidetree": { + "type": "string", + "description": "the guidetree tool from params.guidetree", + "meta": ["guidetree"] + }, + "guidetree_args": { + "type": "string", + "description": "the guidetree arguments from params.guidetree_args", + "meta": ["guidetree_args"] + }, + "treealign": { + "type": "string", + "description": "the treealign tool from params.treealign", + "meta": ["treealign"] + }, + "treealign_args": { + "type": "string", + "description": "the treealign 
arguments from params.treealign_args", + "meta": ["treealign_args"] + }, "msa": { "type": "string", "format": "file-path", diff --git a/assets/schema_input.json b/assets/schema_input.json index 224a93a6..b31d2f7a 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -15,21 +15,21 @@ }, "fasta": { "type": "string", + "format": "file-path", "pattern": "^\\S+\\.f(ast)?a$", - "errorMessage": "fasta file. Must end with .fa or .fasta", - "default": "" + "errorMessage": "fasta file. Must end with .fa or .fasta" }, "reference": { "type": "string", - "default": "" + "format": "file-path" }, "dependencies": { "type": "string", - "default": "" + "format": "file-path" }, "template": { "type": "string", - "default": "" + "format": "file-path" } }, "required": ["id"], diff --git a/conf/modules.config b/conf/modules.config index 7705fd8a..6d244bfc 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -110,9 +110,9 @@ } withName: "MTMALIGN_ALIGN"{ - tag = { "${meta.id} tree:${meta.tree} argstree:${args_tree} args:${meta.args_aligner}" } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } - ext.args = { "${meta.args_aligner}" == "null" ? '' : "${meta.args_aligner}" } + tag = { "${meta.id} tree:${meta.guidetree} argstree:${args_tree} args:${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" } + ext.args = { "${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" == "null" ? 
'' : "${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" } if(params.skip_compression){ publishDir = [ path: { "${params.outdir}/alignment/${meta.id}" }, @@ -137,21 +137,21 @@ // withName: 'PARSE_IRMSD'{ - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_irmsd" } } withName: 'TCOFFEE_ALNCOMPARE_SP'{ - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_sp" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_sp" } ext.args = "-compare_mode sp" } withName: 'TCOFFEE_ALNCOMPARE_TC'{ - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tc" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_tc" } ext.args = "-compare_mode tc" } withName: 'TCOFFEE_IRMSD'{ - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } + ext.prefix = { 
"${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_irmsd" } publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -160,7 +160,7 @@ } withName: "CALC_GAPS"{ - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_gaps" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_gaps" } } withName: "CONCAT_IRMSD"{ @@ -184,7 +184,7 @@ } withName: 'TCOFFEE_TCS'{ - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tcs" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_tcs" } publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: params.publish_dir_mode, diff --git a/main.nf b/main.nf index 77e03cc6..662b4740 100644 --- a/main.nf +++ b/main.nf @@ -73,6 +73,9 @@ workflow NFCORE_EVALUATEMSA { ch_versions, outdir ) + + emit: + multiqc_report = EVALUATEMSA.out.multiqc } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -84,17 +87,7 @@ workflow { 
main: - // - // SUBWORKFLOW: Run initialisation tasks - // - PIPELINE_INITIALISATION ( - params.version, - params.validate_params, - params.monochrome_logs, - args, - params.outdir, - params.input, - ) + def ch_multiqc_report = Channel.empty() if (params.evaluate) { // WORKFLOW: Run evaluation workflow @@ -103,7 +96,21 @@ workflow { "${params.outdir}/downstream_samplesheets/stats.csv", params.outdir ) + + ch_multiqc_report = NFCORE_EVALUATEMSA.out.multiqc_report } else { + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input, + ) + // // WORKFLOW: Run main workflow // @@ -111,6 +118,8 @@ workflow { PIPELINE_INITIALISATION.out.samplesheet, params.outdir ) + + ch_multiqc_report = NFCORE_MULTIPLESEQUENCEALIGN.out.multiqc_report } // @@ -123,7 +132,7 @@ workflow { params.outdir, params.monochrome_logs, params.hook_url, - NFCORE_MULTIPLESEQUENCEALIGN.out.multiqc_report, + ch_multiqc_report, "${params.outdir}/shiny_app", "${params.outdir}/pipeline_info", params.shiny_trace_mode, diff --git a/subworkflows/local/generate_downstream_samplesheet/main.nf b/subworkflows/local/generate_downstream_samplesheet/main.nf index 4fa587fd..c8f92d1a 100644 --- a/subworkflows/local/generate_downstream_samplesheet/main.nf +++ b/subworkflows/local/generate_downstream_samplesheet/main.nf @@ -20,18 +20,36 @@ workflow SAMPLESHEET_EVALUATION { if (samplesheet.exists()) { ch_existing_samplesheet = Channel.fromList(samplesheetToList(samplesheet, "${projectDir}/assets/schema_evaluate.json")) .flatten() + .map { it -> + if (it.reference) { + [id: it.id, + alignment:it.alignment, alignment_args:it.alignment_args, + guidetree:it.guidetree, guidetree_args:it.guidetree_args, + treealign:it.treealign, treealign_args:it.treealign_args, + msa: it.msa, reference: it.reference.toUri(), structures: it.structures] + } else { + [id: it.id, + alignment:it.alignment, 
alignment_args:it.alignment_args, + guidetree:it.guidetree, guidetree_args:it.guidetree_args, + treealign:it.treealign, treealign_args:it.treealign_args, + msa: it.msa, reference: it.reference, structures: it.structures] + } + } } // Create a channel with the new values for the samplesheet def ch_info_for_samplesheet = ch_msa - .view() .join(ch_references, by: 0, remainder: true) .join(ch_structures, by: 0, remainder: true) - .map { meta, msa, reference, structure -> - [id: meta.id, msa: msa, reference: reference, structures: structure] + .flatMap { meta, msa, reference, structures -> + structures.collect { structure -> + [id: meta.id, + alignment:params.alignment, alignment_args:params.alignment_args, + guidetree:params.guidetree, guidetree_args:params.guidetree_args, + treealign:params.treealign, treealign_args:params.treealign_args, + msa: msa, reference: reference.toUri(), structures: structure] + } } // Join both channels - ch_existing_samplesheet.dump(tag: "existing") - ch_info_for_samplesheet.dump(tag: "new") ch_existing_samplesheet .mix(ch_info_for_samplesheet) .unique() @@ -59,8 +77,6 @@ workflow SAMPLESHEET_STATS { [id: meta.id, stats: csv] } // Join both channels - ch_existing_samplesheet.dump(tag: "existing") - ch_info_for_samplesheet.dump(tag: "new") ch_existing_samplesheet .mix(ch_info_for_samplesheet) .unique() @@ -100,8 +116,15 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { def channelToSamplesheet(ch_list_for_samplesheet, path) { ch_list_for_samplesheet .first() - .map { it -> it.keySet().join(",") } - .concat(ch_list_for_samplesheet.map { it -> it.values().join(",").replace("null", "") }) + .map { it -> + it.keySet().join(",") + } + .concat( + ch_list_for_samplesheet + .map { it -> + it.values().join(",").replace("null", "").replace("[]", "") + } + ) .collectFile( name: "${path}.csv", newLine: true, diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf 
b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index bfbe4eea..d417a9a2 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -94,7 +94,7 @@ workflow PIPELINE_COMPLETION { multiqc_report // string: Path to MultiQC report shiny_dir_path // string: Path to shiny stats file trace_dir_path // string: Path to trace file - shiny_trace_mode // string: Mode to use for shiny trace file (default: "latest", options: "latest", "all") + shiny_trace_mode // string: Mode to use for shiny trace file (default: "latest", options: "latest", "all") evaluate // boolean: Evaluate the results main: diff --git a/workflows/evaluatemsa.nf b/workflows/evaluatemsa.nf index 5a5611e5..45e10c4e 100644 --- a/workflows/evaluatemsa.nf +++ b/workflows/evaluatemsa.nf @@ -18,6 +18,8 @@ include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { samplesheetToList } from 'plugin/nf-schema' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_multiplesequencealign_pipeline' + workflow EVALUATEMSA { @@ -35,12 +37,18 @@ workflow EVALUATEMSA { // // Read evaluate samplesheet and create channels // - ch_input = Channel.fromList(samplesheetToList(evaluate_samplesheet, "${projectDir}/assets/schema_evaluate.json")) + def ch_input = Channel.fromList(samplesheetToList(evaluate_samplesheet, "${projectDir}/assets/schema_evaluate.json")) ch_input - .multiMap { meta, msa, reference, structure -> + .flatten() + .map { it -> + [["id": it.id, "alignment": it.alignment, "alignment_args": it.alignment_args, "guidetree": it.guidetree, "guidetree_args": it.guidetree_args, "treealign": it.treealign, "treealign_args": it.treealign_args], + it.msa, it.reference, it.structures] + } + 
.groupTuple(by: [0,1,2]) + .multiMap { meta, msa, reference, structures -> msa: [meta, msa] reference: [meta, reference] - structure: [meta, structure] + structures: [meta, structures] } .set { ch_input_multi } @@ -48,7 +56,7 @@ workflow EVALUATEMSA { // Evaluate the quality of the alignment // if (!params.skip_eval) { - EVALUATE (ch_input_multi.msa, ch_input_multi.reference, ch_input_multi.structure) + EVALUATE (ch_input_multi.msa, ch_input_multi.reference, ch_input_multi.structures) ch_versions = ch_versions.mix(EVALUATE.out.versions) evaluation_summary = evaluation_summary.mix(EVALUATE.out.eval_summary) } @@ -57,7 +65,12 @@ workflow EVALUATEMSA { // Combine stats and evaluation reports into a single CSV // if (!params.skip_stats || !params.skip_eval) { - def stats_summary_csv = stats_summary.map{ meta, csv -> csv } + def ch_stats = Channel.fromList(samplesheetToList(stats_summary, "${projectDir}/assets/schema_stats.json")) + .map { it -> + def meta = ["id": it.id] + [ meta, it.stats ] + } + def stats_summary_csv = ch_stats.map{ meta, csv -> csv } def eval_summary_csv = evaluation_summary.map{ meta, csv -> csv } stats_summary_csv.mix(eval_summary_csv) .collect() @@ -92,7 +105,8 @@ workflow EVALUATEMSA { // // MODULE: MultiQC // - def multiqc_out = Channel.empty() + def multiqc_out = Channel.empty() + def ch_multiqc_files = Channel.empty() if (!params.skip_multiqc && (!params.skip_stats || !params.skip_eval)) { ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index bd636c00..b3895946 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -107,7 +107,7 @@ workflow MULTIPLESEQUENCEALIGN { // If the directory is compressed, it is uncompressed first. 
ch_structures .branch { structures -> - compressed: structures[1].endsWith('.tar.gz') + compressed: structures[1].name.endsWith('.tar.gz') uncompressed: true } .set { ch_structures } From 0ce16ed1b6f61f547e49fcdfffd9c963b6d42213 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 19 Nov 2024 14:06:55 +0100 Subject: [PATCH 14/23] either alignment or guidetree + treealign --- conf/test.config | 2 -- nextflow.config | 2 +- nextflow_schema.json | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/conf/test.config b/conf/test.config index ca07b152..3299859d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -34,6 +34,4 @@ params { // Input data input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv' alignment = 'clustalo/align' - guidetree = 'clustalo/guidetree' - treealign = 'clustalo/treealign' } diff --git a/nextflow.config b/nextflow.config index 97e6b792..288c0cc6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,7 +22,6 @@ params { guidetree_args = '' treealign_args = '' - // Stats skip_stats = false calc_sim = false @@ -59,6 +58,7 @@ params { help = false version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + validate_params = true // Shiny options shiny_app = "${projectDir}/bin/shiny_app" diff --git a/nextflow_schema.json b/nextflow_schema.json index b2f88187..8d88067d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -104,6 +104,44 @@ } } }, + "tool_selectors": { + "title": "Tool selectors", + "type": "object", + "description": "Parameters to select which tools to use", + "default": "", + "fa_icon": "fas fa-tools", + "oneOf": [{"required": ["alignment"]}, {"required": ["guidetree", "treealign"]}], + "properties": { + "alignment": { + "type": "string", + "description": "Which aligner tool to use" + }, + "guidetree": { + "type": "string", + "description": "Which tool to use 
to generate a guide tree" + }, + "treealign": { + "type": "string", + "description": "Which aligner tool to use to align providing a generated guide tree" + }, + "alignment_args": { + "type": "string", + "description": "Arguments to pass to the alignment tool" + }, + "guidetree_args": { + "type": "string", + "description": "Arguments to pass to the guide tree tool" + }, + "treealign_args": { + "type": "string", + "description": "Arguments to pass to the tree" + }, + "evaluate": { + "type": "boolean", + "description": "Run the evaluation workflow" + } + } + }, "stats_options": { "title": "Stats options", "type": "object", @@ -354,6 +392,9 @@ { "$ref": "#/$defs/input_output_options" }, + { + "$ref": "#/$defs/tool_selectors" + }, { "$ref": "#/$defs/align_options" }, From d5810a81e52cd6ea3e0ef31982cc03a0fe549ddf Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 20 Nov 2024 14:46:55 +0100 Subject: [PATCH 15/23] not compress files + fixes to make the evaluation workflow work --- assets/schema_evaluate.json | 2 +- conf/modules.config | 32 ++++---- main.nf | 22 ++++- modules/mirpedrol/clustalo/align/main.nf | 6 +- modules/mirpedrol/clustalo/treealign/main.nf | 6 +- modules/mirpedrol/famsa/align/main.nf | 8 +- modules/mirpedrol/famsa/treealign/main.nf | 8 +- modules/mirpedrol/kalign/align/main.nf | 6 +- modules/mirpedrol/learnmsa/align/main.nf | 6 +- modules/mirpedrol/mafft/main.nf | 6 +- modules/mirpedrol/magus/align/main.nf | 6 +- modules/mirpedrol/magus/treealign/main.nf | 6 +- modules/mirpedrol/muscle5/super5/main.nf | 7 +- modules/mirpedrol/tcoffee/align/main.nf | 15 +--- modules/mirpedrol/tcoffee/treealign/main.nf | 15 +--- .../generate_downstream_samplesheet/main.nf | 25 ++++-- workflows/evaluatemsa.nf | 5 +- workflows/multiplesequencealign.nf | 81 +++++++++++-------- 18 files changed, 146 insertions(+), 116 deletions(-) diff --git a/assets/schema_evaluate.json b/assets/schema_evaluate.json index e528cb97..8f432787 100644 --- a/assets/schema_evaluate.json +++ 
b/assets/schema_evaluate.json @@ -46,7 +46,7 @@ "msa": { "type": "string", "format": "file-path", - "pattern": "^\\S+\\.aln(.gz)$", + "pattern": "^\\S+\\.aln$", "description": "aln file containing the MSA", "errorMessage": "Must end with .aln", "meta": ["msa"] diff --git a/conf/modules.config b/conf/modules.config index 6d244bfc..f2b8cca4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -56,6 +56,7 @@ withName: "MERGE_STATS"{ ext.prefix = { "complete_summary_stats" } ext.args = "-f 1 -O" + // If the publishDir path changes, make sure to change the path in generate_downstream_samplesheet publishDir = [ path: { "${params.outdir}/stats/" }, mode: params.publish_dir_mode, @@ -69,9 +70,9 @@ withName: "FAMSA_GUIDETREE|CLUSTALO_GUIDETREE|MAGUS_GUIDETREE"{ ext.args = { params.guidetree_args } - tag = { "${meta.id} args:${ext.args}" } + tag = { "${meta.id} args:${meta.guidetree_args}" } publishDir = [ - path: { "${params.outdir}/trees/${task.process.split(":")[-1].replace("_", "-")}_${params.guidetree_args.trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" }, + path: { "${params.outdir}/trees/${meta.guidetree}_${meta.guidetree_args}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -83,9 +84,9 @@ withName: "CLUSTALO_TREEALIGN|FAMSA_TREEALIGN|MAGUS_TREEALIGN|TCOFFEE_TREEALIGN"{ ext.args = { params.treealign_args } - tag = { "${meta.id} args:${ext.args}" } + tag = { "${meta.id} args:${meta.treealign_args}" } publishDir = [ - path: { "${params.outdir}/alignment/${task.process.split(":")[-1].replace("_", "-")}_${params.treealign_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${params.guidetree.replace("/", "-")}_${params.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" }, + path: { "${params.outdir}/alignment/${meta.treealign}_${meta.treealign_args}_${meta.guidetree}_${meta.guidetree_args}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -101,18 +102,19 @@ withName: "CLUSTALO_ALIGN|FAMSA_ALIGN|LEARNMSA_ALIGN|MAFFT|MAGUS_ALIGN|MUSCLE5_SUPER5|REGRESSIVE|TCOFFEE_ALIGN|TCOFFEE3D_ALIGN"{ ext.args = { params.alignment_args } - tag = { "${meta.id} args:${params.alignment_args}" } + tag = { "${meta.id} args:${meta.alignment_args}" } publishDir = [ - path: { "${params.outdir}/alignment/${task.process.split(":")[-1].replace("_", "-")}_${params.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" }, + path: { "${params.outdir}/alignment/${meta.alignment}_${meta.alignment_args}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } withName: "MTMALIGN_ALIGN"{ - tag = { "${meta.id} tree:${meta.guidetree} argstree:${args_tree} args:${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" } - ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" } - ext.args = { "${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" == "null" ? '' : "${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}" } + // unused module for this simplified version + tag = { "${meta.id} tree:${meta.guidetree} argstree:${meta.guidetree_args} args:${meta.treealign_args}" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args}_${meta.treealign}-args-${meta.treealign_args}" } + ext.args = { "${meta.alignment_args}" == "null" ? 
'' : "${meta.alignment_args}" } if(params.skip_compression){ publishDir = [ path: { "${params.outdir}/alignment/${meta.id}" }, @@ -137,21 +139,21 @@ // withName: 'PARSE_IRMSD'{ - ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_irmsd" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args}_${meta.alignment ?: meta.treealign}-args-${meta.alignment_args ?: meta.treealign_args}_irmsd" } } withName: 'TCOFFEE_ALNCOMPARE_SP'{ - ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_sp" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args}_${meta.alignment ?: meta.treealign}-args-${meta.alignment_args ?: meta.treealign_args}_sp" } ext.args = "-compare_mode sp" } withName: 'TCOFFEE_ALNCOMPARE_TC'{ - ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_tc" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args}_${meta.alignment ?: meta.treealign}-args-${meta.alignment_args ?: meta.treealign_args}_tc" } ext.args = "-compare_mode tc" } withName: 'TCOFFEE_IRMSD'{ - ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" 
", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_irmsd" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args}_${meta.alignment ?: meta.treealign}-args-${meta.alignment_args ?: meta.treealign_args}_irmsd" } publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -160,7 +162,7 @@ } withName: "CALC_GAPS"{ - ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_gaps" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args}_${meta.alignment ?: meta.treealign}-args-${meta.alignment_args ?: meta.treealign_args}_gaps" } } withName: "CONCAT_IRMSD"{ @@ -184,7 +186,7 @@ } withName: 'TCOFFEE_TCS'{ - ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_${meta.alignment.replace("_", "-")}-args-${meta.alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "")}_tcs" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.guidetree_args}_${meta.alignment ?: meta.treealign}-args-${meta.alignment_args ?: meta.treealign_args}_tcs" } publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: params.publish_dir_mode, diff --git a/main.nf b/main.nf index 662b4740..3dc2da77 100644 --- a/main.nf +++ b/main.nf @@ -36,6 +36,12 @@ workflow NFCORE_MULTIPLESEQUENCEALIGN { take: samplesheet // channel: 
samplesheet read in from --input outdir + alignment // params.alignment + alignment_args // params.alignment_args + guidetree // params.guidetree + guidetree_args // params.guidetree_args + treealign // params.treealign + treealign_args // params.treealign_args main: def ch_versions = Channel.empty() @@ -46,7 +52,13 @@ workflow NFCORE_MULTIPLESEQUENCEALIGN { MULTIPLESEQUENCEALIGN ( samplesheet, ch_versions, - outdir + outdir, + alignment, + alignment_args, + guidetree, + guidetree_args, + treealign, + treealign_args ) emit: @@ -116,7 +128,13 @@ workflow { // NFCORE_MULTIPLESEQUENCEALIGN ( PIPELINE_INITIALISATION.out.samplesheet, - params.outdir + params.outdir, + params.alignment, + params.alignment_args , + params.guidetree, + params.guidetree_args, + params.treealign, + params.treealign_args ) ch_multiqc_report = NFCORE_MULTIPLESEQUENCEALIGN.out.multiqc_report diff --git a/modules/mirpedrol/clustalo/align/main.nf b/modules/mirpedrol/clustalo/align/main.nf index 55a1113b..bb92ef1c 100644 --- a/modules/mirpedrol/clustalo/align/main.nf +++ b/modules/mirpedrol/clustalo/align/main.nf @@ -11,7 +11,7 @@ process CLUSTALO_ALIGN { tuple val(meta) , path(fasta) output: - tuple val(meta), path("*.aln.gz"), emit: alignment + tuple val(meta), path("*.aln"), emit: alignment path "versions.yml" , emit: versions when: @@ -30,7 +30,7 @@ process CLUSTALO_ALIGN { -i ${fasta} \ --threads=${task.cpus} \ $args \ - --force -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) + > ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -43,7 +43,7 @@ process CLUSTALO_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln.gz + touch ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/clustalo/treealign/main.nf b/modules/mirpedrol/clustalo/treealign/main.nf index cfa9c117..9cfc82d8 100644 --- a/modules/mirpedrol/clustalo/treealign/main.nf +++ 
b/modules/mirpedrol/clustalo/treealign/main.nf @@ -12,7 +12,7 @@ process CLUSTALO_TREEALIGN { tuple val(meta2), path(tree) output: - tuple val(meta), path("*.aln.gz"), emit: alignment + tuple val(meta), path("*.aln"), emit: alignment path "versions.yml" , emit: versions when: @@ -32,7 +32,7 @@ process CLUSTALO_TREEALIGN { --guidetree-in=${tree} \ --threads=${task.cpus} \ $args \ - --force -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) + > ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -45,7 +45,7 @@ process CLUSTALO_TREEALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln.gz + touch ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/famsa/align/main.nf b/modules/mirpedrol/famsa/align/main.nf index ba7c0eb1..cdd3d787 100644 --- a/modules/mirpedrol/famsa/align/main.nf +++ b/modules/mirpedrol/famsa/align/main.nf @@ -13,7 +13,7 @@ process FAMSA_ALIGN { tuple val(meta) , path(fasta) output: - tuple val(meta), path("*.aln.gz"), emit: alignment + tuple val(meta), path("*.aln"), emit: alignment path "versions.yml" , emit: versions when: @@ -23,11 +23,11 @@ process FAMSA_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - famsa -gz \\ + famsa \\ $args \\ -t ${task.cpus} \\ ${fasta} \\ - ${prefix}.aln.gz + ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -38,7 +38,7 @@ process FAMSA_ALIGN { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln.gz + touch ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/famsa/treealign/main.nf b/modules/mirpedrol/famsa/treealign/main.nf index 6d1fd80a..0280dd09 100644 --- a/modules/mirpedrol/famsa/treealign/main.nf +++ b/modules/mirpedrol/famsa/treealign/main.nf @@ -14,7 +14,7 @@ process FAMSA_TREEALIGN { tuple val(meta2), path(tree) output: - tuple val(meta), path("*.aln.gz"), emit: 
alignment + tuple val(meta), path("*.aln"), emit: alignment path "versions.yml" , emit: versions when: @@ -24,12 +24,12 @@ process FAMSA_TREEALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - famsa -gt import $tree \\ + famsa import $tree \\ -gz \\ $args \\ -t ${task.cpus} \\ ${fasta} \\ - ${prefix}.aln.gz + ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -40,7 +40,7 @@ process FAMSA_TREEALIGN { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln.gz + touch ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/kalign/align/main.nf b/modules/mirpedrol/kalign/align/main.nf index 014f5216..ceb4fc23 100644 --- a/modules/mirpedrol/kalign/align/main.nf +++ b/modules/mirpedrol/kalign/align/main.nf @@ -11,7 +11,7 @@ process KALIGN_ALIGN { tuple val(meta), path(fasta) output: - tuple val(meta), path("*.aln.gz"), emit: alignment + tuple val(meta), path("*.aln"), emit: alignment path "versions.yml" , emit: versions when: @@ -24,7 +24,7 @@ process KALIGN_ALIGN { unpigz -cdf $fasta | \\ kalign \\ $args \\ - -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) + -o ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -37,7 +37,7 @@ process KALIGN_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln.gz + touch ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/learnmsa/align/main.nf b/modules/mirpedrol/learnmsa/align/main.nf index 365768e0..3cb32576 100644 --- a/modules/mirpedrol/learnmsa/align/main.nf +++ b/modules/mirpedrol/learnmsa/align/main.nf @@ -11,7 +11,7 @@ process LEARNMSA_ALIGN { tuple val(meta), path(fasta) output: - tuple val(meta), path("*.aln.gz"), emit: alignment + tuple val(meta), path("*.aln"), emit: alignment path "versions.yml" , emit: versions when: @@ -24,7 +24,7 @@ process LEARNMSA_ALIGN { learnMSA \\ $args \\ -i 
<(unpigz -cdf $fasta) \\ - -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) + -o ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -37,7 +37,7 @@ process LEARNMSA_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln.gz + touch ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/mafft/main.nf b/modules/mirpedrol/mafft/main.nf index 1ed127b6..92055318 100644 --- a/modules/mirpedrol/mafft/main.nf +++ b/modules/mirpedrol/mafft/main.nf @@ -11,7 +11,7 @@ process MAFFT { tuple val(meta) , path(fasta) output: - tuple val(meta), path("*.aln.gz"), emit: alignment + tuple val(meta), path("*.aln"), emit: alignment path "versions.yml" , emit: versions when: @@ -25,7 +25,7 @@ process MAFFT { --thread ${task.cpus} \\ ${args} \\ ${fasta} \\ - | pigz -cp ${task.cpus} > ${prefix}.aln.gz + > ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -38,7 +38,7 @@ process MAFFT { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln.gz + touch ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/magus/align/main.nf b/modules/mirpedrol/magus/align/main.nf index 18622ddd..f2b292c4 100644 --- a/modules/mirpedrol/magus/align/main.nf +++ b/modules/mirpedrol/magus/align/main.nf @@ -11,7 +11,7 @@ process MAGUS_ALIGN { tuple val(meta) , path(fasta) output: - tuple val(meta), path("*.aln.gz"), emit: alignment + tuple val(meta), path("*.aln"), emit: alignment path "versions.yml" , emit: versions when: @@ -28,7 +28,7 @@ process MAGUS_ALIGN { -np $task.cpus \\ -i $fasta \\ -d ./ \\ - --overwrite -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) \\ + -o ${prefix}.aln \\ $args cat <<-END_VERSIONS > versions.yml @@ -42,7 +42,7 @@ process MAGUS_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - echo "" | gzip > ${prefix}.aln.gz + touch 
${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/magus/treealign/main.nf b/modules/mirpedrol/magus/treealign/main.nf index 0fd93baa..ea3c2ea5 100644 --- a/modules/mirpedrol/magus/treealign/main.nf +++ b/modules/mirpedrol/magus/treealign/main.nf @@ -12,7 +12,7 @@ process MAGUS_TREEALIGN { tuple val(meta2), path(tree) output: - tuple val(meta), path("*.aln.gz"), emit: alignment + tuple val(meta), path("*.aln"), emit: alignment path "versions.yml" , emit: versions when: @@ -29,7 +29,7 @@ process MAGUS_TREEALIGN { -np $task.cpus \\ -i $fasta \\ -d ./ \\ - --overwrite -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) \\ + -o ${prefix}.aln \\ -t $tree \\ $args @@ -44,7 +44,7 @@ process MAGUS_TREEALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - echo "" | gzip > ${prefix}.aln.gz + touch ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/muscle5/super5/main.nf b/modules/mirpedrol/muscle5/super5/main.nf index 09545015..5aefb2dc 100644 --- a/modules/mirpedrol/muscle5/super5/main.nf +++ b/modules/mirpedrol/muscle5/super5/main.nf @@ -10,7 +10,7 @@ process MUSCLE5_SUPER5 { tuple val(meta), path(fasta) output: - tuple val(meta), path("*.aln.gz"), emit: alignment + tuple val(meta), path("*.aln"), emit: alignment path "versions.yml" , emit: versions when: @@ -20,7 +20,6 @@ process MUSCLE5_SUPER5 { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" prefix = args.contains('-perm all') ? "${prefix}@" : "${prefix}" - def write_output = (!args.contains('-perm all')) ? 
" -output >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "-output ${prefix}.aln" // muscle internally expands the shell pipe to a file descriptor of the form /dev/fd/ // this causes it to fail, unless -output is left at the end of the call // see also clustalo/align @@ -31,7 +30,7 @@ process MUSCLE5_SUPER5 { -super5 ${fasta} \\ ${args} \\ -threads ${task.cpus} \\ - $write_output + -output ${prefix}.aln # output may be multiple files if -perm all is set @@ -51,7 +50,7 @@ process MUSCLE5_SUPER5 { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln.gz + touch ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/tcoffee/align/main.nf b/modules/mirpedrol/tcoffee/align/main.nf index a5cc3bd2..5a2c7121 100644 --- a/modules/mirpedrol/tcoffee/align/main.nf +++ b/modules/mirpedrol/tcoffee/align/main.nf @@ -11,7 +11,7 @@ process TCOFFEE_ALIGN { tuple val(meta) , path(fasta) output: - tuple val(meta), path("*.aln.gz"), emit: alignment + tuple val(meta), path("*.aln"), emit: alignment path "versions.yml" , emit: versions when: @@ -26,16 +26,7 @@ process TCOFFEE_ALIGN { $args \ -output fasta_aln \ -thread ${task.cpus} \ - -outfile stdout \ - | pigz -cp ${task.cpus} > ${prefix}.aln.gz - - # If stdout file exist, then compress the file - # This is a patch for the current behaviour of the regressive algorithm - # that does not support the stdout redirection - if [ -f stdout ]; then - pigz -cp ${task.cpus} < stdout > ${prefix}.aln.gz - rm stdout - fi + -outfile ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -47,7 +38,7 @@ process TCOFFEE_ALIGN { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln.gz + touch ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/tcoffee/treealign/main.nf b/modules/mirpedrol/tcoffee/treealign/main.nf index 76ccb6fd..89756aac 100644 --- 
a/modules/mirpedrol/tcoffee/treealign/main.nf +++ b/modules/mirpedrol/tcoffee/treealign/main.nf @@ -12,7 +12,7 @@ process TCOFFEE_TREEALIGN { tuple val(meta2), path(tree) output: - tuple val(meta), path("*.aln.gz"), emit: alignment + tuple val(meta), path("*.aln"), emit: alignment path "versions.yml" , emit: versions when: @@ -28,16 +28,7 @@ process TCOFFEE_TREEALIGN { -output fasta_aln \ $args \ -thread ${task.cpus} \ - -outfile stdout \ - | pigz -cp ${task.cpus} > ${prefix}.aln.gz - - # If stdout file exist, then compress the file - # This is a patch for the current behaviour of the regressive algorithm - # that does not support the stdout redirection - if [ -f stdout ]; then - pigz -cp ${task.cpus} < stdout > ${prefix}.aln.gz - rm stdout - fi + -outfile ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -49,7 +40,7 @@ process TCOFFEE_TREEALIGN { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln.gz + touch ${prefix}.aln cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/generate_downstream_samplesheet/main.nf b/subworkflows/local/generate_downstream_samplesheet/main.nf index c8f92d1a..d2433613 100644 --- a/subworkflows/local/generate_downstream_samplesheet/main.nf +++ b/subworkflows/local/generate_downstream_samplesheet/main.nf @@ -37,21 +37,29 @@ workflow SAMPLESHEET_EVALUATION { } } // Create a channel with the new values for the samplesheet - def ch_info_for_samplesheet = ch_msa + def ch_intermediate = ch_msa .join(ch_references, by: 0, remainder: true) .join(ch_structures, by: 0, remainder: true) + .branch { meta, msa, reference, structures -> + structures: structures != null + no_structures: true + } + ch_intermediate.structures .flatMap { meta, msa, reference, structures -> structures.collect { structure -> - [id: meta.id, - alignment:params.alignment, alignment_args:params.alignment_args, - guidetree:params.guidetree, guidetree_args:params.guidetree_args, - 
treealign:params.treealign, treealign_args:params.treealign_args, - msa: msa, reference: reference.toUri(), structures: structure] + meta + [msa: msa, reference: reference.toUri(), structures: structure] } } + .set { ch_intermediate_structures } + ch_intermediate.no_structures + .map { meta, msa, reference, structures -> + meta + [msa: msa, reference: reference.toUri(), structures: null] + } + .set { ch_intermediate_no_structures } // Join both channels ch_existing_samplesheet - .mix(ch_info_for_samplesheet) + .mix(ch_intermediate_structures) + .mix(ch_intermediate_no_structures) .unique() .set { ch_list_for_samplesheet } @@ -74,7 +82,8 @@ workflow SAMPLESHEET_STATS { // Create a channel with the new values for the samplesheet def ch_info_for_samplesheet = stats_summary .map { meta, csv -> - [id: meta.id, stats: csv] + // If the path chanes, make sure to change the publishDir from MERGE_STATS in modules.config + [id: meta.id, stats: file(file(params.outdir).toString() + "/stats/" + csv.getName())] } // Join both channels ch_existing_samplesheet diff --git a/workflows/evaluatemsa.nf b/workflows/evaluatemsa.nf index 45e10c4e..29cc7993 100644 --- a/workflows/evaluatemsa.nf +++ b/workflows/evaluatemsa.nf @@ -41,7 +41,10 @@ workflow EVALUATEMSA { ch_input .flatten() .map { it -> - [["id": it.id, "alignment": it.alignment, "alignment_args": it.alignment_args, "guidetree": it.guidetree, "guidetree_args": it.guidetree_args, "treealign": it.treealign, "treealign_args": it.treealign_args], + [["id": it.id, + "alignment": it.alignment ?: "", "alignment_args": it.alignment_args ?: "", + "guidetree": it.guidetree ?: "", "guidetree_args": it.guidetree_args ?: "", + "treealign": it.treealign ?: "", "treealign_args": it.treealign_args ?: ""], it.msa, it.reference, it.structures] } .groupTuple(by: [0,1,2]) diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index b3895946..e45c8263 100644 --- a/workflows/multiplesequencealign.nf +++ 
b/workflows/multiplesequencealign.nf @@ -61,44 +61,61 @@ include { MSA_TREEALIGN } from '../subworkflows/mirpedrol/msa_treealign/main' workflow MULTIPLESEQUENCEALIGN { take: - ch_input // channel: [ meta, path(sequence.fasta), path(reference.fasta), path(pdb_structures.tar.gz), path(templates.txt) ] - ch_versions // channel: [ path(versions.yml) ] - outdir // params.outdir + ch_input // channel: [ meta, path(sequence.fasta), path(reference.fasta), path(pdb_structures.tar.gz), path(templates.txt) ] + ch_versions // channel: [ path(versions.yml) ] + outdir // params.outdir + alignment // params.alignment + alignment_args // params.alignment_args + guidetree // params.guidetree + guidetree_args // params.guidetree_args + treealign // params.treealign + treealign_args // params.treealign_args main: def ch_multiqc_files = Channel.empty() def stats_summary = Channel.empty() ch_input - .map { - meta, fasta, ref, str, template -> - [ meta, file(fasta) ] + .map {meta, fasta, ref, structure, template -> + def alignment_clean = alignment ? alignment.replace("/", "-") : "" + def alignment_args_clean = alignment_args ? alignment_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "") : "" + def guidetree_clean = guidetree ? guidetree.replace("/", "-") : "" + def guidetree_args_clean = guidetree_args ? guidetree_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "") : "" + def treealign_clean = treealign ? treealign.replace("/", "-") : "" + def treealign_args_clean = treealign_args ? 
treealign_args.toString().trim().replace(" ", " ").replace(" ", "-").replaceAll("==", "-").replaceAll("\\s+", "") : "" + [ + [ + "id": meta.id, + "alignment": alignment_clean, "alignment_args": alignment_args_clean, + "guidetree": guidetree_clean, "guidetree_args": guidetree_args_clean, + "treealign": treealign_clean, "treealign_args": treealign_args_clean + ], + fasta, ref, structure, template + ] } - .set { ch_seqs } - - ch_input - .filter { input -> input[2].size() > 0} - .map { - meta, fasta, ref, str, template -> - [ meta, file(ref) ] + .multiMap { meta, fasta, ref, structure, template -> + seqs: [ meta, fasta ] + refs: [ meta, ref ] + structures: [ meta, structure ] + templates: [ meta, template ] } - .set { ch_refs } + .set { ch_input_multi } - ch_input - .filter { input -> input[4].size() > 0} - .map { - meta, fasta, ref, str, template -> - [ meta, file(template) ] + ch_input_multi.refs + .filter { meta, ref -> + ref.size() > 0 } - .set { ch_templates } - - ch_input - .map { - meta, fasta, ref, str, template -> - [ meta, str ] + .set { ch_refs } + ch_input_multi.structures + .filter { meta, structure -> + structure.size() > 0 } - .filter { input -> input[1].size() > 0 } .set { ch_structures } + ch_input_multi.templates + .filter { meta, template -> + template.size() > 0 + } + .set { ch_templates } // ---------------- // STRUCTURES @@ -161,7 +178,7 @@ workflow MULTIPLESEQUENCEALIGN { // if (!params.skip_stats) { STATS ( - ch_seqs, + ch_input_multi.seqs, ch_structures ) ch_versions = ch_versions.mix(STATS.out.versions) @@ -170,17 +187,17 @@ workflow MULTIPLESEQUENCEALIGN { def msa_alignment = Channel.empty() - if (params.guidetree && params.treealign) { + if (guidetree && treealign) { // // Compute tree // MSA_GUIDETREE ( - ch_seqs + ch_input_multi.seqs ) ch_versions = ch_versions.mix(MSA_GUIDETREE.out.versions) // Prepare channels for treealign to make sure the correct tree is used for the respective alignment - ch_seqs + ch_input_multi.seqs 
.combine(MSA_GUIDETREE.out.tree, by:0) .set { ch_seqs_trees } ch_seqs_trees @@ -201,12 +218,12 @@ workflow MULTIPLESEQUENCEALIGN { msa_alignment = msa_alignment.mix(MSA_TREEALIGN.out.alignment) } - if (params.alignment) { + if (alignment) { // // Align // MSA_ALIGNMENT ( - ch_seqs + ch_input_multi.seqs ) ch_versions = ch_versions.mix(MSA_ALIGNMENT.out.versions) msa_alignment = msa_alignment.mix(MSA_ALIGNMENT.out.alignment) From f0e8e0ac62bb04a6f42225235c5dbef87c4ee933 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 26 Nov 2024 13:52:11 +0100 Subject: [PATCH 16/23] add EXTRACT_STRUCTURES swf and fix downstream samplesheet --- conf/modules.config | 2 + main.nf | 2 + subworkflows/local/extract_structures/main.nf | 29 ++++++++++ .../generate_downstream_samplesheet/main.nf | 56 ++++++++----------- workflows/evaluatemsa.nf | 22 +++++++- workflows/multiplesequencealign.nf | 24 ++------ 6 files changed, 81 insertions(+), 54 deletions(-) create mode 100644 subworkflows/local/extract_structures/main.nf diff --git a/conf/modules.config b/conf/modules.config index f2b8cca4..4e12e29d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -85,6 +85,7 @@ withName: "CLUSTALO_TREEALIGN|FAMSA_TREEALIGN|MAGUS_TREEALIGN|TCOFFEE_TREEALIGN"{ ext.args = { params.treealign_args } tag = { "${meta.id} args:${meta.treealign_args}" } + // If the publishDir path changes, make sure to change the path in generate_downstream_samplesheet publishDir = [ path: { "${params.outdir}/alignment/${meta.treealign}_${meta.treealign_args}_${meta.guidetree}_${meta.guidetree_args}" }, mode: params.publish_dir_mode, @@ -103,6 +104,7 @@ withName: "CLUSTALO_ALIGN|FAMSA_ALIGN|LEARNMSA_ALIGN|MAFFT|MAGUS_ALIGN|MUSCLE5_SUPER5|REGRESSIVE|TCOFFEE_ALIGN|TCOFFEE3D_ALIGN"{ ext.args = { params.alignment_args } tag = { "${meta.id} args:${meta.alignment_args}" } + // If the publishDir path changes, make sure to change the path in generate_downstream_samplesheet publishDir = [ path: { 
"${params.outdir}/alignment/${meta.alignment}_${meta.alignment_args}" }, mode: params.publish_dir_mode, diff --git a/main.nf b/main.nf index 3dc2da77..5cb68acc 100644 --- a/main.nf +++ b/main.nf @@ -102,7 +102,9 @@ workflow { def ch_multiqc_report = Channel.empty() if (params.evaluate) { + // // WORKFLOW: Run evaluation workflow + // NFCORE_EVALUATEMSA ( "${params.outdir}/downstream_samplesheets/evaluation.csv", "${params.outdir}/downstream_samplesheets/stats.csv", diff --git a/subworkflows/local/extract_structures/main.nf b/subworkflows/local/extract_structures/main.nf new file mode 100644 index 00000000..ff4c5be5 --- /dev/null +++ b/subworkflows/local/extract_structures/main.nf @@ -0,0 +1,29 @@ +include { UNTAR } from '../../../modules/nf-core/untar/main' + +workflow EXTRACT_STRUCTURES { + take: + ch_structures // channel: [ meta, /path/to/file.pdb ] + + main: + + // Structures are taken from a directory of PDB files. + // If the directory is compressed, it is uncompressed first. + ch_structures + .branch { structures -> + compressed: structures[1].name.endsWith('.tar.gz') + uncompressed: true + } + .set { ch_structures_branched } + + UNTAR(ch_structures_branched.compressed) + .untar + .mix(ch_structures_branched.uncompressed) + .map { + meta,dir -> + [ meta,file(dir).listFiles().collect() ] + } + .set { ch_structures_list } + + emit: + structures = ch_structures_list +} diff --git a/subworkflows/local/generate_downstream_samplesheet/main.nf b/subworkflows/local/generate_downstream_samplesheet/main.nf index d2433613..8a7ea2e1 100644 --- a/subworkflows/local/generate_downstream_samplesheet/main.nf +++ b/subworkflows/local/generate_downstream_samplesheet/main.nf @@ -10,7 +10,7 @@ workflow SAMPLESHEET_EVALUATION { take: ch_msa // channel: [ meta, /path/to/file.aln ] ch_references // channel: [ meta, /path/to/file.aln ] - ch_structures // channel: [ meta, /path/to/file.pdb ] + ch_structures // channel: [ meta, /path/to/file.tar ] outdir // params.outdir main: @@ 
-21,45 +21,35 @@ workflow SAMPLESHEET_EVALUATION { ch_existing_samplesheet = Channel.fromList(samplesheetToList(samplesheet, "${projectDir}/assets/schema_evaluate.json")) .flatten() .map { it -> - if (it.reference) { - [id: it.id, - alignment:it.alignment, alignment_args:it.alignment_args, - guidetree:it.guidetree, guidetree_args:it.guidetree_args, - treealign:it.treealign, treealign_args:it.treealign_args, - msa: it.msa, reference: it.reference.toUri(), structures: it.structures] - } else { - [id: it.id, - alignment:it.alignment, alignment_args:it.alignment_args, - guidetree:it.guidetree, guidetree_args:it.guidetree_args, - treealign:it.treealign, treealign_args:it.treealign_args, - msa: it.msa, reference: it.reference, structures: it.structures] - } + // convert reference and structures to URI string + [id: it.id, + alignment:it.alignment, alignment_args:it.alignment_args, + guidetree:it.guidetree, guidetree_args:it.guidetree_args, + treealign:it.treealign, treealign_args:it.treealign_args, + msa: it.msa, reference: it.reference ? it.reference.toUri() : it.reference, structures: it.structures ? 
it.structures.toUri() : it.structures] } } // Create a channel with the new values for the samplesheet - def ch_intermediate = ch_msa + def ch_info_for_samplesheet = ch_msa .join(ch_references, by: 0, remainder: true) .join(ch_structures, by: 0, remainder: true) - .branch { meta, msa, reference, structures -> - structures: structures != null - no_structures: true - } - ch_intermediate.structures - .flatMap { meta, msa, reference, structures -> - structures.collect { structure -> - meta + [msa: msa, reference: reference.toUri(), structures: structure] - } - } - .set { ch_intermediate_structures } - ch_intermediate.no_structures .map { meta, msa, reference, structures -> - meta + [msa: msa, reference: reference.toUri(), structures: null] + // If the path changes, make sure to change the publishDir from alignment or treealign modules in modules.config + def path_msa = "" + if (params.alignment) { + path_msa = "${meta.alignment}_${meta.alignment_args}/" + } else { + path_msa = "${meta.treealign}_${meta.treealign_args}_${meta.guidetree}_${meta.guidetree_args}/" + } + meta + [ + msa: file(file(params.outdir).toString() + "/alignment/" + path_msa + msa.getName()), + reference: reference ? reference.toUri() : reference, + structures: structures ? 
structures.toUri() : structures + ] } - .set { ch_intermediate_no_structures } // Join both channels ch_existing_samplesheet - .mix(ch_intermediate_structures) - .mix(ch_intermediate_no_structures) + .mix(ch_info_for_samplesheet) .unique() .set { ch_list_for_samplesheet } @@ -82,7 +72,7 @@ workflow SAMPLESHEET_STATS { // Create a channel with the new values for the samplesheet def ch_info_for_samplesheet = stats_summary .map { meta, csv -> - // If the path chanes, make sure to change the publishDir from MERGE_STATS in modules.config + // If the path changes, make sure to change the publishDir from MERGE_STATS in modules.config [id: meta.id, stats: file(file(params.outdir).toString() + "/stats/" + csv.getName())] } // Join both channels @@ -104,7 +94,7 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { take: evaluation_msa // channel: [ meta, /path/to/file.aln ] evaluation_references // channel: [ meta, /path/to/file.aln ] - evaluation_structures // channel: [ meta, /path/to/file.pdb ] + evaluation_structures // channel: [ meta, /path/to/file.tar ] stats_summary // channel: [ meta, /path/to/file.csv ] outdir // params.outdir diff --git a/workflows/evaluatemsa.nf b/workflows/evaluatemsa.nf index 29cc7993..24c23e34 100644 --- a/workflows/evaluatemsa.nf +++ b/workflows/evaluatemsa.nf @@ -12,6 +12,8 @@ include { CSVTK_JOIN as MERGE_STATS_EVAL } from '../modules/nf-core/csvtk/join/m //SUBWORKFLOWS include { EVALUATE } from '../subworkflows/local/evaluate' +include { EXTRACT_STRUCTURES } from '../subworkflows/local/extract_structures/main' + // FUNCTIONS include { paramsSummaryMap } from 'plugin/nf-schema' @@ -47,7 +49,6 @@ workflow EVALUATEMSA { "treealign": it.treealign ?: "", "treealign_args": it.treealign_args ?: ""], it.msa, it.reference, it.structures] } - .groupTuple(by: [0,1,2]) .multiMap { meta, msa, reference, structures -> msa: [meta, msa] reference: [meta, reference] @@ -55,11 +56,28 @@ workflow EVALUATEMSA { } .set { ch_input_multi } + ch_input_multi.reference + 
.filter { meta, ref -> + ref.size() > 0 + } + .set { ch_reference } + ch_input_multi.structures + .filter { meta, structure -> + structure.size() > 0 + } + .set { ch_structures_tar } + + // ---------------- + // STRUCTURES + // ---------------- + EXTRACT_STRUCTURES(ch_structures_tar) + def ch_structures = EXTRACT_STRUCTURES.out.structures + // // Evaluate the quality of the alignment // if (!params.skip_eval) { - EVALUATE (ch_input_multi.msa, ch_input_multi.reference, ch_input_multi.structures) + EVALUATE (ch_input_multi.msa, ch_reference, ch_structures) ch_versions = ch_versions.mix(EVALUATE.out.versions) evaluation_summary = evaluation_summary.mix(EVALUATE.out.eval_summary) } diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index e45c8263..8dee34f0 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -29,6 +29,7 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mult include { STATS } from '../subworkflows/local/stats' include { CREATE_TCOFFEETEMPLATE } from '../modules/local/create_tcoffee_template' include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheet/main' +include { EXTRACT_STRUCTURES } from '../subworkflows/local/extract_structures/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -110,7 +111,7 @@ workflow MULTIPLESEQUENCEALIGN { .filter { meta, structure -> structure.size() > 0 } - .set { ch_structures } + .set { ch_structures_tar } ch_input_multi.templates .filter { meta, template -> template.size() > 0 @@ -120,23 +121,8 @@ workflow MULTIPLESEQUENCEALIGN { // ---------------- // STRUCTURES // ---------------- - // Structures are taken from a directory of PDB files. - // If the directory is compressed, it is uncompressed first. 
- ch_structures - .branch { structures -> - compressed: structures[1].name.endsWith('.tar.gz') - uncompressed: true - } - .set { ch_structures } - - UNTAR (ch_structures.compressed) - .untar - .mix(ch_structures.uncompressed) - .map { - meta,dir -> - [ meta,file(dir).listFiles().collect() ] - } - .set { ch_structures } + EXTRACT_STRUCTURES(ch_structures_tar) + ch_structures = EXTRACT_STRUCTURES.out.structures // ---------------- // TEMPLATES @@ -243,7 +229,7 @@ workflow MULTIPLESEQUENCEALIGN { GENERATE_DOWNSTREAM_SAMPLESHEETS ( msa_alignment, ch_refs, - ch_structures, + ch_structures_tar, stats_summary, outdir ) From 529d22b964d510d70788db7686f75172967ded54 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 26 Nov 2024 13:57:56 +0100 Subject: [PATCH 17/23] remove reference_genome_options from schema --- nextflow_schema.json | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 8d88067d..3c24c95d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -60,37 +60,6 @@ } } }, - "reference_genome_options": { - "title": "Reference genome options", - "type": "object", - "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", - "properties": { - "genome": { - "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." 
- }, - "fasta": { - "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" - }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." - } - } - }, "align_options": { "title": "Stats options", "type": "object", @@ -410,9 +379,6 @@ { "$ref": "#/$defs/compression" }, - { - "$ref": "#/$defs/reference_genome_options" - }, { "$ref": "#/$defs/institutional_config_options" }, From 7677fe2d4b3911852e2865d68368d100ade5f127 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 27 Nov 2024 14:45:55 +0100 Subject: [PATCH 18/23] update tcoffee/alncompare --- modules.json | 2 +- .../tcoffee/alncompare/environment.yml | 2 - modules/nf-core/tcoffee/alncompare/main.nf | 15 ++-- modules/nf-core/tcoffee/alncompare/meta.yml | 61 ++++++++-------- .../tcoffee/alncompare/tests/main.nf.test | 70 +++++++++++++++++-- .../alncompare/tests/main.nf.test.snap | 70 +++++++++++++++++++ 6 files changed, 181 insertions(+), 39 deletions(-) diff --git a/modules.json b/modules.json index 7787c818..9cbf11e7 100644 --- a/modules.json +++ b/modules.json @@ -128,7 +128,7 @@ }, "tcoffee/alncompare": { "branch": "master", - "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", + "git_sha": "ffa000ab3c33df25a165b5f9a039c4cbb665a77b", "installed_by": ["modules"] }, "tcoffee/irmsd": { diff --git 
a/modules/nf-core/tcoffee/alncompare/environment.yml b/modules/nf-core/tcoffee/alncompare/environment.yml index dfa14141..26a17e70 100644 --- a/modules/nf-core/tcoffee/alncompare/environment.yml +++ b/modules/nf-core/tcoffee/alncompare/environment.yml @@ -1,8 +1,6 @@ -name: "tcoffee_alncompare" channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::t-coffee=13.46.0.919e8c6b - conda-forge::pigz=2.8 diff --git a/modules/nf-core/tcoffee/alncompare/main.nf b/modules/nf-core/tcoffee/alncompare/main.nf index 043158a7..e4f46866 100644 --- a/modules/nf-core/tcoffee/alncompare/main.nf +++ b/modules/nf-core/tcoffee/alncompare/main.nf @@ -23,14 +23,19 @@ process TCOFFEE_ALNCOMPARE { def metric_name = args.split('compare_mode ')[1].split(' ')[0] def header = meta.keySet().join(",") def values = meta.values().join(",") - def read_msa = msa.getName().endsWith(".gz") ? "<(unpigz -cdf ${msa})" : msa - def read_ref = ref_msa.getName().endsWith(".gz") ? "<(unpigz -cdf ${ref_msa})" : ref_msa """ + # check whether it is compressed + if [[ "${msa}" == *.gz ]]; then + unpigz -c ${msa} > uncompressed_msa.fa + else + ln ${msa} uncompressed_msa.fa + fi + export TEMP='./' t_coffee -other_pg aln_compare \ - -al1 ${read_ref} \ - -al2 ${read_msa} \ + -al1 ${ref_msa} \ + -al2 uncompressed_msa.fa \ ${args} \ | grep -v "seq1" | grep -v '*' | \ awk '{ print \$4}' ORS="\t" \ @@ -53,6 +58,8 @@ process TCOFFEE_ALNCOMPARE { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" """ + # Otherwise, tcoffee will crash when calling its version + export TEMP='./' touch "${prefix}.scores" cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/tcoffee/alncompare/meta.yml b/modules/nf-core/tcoffee/alncompare/meta.yml index 31502555..0dd0c9ee 100644 --- a/modules/nf-core/tcoffee/alncompare/meta.yml +++ b/modules/nf-core/tcoffee/alncompare/meta.yml @@ -6,47 +6,52 @@ keywords: - evaluation tools: - "tcoffee": - description: "A collection of tools for Multiple 
Alignments of DNA, RNA, Protein Sequence" + description: "A collection of tools for Multiple Alignments of DNA, RNA, Protein + Sequence" homepage: "http://www.tcoffee.org/Projects/tcoffee/" documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" tool_dev_url: "https://github.com/cbcrg/tcoffee" doi: "10.1006/jmbi.2000.4042" licence: ["GPL v3"] + identifier: "" - "pigz": description: "Parallel implementation of the gzip algorithm." homepage: "https://zlib.net/pigz/" documentation: "https://zlib.net/pigz/pigz.pdf" + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', ... ] - - msa: - type: file - description: fasta file containing the alignment to be evaluated. Can be gzipped or uncompressed - pattern: "*.{aln,fa,fasta,fas}{.gz,}" - - ref_msa: - type: file - description: fasta file containing the reference alignment used for the evaluation. Can be gzipped or uncompressed - pattern: "*.{aln,fa,fasta,fas}{.gz,}" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', ... ] + - msa: + type: file + description: fasta file containing the alignment to be evaluated. Can be gzipped + or uncompressed + pattern: "*.{aln,fa,fasta,fas}{.gz,}" + - ref_msa: + type: file + description: fasta file containing the reference alignment used for the evaluation. + Can be gzipped or uncompressed + pattern: "*.{aln,fa,fasta,fas}{.gz,}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - scores: - type: file - description: a file containing the score of the alignment - pattern: "*.scores" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.scores": + type: file + description: a file containing the score of the alignment + pattern: "*.scores" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@l-mansouri" - "@luisas" diff --git a/modules/nf-core/tcoffee/alncompare/tests/main.nf.test b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test index 225a4f12..d68c50f2 100644 --- a/modules/nf-core/tcoffee/alncompare/tests/main.nf.test +++ b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test @@ -6,7 +6,8 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "tcoffee" - tag "tcoffee/alncompare" + tag "tcoffee/alncompare" + tag "pigz/compress" test("seatoxin") { @@ -15,12 +16,73 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true), - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true), + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) ] """ } - + + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("seatoxin - compressed") { + + setup { + + run("PIGZ_COMPRESS") { + script "../../../pigz/compress/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + } + + + when { + process { + """ + reference_file = Channel.fromPath(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + input[0] = 
PIGZ_COMPRESS.out.archive.combine(reference_file) + """ + } + + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("seatoxin - stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true), + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + """ + } + } then { diff --git a/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap index d1fd92e4..90e97dd5 100644 --- a/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap +++ b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap @@ -1,4 +1,70 @@ { + "seatoxin - compressed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "setoxin-ref.fa.scores:md5,c77aceec520beb56f08c342e01c56a14" + ] + ], + "1": [ + "versions.yml:md5,438507517a1a831c7b7a1571b1fdd98d" + ], + "scores": [ + [ + { + "id": "test" + }, + "setoxin-ref.fa.scores:md5,c77aceec520beb56f08c342e01c56a14" + ] + ], + "versions": [ + "versions.yml:md5,438507517a1a831c7b7a1571b1fdd98d" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-26T08:54:31.823528877" + }, + "seatoxin - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.scores:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,438507517a1a831c7b7a1571b1fdd98d" + ], + "scores": [ + [ + { + "id": "test" + }, + "test.scores:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,438507517a1a831c7b7a1571b1fdd98d" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:48:00.328461809" + }, "seatoxin": { "content": [ { @@ -26,6 +92,10 @@ ] } ], + "meta": { + 
"nf-test": "0.9.0", + "nextflow": "24.04.4" + }, "timestamp": "2024-01-22T17:08:59.494237269" } } \ No newline at end of file From 4371fb7e66af1c30a2150280a60e9a597952e28f Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 27 Nov 2024 14:50:34 +0100 Subject: [PATCH 19/23] update kalign/align --- modules.json | 2 +- modules/mirpedrol/kalign/align/main.nf | 6 +++--- modules/mirpedrol/kalign/align/tests/main.nf.test | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/modules.json b/modules.json index 9cbf11e7..2cb070f4 100644 --- a/modules.json +++ b/modules.json @@ -37,7 +37,7 @@ }, "kalign/align": { "branch": "main", - "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", + "git_sha": "34e891b859086dd03e52e79abcf47b3863fcb3cb", "installed_by": ["msa_alignment"] }, "learnmsa/align": { diff --git a/modules/mirpedrol/kalign/align/main.nf b/modules/mirpedrol/kalign/align/main.nf index ceb4fc23..014f5216 100644 --- a/modules/mirpedrol/kalign/align/main.nf +++ b/modules/mirpedrol/kalign/align/main.nf @@ -11,7 +11,7 @@ process KALIGN_ALIGN { tuple val(meta), path(fasta) output: - tuple val(meta), path("*.aln"), emit: alignment + tuple val(meta), path("*.aln.gz"), emit: alignment path "versions.yml" , emit: versions when: @@ -24,7 +24,7 @@ process KALIGN_ALIGN { unpigz -cdf $fasta | \\ kalign \\ $args \\ - -o ${prefix}.aln + -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -37,7 +37,7 @@ process KALIGN_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/kalign/align/tests/main.nf.test b/modules/mirpedrol/kalign/align/tests/main.nf.test index 569aa6d0..1de4d022 100644 --- a/modules/mirpedrol/kalign/align/tests/main.nf.test +++ b/modules/mirpedrol/kalign/align/tests/main.nf.test @@ -26,7 +26,8 @@ nextflow_process { then { assertAll( { 
assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta")}, + { assert snapshot(process.out.alignment).match("alignment")}, + { assert snapshot(process.out.versions).match("versions")} ) } } From 55e7b75d788839ae6d4859b3e80c387e7abe6532 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 27 Nov 2024 14:51:34 +0100 Subject: [PATCH 20/23] update mafft --- modules.json | 124 +++++++++++++++------ modules/mirpedrol/mafft/main.nf | 6 +- modules/mirpedrol/mafft/tests/main.nf.test | 3 +- 3 files changed, 97 insertions(+), 36 deletions(-) diff --git a/modules.json b/modules.json index 2cb070f4..8d577955 100644 --- a/modules.json +++ b/modules.json @@ -8,77 +8,107 @@ "clustalo/align": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "clustalo/guidetree": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_guidetree"] + "installed_by": [ + "msa_guidetree" + ] }, "clustalo/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": ["msa_treealign"] + "installed_by": [ + "msa_treealign" + ] }, "famsa/align": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "famsa/guidetree": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_guidetree"] + "installed_by": [ + "msa_guidetree" + ] }, "famsa/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": ["msa_treealign"] + "installed_by": [ + "msa_treealign" + ] }, "kalign/align": { "branch": "main", "git_sha": "34e891b859086dd03e52e79abcf47b3863fcb3cb", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "learnmsa/align": { "branch": "main", "git_sha": 
"caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "mafft": { "branch": "main", - "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": ["msa_alignment"] + "git_sha": "34e891b859086dd03e52e79abcf47b3863fcb3cb", + "installed_by": [ + "msa_alignment" + ] }, "magus/align": { "branch": "main", "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "magus/guidetree": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_guidetree"] + "installed_by": [ + "msa_guidetree" + ] }, "magus/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": ["msa_treealign"] + "installed_by": [ + "msa_treealign" + ] }, "muscle5/super5": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "tcoffee/align": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_alignment"] + "installed_by": [ + "msa_alignment" + ] }, "tcoffee/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": ["msa_treealign"] + "installed_by": [ + "msa_treealign" + ] } } }, @@ -87,17 +117,23 @@ "msa_alignment": { "branch": "main", "git_sha": "927094f07130b8fa3ac0b8d1f46fd7d252809418", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "msa_guidetree": { "branch": "main", "git_sha": "4748294a96583ecb2c3952e7f81aca426386cc0b", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "msa_treealign": { "branch": "main", "git_sha": "0165b5b51bb1fe396a90c2db93c1f4e70b170816", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } @@ -108,48 +144,66 @@ "csvtk/concat": { "branch": "master", 
"git_sha": "cfe2a24902bfdfe8132f11461ffda92d257f9f09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "csvtk/join": { "branch": "master", "git_sha": "614abbf126f287a3068dc86997b2e1b6a93abe20", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/csvtk/join/csvtk-join.diff" }, "pigz/compress": { "branch": "master", "git_sha": "0eab94fc1e48703c1b0a8704bd665f554905c39d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pigz/uncompress": { "branch": "master", "git_sha": "d7f0de8aae7bf84b080dfdcf4e294bf11a46a51c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/alncompare": { "branch": "master", "git_sha": "ffa000ab3c33df25a165b5f9a039c4cbb665a77b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/irmsd": { "branch": "master", "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/seqreformat": { "branch": "master", "git_sha": "32ae618a60a25a870b5fa47ea2060ddcd911ab53", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/tcs": { "branch": "master", "git_sha": "1cacaceabae75b0c3bc393dee52cb6a5020fcb5c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -158,20 +212,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "56372688d8979092cafbe0c5c3895b491166ca1c", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", - "installed_by": 
["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/mirpedrol/mafft/main.nf b/modules/mirpedrol/mafft/main.nf index 92055318..1ed127b6 100644 --- a/modules/mirpedrol/mafft/main.nf +++ b/modules/mirpedrol/mafft/main.nf @@ -11,7 +11,7 @@ process MAFFT { tuple val(meta) , path(fasta) output: - tuple val(meta), path("*.aln"), emit: alignment + tuple val(meta), path("*.aln.gz"), emit: alignment path "versions.yml" , emit: versions when: @@ -25,7 +25,7 @@ process MAFFT { --thread ${task.cpus} \\ ${args} \\ ${fasta} \\ - > ${prefix}.aln + | pigz -cp ${task.cpus} > ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -38,7 +38,7 @@ process MAFFT { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/mafft/tests/main.nf.test b/modules/mirpedrol/mafft/tests/main.nf.test index 13089a04..ec13b2c0 100644 --- a/modules/mirpedrol/mafft/tests/main.nf.test +++ b/modules/mirpedrol/mafft/tests/main.nf.test @@ -23,7 +23,8 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta")} + { assert snapshot(process.out.alignment).match("alignment")}, + { assert snapshot(process.out.versions).match("mafft_versions")} ) } From 9bae6c199d109e7006ff40cd158df1fc1ab9d2d5 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 27 Nov 2024 14:56:59 +0100 Subject: [PATCH 21/23] update modules --- modules.json | 128 +++++-------------- modules/mirpedrol/magus/guidetree/meta.yml | 4 +- modules/mirpedrol/muscle5/super5/main.nf | 7 +- modules/mirpedrol/muscle5/super5/meta.yml | 4 +- modules/nf-core/csvtk/concat/environment.yml | 2 - modules/nf-core/csvtk/concat/meta.yml | 61 ++++----- 6 files changed, 74 insertions(+), 132 deletions(-) diff --git a/modules.json b/modules.json index 
8d577955..ff7ec6d7 100644 --- a/modules.json +++ b/modules.json @@ -8,107 +8,77 @@ "clustalo/align": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "clustalo/guidetree": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_guidetree" - ] + "installed_by": ["msa_guidetree"] }, "clustalo/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": [ - "msa_treealign" - ] + "installed_by": ["msa_treealign"] }, "famsa/align": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "famsa/guidetree": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_guidetree" - ] + "installed_by": ["msa_guidetree"] }, "famsa/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": [ - "msa_treealign" - ] + "installed_by": ["msa_treealign"] }, "kalign/align": { "branch": "main", "git_sha": "34e891b859086dd03e52e79abcf47b3863fcb3cb", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "learnmsa/align": { "branch": "main", "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "mafft": { "branch": "main", "git_sha": "34e891b859086dd03e52e79abcf47b3863fcb3cb", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "magus/align": { "branch": "main", "git_sha": "caf37f3ee943a8101000b25ba502f038f8bfeb87", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "magus/guidetree": { "branch": "main", - "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_guidetree" - ] + "git_sha": 
"519be3b194f5e2e271436398c1646c538867fe3e", + "installed_by": ["msa_guidetree"] }, "magus/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": [ - "msa_treealign" - ] + "installed_by": ["msa_treealign"] }, "muscle5/super5": { "branch": "main", - "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_alignment" - ] + "git_sha": "34e891b859086dd03e52e79abcf47b3863fcb3cb", + "installed_by": ["msa_alignment"] }, "tcoffee/align": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": [ - "msa_alignment" - ] + "installed_by": ["msa_alignment"] }, "tcoffee/treealign": { "branch": "main", "git_sha": "3987b7c6d9bcfc037c4976142b126c18558b0aed", - "installed_by": [ - "msa_treealign" - ] + "installed_by": ["msa_treealign"] } } }, @@ -117,23 +87,17 @@ "msa_alignment": { "branch": "main", "git_sha": "927094f07130b8fa3ac0b8d1f46fd7d252809418", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "msa_guidetree": { "branch": "main", "git_sha": "4748294a96583ecb2c3952e7f81aca426386cc0b", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "msa_treealign": { "branch": "main", "git_sha": "0165b5b51bb1fe396a90c2db93c1f4e70b170816", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } @@ -143,67 +107,49 @@ "nf-core": { "csvtk/concat": { "branch": "master", - "git_sha": "cfe2a24902bfdfe8132f11461ffda92d257f9f09", - "installed_by": [ - "modules" - ] + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] }, "csvtk/join": { "branch": "master", "git_sha": "614abbf126f287a3068dc86997b2e1b6a93abe20", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/csvtk/join/csvtk-join.diff" }, "pigz/compress": { "branch": "master", "git_sha": "0eab94fc1e48703c1b0a8704bd665f554905c39d", - "installed_by": [ - "modules" - ] + 
"installed_by": ["modules"] }, "pigz/uncompress": { "branch": "master", "git_sha": "d7f0de8aae7bf84b080dfdcf4e294bf11a46a51c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/alncompare": { "branch": "master", "git_sha": "ffa000ab3c33df25a165b5f9a039c4cbb665a77b", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/irmsd": { "branch": "master", "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/seqreformat": { "branch": "master", "git_sha": "32ae618a60a25a870b5fa47ea2060ddcd911ab53", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tcoffee/tcs": { "branch": "master", "git_sha": "1cacaceabae75b0c3bc393dee52cb6a5020fcb5c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -212,26 +158,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "56372688d8979092cafbe0c5c3895b491166ca1c", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/modules/mirpedrol/magus/guidetree/meta.yml b/modules/mirpedrol/magus/guidetree/meta.yml index dbd4d8b3..45c3830b 100644 --- a/modules/mirpedrol/magus/guidetree/meta.yml +++ b/modules/mirpedrol/magus/guidetree/meta.yml @@ -28,7 +28,7 @@ input: - fasta: type: file description: Input sequences in FASTA format. 
- pattern: "*.{fa,fna,fasta}" + pattern: "*.{fa,fasta}" ontologies: - edam: http://edamontology.org/format_1929 output: @@ -41,7 +41,7 @@ output: - "*.dnd": type: file description: File containing the output guidetree, in newick format. - pattern: "*.dnd" + pattern: "*.{dnd}" - versions: - versions.yml: type: file diff --git a/modules/mirpedrol/muscle5/super5/main.nf b/modules/mirpedrol/muscle5/super5/main.nf index 5aefb2dc..09545015 100644 --- a/modules/mirpedrol/muscle5/super5/main.nf +++ b/modules/mirpedrol/muscle5/super5/main.nf @@ -10,7 +10,7 @@ process MUSCLE5_SUPER5 { tuple val(meta), path(fasta) output: - tuple val(meta), path("*.aln"), emit: alignment + tuple val(meta), path("*.aln.gz"), emit: alignment path "versions.yml" , emit: versions when: @@ -20,6 +20,7 @@ process MUSCLE5_SUPER5 { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" prefix = args.contains('-perm all') ? "${prefix}@" : "${prefix}" + def write_output = (!args.contains('-perm all')) ? 
" -output >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "-output ${prefix}.aln" // muscle internally expands the shell pipe to a file descriptor of the form /dev/fd/ // this causes it to fail, unless -output is left at the end of the call // see also clustalo/align @@ -30,7 +31,7 @@ process MUSCLE5_SUPER5 { -super5 ${fasta} \\ ${args} \\ -threads ${task.cpus} \\ - -output ${prefix}.aln + $write_output # output may be multiple files if -perm all is set @@ -50,7 +51,7 @@ process MUSCLE5_SUPER5 { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/muscle5/super5/meta.yml b/modules/mirpedrol/muscle5/super5/meta.yml index 695d8a77..fb1980a4 100644 --- a/modules/mirpedrol/muscle5/super5/meta.yml +++ b/modules/mirpedrol/muscle5/super5/meta.yml @@ -7,7 +7,7 @@ keywords: - msa - multiple sequence alignment - msa - - align + - alignment tools: - muscle-super5: description: "Muscle v5 is a major re-write of MUSCLE based on new algorithms." 
@@ -29,7 +29,7 @@ input: - fasta: type: file description: Input sequences for alignment must be in FASTA format - pattern: "*.{fasta,fa,fna}" + pattern: "*.{fa,fasta}" ontologies: - edam: http://edamontology.org/format_1929 output: diff --git a/modules/nf-core/csvtk/concat/environment.yml b/modules/nf-core/csvtk/concat/environment.yml index ac58390c..5b97e54e 100644 --- a/modules/nf-core/csvtk/concat/environment.yml +++ b/modules/nf-core/csvtk/concat/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "csvtk_concat" channels: - conda-forge - bioconda - - defaults dependencies: - "bioconda::csvtk=0.30.0" diff --git a/modules/nf-core/csvtk/concat/meta.yml b/modules/nf-core/csvtk/concat/meta.yml index 5f53229c..27ffc1ca 100644 --- a/modules/nf-core/csvtk/concat/meta.yml +++ b/modules/nf-core/csvtk/concat/meta.yml @@ -11,38 +11,41 @@ tools: documentation: http://bioinf.shenwei.me/csvtk tool_dev_url: https://github.com/shenwei356/csvtk licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - csv: - type: file - description: CSV/TSV formatted files - pattern: "*.{csv,tsv}" - - in_format: - type: string - description: Input format (csv, tab, or a delimiting character) - pattern: "*" - - out_format: - type: string - description: Output format (csv, tab, or a delimiting character) - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - csv: + type: file + description: CSV/TSV formatted files + pattern: "*.{csv,tsv}" + - - in_format: + type: string + description: Input format (csv, tab, or a delimiting character) + pattern: "*" + - - out_format: + type: string + description: Output format (csv, tab, or a delimiting character) + pattern: "*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "version.yml" - csv: - type: file - description: Concatenated CSV/TSV file - pattern: "*.{csv,tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${out_extension}: + type: file + description: Concatenated CSV/TSV file + pattern: "*.{csv,tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "version.yml" authors: - "@rpetit3" maintainers: From 8acb2c95a9a84f2cae0420646713d37033559ae0 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 27 Nov 2024 16:07:33 +0100 Subject: [PATCH 22/23] update all modules --- modules.json | 16 +- modules/mirpedrol/clustalo/align/main.nf | 6 +- modules/mirpedrol/clustalo/treealign/main.nf | 6 +- modules/mirpedrol/famsa/align/main.nf | 8 +- modules/mirpedrol/famsa/treealign/main.nf | 8 +- modules/mirpedrol/learnmsa/align/main.nf | 6 +- modules/mirpedrol/magus/align/main.nf | 6 +- modules/mirpedrol/magus/treealign/main.nf | 6 +- modules/mirpedrol/tcoffee/align/main.nf | 15 +- modules/mirpedrol/tcoffee/treealign/main.nf | 15 +- modules/nf-core/csvtk/join/csvtk-join.diff | 9 +- modules/nf-core/csvtk/join/environment.yml | 2 - modules/nf-core/csvtk/join/meta.yml | 48 +++--- modules/nf-core/pigz/compress/environment.yml | 2 - modules/nf-core/pigz/compress/meta.yml | 49 +++--- .../nf-core/pigz/compress/tests/main.nf.test | 10 +- 
.../pigz/compress/tests/main.nf.test.snap | 15 +- modules/nf-core/pigz/uncompress/meta.yml | 46 +++--- .../pigz/uncompress/tests/main.nf.test | 2 +- modules/nf-core/tcoffee/irmsd/environment.yml | 2 - modules/nf-core/tcoffee/irmsd/main.nf | 6 +- modules/nf-core/tcoffee/irmsd/meta.yml | 60 ++++--- .../nf-core/tcoffee/irmsd/tests/main.nf.test | 87 +++++----- .../tcoffee/irmsd/tests/main.nf.test.snap | 16 +- .../tcoffee/seqreformat/environment.yml | 2 - modules/nf-core/tcoffee/seqreformat/main.nf | 4 +- modules/nf-core/tcoffee/seqreformat/meta.yml | 52 +++--- .../tcoffee/seqreformat/tests/main.nf.test | 36 ++++- .../seqreformat/tests/main.nf.test.snap | 29 +++- modules/nf-core/tcoffee/tcs/environment.yml | 2 - modules/nf-core/tcoffee/tcs/main.nf | 4 +- modules/nf-core/tcoffee/tcs/meta.yml | 81 ++++++---- modules/nf-core/tcoffee/tcs/tests/lib.config | 2 + .../nf-core/tcoffee/tcs/tests/main.nf.test | 60 +++++-- .../tcoffee/tcs/tests/main.nf.test.snap | 130 ++++----------- modules/nf-core/untar/environment.yml | 6 +- modules/nf-core/untar/main.nf | 29 +++- modules/nf-core/untar/meta.yml | 43 ++--- modules/nf-core/untar/tests/main.nf.test | 44 ++++- modules/nf-core/untar/tests/main.nf.test.snap | 152 +++++++++++++++--- 40 files changed, 688 insertions(+), 434 deletions(-) diff --git a/modules.json b/modules.json index ff7ec6d7..0007e009 100644 --- a/modules.json +++ b/modules.json @@ -13,7 +13,7 @@ "clustalo/guidetree": { "branch": "main", "git_sha": "1ecfdd11db17d75ba0499992d8d0a3222885645e", - "installed_by": ["msa_guidetree"] + "installed_by": ["modules", "msa_guidetree"] }, "clustalo/treealign": { "branch": "main", @@ -112,18 +112,18 @@ }, "csvtk/join": { "branch": "master", - "git_sha": "614abbf126f287a3068dc86997b2e1b6a93abe20", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/csvtk/join/csvtk-join.diff" }, "pigz/compress": { "branch": "master", - "git_sha": "0eab94fc1e48703c1b0a8704bd665f554905c39d", + 
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "pigz/uncompress": { "branch": "master", - "git_sha": "d7f0de8aae7bf84b080dfdcf4e294bf11a46a51c", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "tcoffee/alncompare": { @@ -133,22 +133,22 @@ }, "tcoffee/irmsd": { "branch": "master", - "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "tcoffee/seqreformat": { "branch": "master", - "git_sha": "32ae618a60a25a870b5fa47ea2060ddcd911ab53", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "tcoffee/tcs": { "branch": "master", - "git_sha": "1cacaceabae75b0c3bc393dee52cb6a5020fcb5c", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "untar": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] } } diff --git a/modules/mirpedrol/clustalo/align/main.nf b/modules/mirpedrol/clustalo/align/main.nf index bb92ef1c..55a1113b 100644 --- a/modules/mirpedrol/clustalo/align/main.nf +++ b/modules/mirpedrol/clustalo/align/main.nf @@ -11,7 +11,7 @@ process CLUSTALO_ALIGN { tuple val(meta) , path(fasta) output: - tuple val(meta), path("*.aln"), emit: alignment + tuple val(meta), path("*.aln.gz"), emit: alignment path "versions.yml" , emit: versions when: @@ -30,7 +30,7 @@ process CLUSTALO_ALIGN { -i ${fasta} \ --threads=${task.cpus} \ $args \ - > ${prefix}.aln + --force -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -43,7 +43,7 @@ process CLUSTALO_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git 
a/modules/mirpedrol/clustalo/treealign/main.nf b/modules/mirpedrol/clustalo/treealign/main.nf index 9cfc82d8..cfa9c117 100644 --- a/modules/mirpedrol/clustalo/treealign/main.nf +++ b/modules/mirpedrol/clustalo/treealign/main.nf @@ -12,7 +12,7 @@ process CLUSTALO_TREEALIGN { tuple val(meta2), path(tree) output: - tuple val(meta), path("*.aln"), emit: alignment + tuple val(meta), path("*.aln.gz"), emit: alignment path "versions.yml" , emit: versions when: @@ -32,7 +32,7 @@ process CLUSTALO_TREEALIGN { --guidetree-in=${tree} \ --threads=${task.cpus} \ $args \ - > ${prefix}.aln + --force -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -45,7 +45,7 @@ process CLUSTALO_TREEALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/famsa/align/main.nf b/modules/mirpedrol/famsa/align/main.nf index cdd3d787..ba7c0eb1 100644 --- a/modules/mirpedrol/famsa/align/main.nf +++ b/modules/mirpedrol/famsa/align/main.nf @@ -13,7 +13,7 @@ process FAMSA_ALIGN { tuple val(meta) , path(fasta) output: - tuple val(meta), path("*.aln"), emit: alignment + tuple val(meta), path("*.aln.gz"), emit: alignment path "versions.yml" , emit: versions when: @@ -23,11 +23,11 @@ process FAMSA_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - famsa \\ + famsa -gz \\ $args \\ -t ${task.cpus} \\ ${fasta} \\ - ${prefix}.aln + ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -38,7 +38,7 @@ process FAMSA_ALIGN { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/famsa/treealign/main.nf b/modules/mirpedrol/famsa/treealign/main.nf index 0280dd09..6d1fd80a 100644 --- 
a/modules/mirpedrol/famsa/treealign/main.nf +++ b/modules/mirpedrol/famsa/treealign/main.nf @@ -14,7 +14,7 @@ process FAMSA_TREEALIGN { tuple val(meta2), path(tree) output: - tuple val(meta), path("*.aln"), emit: alignment + tuple val(meta), path("*.aln.gz"), emit: alignment path "versions.yml" , emit: versions when: @@ -24,12 +24,12 @@ process FAMSA_TREEALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - famsa import $tree \\ + famsa -gt import $tree \\ -gz \\ $args \\ -t ${task.cpus} \\ ${fasta} \\ - ${prefix}.aln + ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -40,7 +40,7 @@ process FAMSA_TREEALIGN { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/learnmsa/align/main.nf b/modules/mirpedrol/learnmsa/align/main.nf index 3cb32576..365768e0 100644 --- a/modules/mirpedrol/learnmsa/align/main.nf +++ b/modules/mirpedrol/learnmsa/align/main.nf @@ -11,7 +11,7 @@ process LEARNMSA_ALIGN { tuple val(meta), path(fasta) output: - tuple val(meta), path("*.aln"), emit: alignment + tuple val(meta), path("*.aln.gz"), emit: alignment path "versions.yml" , emit: versions when: @@ -24,7 +24,7 @@ process LEARNMSA_ALIGN { learnMSA \\ $args \\ -i <(unpigz -cdf $fasta) \\ - -o ${prefix}.aln + -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -37,7 +37,7 @@ process LEARNMSA_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/magus/align/main.nf b/modules/mirpedrol/magus/align/main.nf index f2b292c4..18622ddd 100644 --- a/modules/mirpedrol/magus/align/main.nf +++ b/modules/mirpedrol/magus/align/main.nf @@ -11,7 +11,7 @@ process MAGUS_ALIGN { tuple val(meta) , 
path(fasta) output: - tuple val(meta), path("*.aln"), emit: alignment + tuple val(meta), path("*.aln.gz"), emit: alignment path "versions.yml" , emit: versions when: @@ -28,7 +28,7 @@ process MAGUS_ALIGN { -np $task.cpus \\ -i $fasta \\ -d ./ \\ - -o ${prefix}.aln \\ + --overwrite -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) \\ $args cat <<-END_VERSIONS > versions.yml @@ -42,7 +42,7 @@ process MAGUS_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + echo "" | gzip > ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/magus/treealign/main.nf b/modules/mirpedrol/magus/treealign/main.nf index ea3c2ea5..0fd93baa 100644 --- a/modules/mirpedrol/magus/treealign/main.nf +++ b/modules/mirpedrol/magus/treealign/main.nf @@ -12,7 +12,7 @@ process MAGUS_TREEALIGN { tuple val(meta2), path(tree) output: - tuple val(meta), path("*.aln"), emit: alignment + tuple val(meta), path("*.aln.gz"), emit: alignment path "versions.yml" , emit: versions when: @@ -29,7 +29,7 @@ process MAGUS_TREEALIGN { -np $task.cpus \\ -i $fasta \\ -d ./ \\ - -o ${prefix}.aln \\ + --overwrite -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz) \\ -t $tree \\ $args @@ -44,7 +44,7 @@ process MAGUS_TREEALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + echo "" | gzip > ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/tcoffee/align/main.nf b/modules/mirpedrol/tcoffee/align/main.nf index 5a2c7121..a5cc3bd2 100644 --- a/modules/mirpedrol/tcoffee/align/main.nf +++ b/modules/mirpedrol/tcoffee/align/main.nf @@ -11,7 +11,7 @@ process TCOFFEE_ALIGN { tuple val(meta) , path(fasta) output: - tuple val(meta), path("*.aln"), emit: alignment + tuple val(meta), path("*.aln.gz"), emit: alignment path "versions.yml" , emit: versions when: @@ -26,7 +26,16 @@ process TCOFFEE_ALIGN { $args \ -output fasta_aln \ 
-thread ${task.cpus} \ - -outfile ${prefix}.aln + -outfile stdout \ + | pigz -cp ${task.cpus} > ${prefix}.aln.gz + + # If stdout file exist, then compress the file + # This is a patch for the current behaviour of the regressive algorithm + # that does not support the stdout redirection + if [ -f stdout ]; then + pigz -cp ${task.cpus} < stdout > ${prefix}.aln.gz + rm stdout + fi cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -38,7 +47,7 @@ process TCOFFEE_ALIGN { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mirpedrol/tcoffee/treealign/main.nf b/modules/mirpedrol/tcoffee/treealign/main.nf index 89756aac..76ccb6fd 100644 --- a/modules/mirpedrol/tcoffee/treealign/main.nf +++ b/modules/mirpedrol/tcoffee/treealign/main.nf @@ -12,7 +12,7 @@ process TCOFFEE_TREEALIGN { tuple val(meta2), path(tree) output: - tuple val(meta), path("*.aln"), emit: alignment + tuple val(meta), path("*.aln.gz"), emit: alignment path "versions.yml" , emit: versions when: @@ -28,7 +28,16 @@ process TCOFFEE_TREEALIGN { -output fasta_aln \ $args \ -thread ${task.cpus} \ - -outfile ${prefix}.aln + -outfile stdout \ + | pigz -cp ${task.cpus} > ${prefix}.aln.gz + + # If stdout file exist, then compress the file + # This is a patch for the current behaviour of the regressive algorithm + # that does not support the stdout redirection + if [ -f stdout ]; then + pigz -cp ${task.cpus} < stdout > ${prefix}.aln.gz + rm stdout + fi cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -40,7 +49,7 @@ process TCOFFEE_TREEALIGN { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/csvtk/join/csvtk-join.diff b/modules/nf-core/csvtk/join/csvtk-join.diff index fded83ab..111e68b3 100644 --- a/modules/nf-core/csvtk/join/csvtk-join.diff +++ 
b/modules/nf-core/csvtk/join/csvtk-join.diff @@ -1,4 +1,7 @@ -Changes in module 'nf-core/csvtk/join' +Changes in component 'nf-core/csvtk/join' +'modules/nf-core/csvtk/join/environment.yml' is unchanged +'modules/nf-core/csvtk/join/meta.yml' is unchanged +Changes in 'csvtk/join/main.nf': --- modules/nf-core/csvtk/join/main.nf +++ modules/nf-core/csvtk/join/main.nf @@ -22,12 +22,17 @@ @@ -21,4 +24,8 @@ Changes in module 'nf-core/csvtk/join' cat <<-END_VERSIONS > versions.yml "${task.process}": +'modules/nf-core/csvtk/join/tests/main.nf.test.snap' is unchanged +'modules/nf-core/csvtk/join/tests/tags.yml' is unchanged +'modules/nf-core/csvtk/join/tests/nextflow.config' is unchanged +'modules/nf-core/csvtk/join/tests/main.nf.test' is unchanged ************************************************************ diff --git a/modules/nf-core/csvtk/join/environment.yml b/modules/nf-core/csvtk/join/environment.yml index 5b6c6468..ea951bdb 100644 --- a/modules/nf-core/csvtk/join/environment.yml +++ b/modules/nf-core/csvtk/join/environment.yml @@ -1,7 +1,5 @@ -name: csvtk_join channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::csvtk=0.30.0 diff --git a/modules/nf-core/csvtk/join/meta.yml b/modules/nf-core/csvtk/join/meta.yml index a75ec40f..d8671b17 100644 --- a/modules/nf-core/csvtk/join/meta.yml +++ b/modules/nf-core/csvtk/join/meta.yml @@ -1,5 +1,6 @@ name: csvtk_join -description: Join two or more CSV (or TSV) tables by selected fields into a single table +description: Join two or more CSV (or TSV) tables by selected fields into a single + table keywords: - join - tsv @@ -11,30 +12,33 @@ tools: documentation: http://bioinf.shenwei.me/csvtk tool_dev_url: https://github.com/shenwei356/csvtk licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - csv: - type: file - description: CSV/TSV formatted files - pattern: "*.{csv,tsv}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - csv: + type: file + description: CSV/TSV formatted files + pattern: "*.{csv,tsv}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "version.yml" - csv: - type: file - description: Joined CSV/TSV file - pattern: "*.{csv,tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${out_extension}: + type: file + description: Joined CSV/TSV file + pattern: "*.{csv,tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "version.yml" authors: - "@anoronh4" maintainers: diff --git a/modules/nf-core/pigz/compress/environment.yml b/modules/nf-core/pigz/compress/environment.yml index 7551d187..5016d226 100644 --- a/modules/nf-core/pigz/compress/environment.yml +++ b/modules/nf-core/pigz/compress/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "pigz_compress" channels: - conda-forge - bioconda - - defaults dependencies: - "pigz=2.8" diff --git a/modules/nf-core/pigz/compress/meta.yml b/modules/nf-core/pigz/compress/meta.yml index 42efd735..0966e651 100644 --- a/modules/nf-core/pigz/compress/meta.yml +++ b/modules/nf-core/pigz/compress/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "pigz_compress" description: Compresses files with pigz. 
@@ -12,35 +11,33 @@ tools: homepage: "https://zlib.net/pigz/" documentation: "https://zlib.net/pigz/pigz.pdf" + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - - raw_file: - type: file - description: File to be compressed - pattern: "*.*" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - raw_file: + type: file + description: File to be compressed + pattern: "*.*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - archive: - type: file - description: The compressed file - pattern: "*.gz" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - $archive: + type: file + description: The compressed file + pattern: "*.gz" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@leoisl" maintainers: diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test b/modules/nf-core/pigz/compress/tests/main.nf.test index 248d40fb..b3cb25e3 100644 --- a/modules/nf-core/pigz/compress/tests/main.nf.test +++ b/modules/nf-core/pigz/compress/tests/main.nf.test @@ -14,7 +14,7 @@ nextflow_process { """ input[0] = [ [ id:'test'], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -34,7 +34,7 @@ nextflow_process { """ input[0] = [ [ id:'test'], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', 
checkIfExists: true) ] """ } @@ -42,7 +42,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.archive[0][1]).name).match() } + { assert snapshot( + file(process.out.archive[0][1]).name, + process.out.versions + ).match() + } ) } } diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test.snap b/modules/nf-core/pigz/compress/tests/main.nf.test.snap index 6e50456f..4d8df9f1 100644 --- a/modules/nf-core/pigz/compress/tests/main.nf.test.snap +++ b/modules/nf-core/pigz/compress/tests/main.nf.test.snap @@ -26,12 +26,23 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, "timestamp": "2023-12-11T22:39:53.350546" }, "sarscov2 - genome - fasta - stub": { "content": [ - "genome.fasta.gz" + "genome.fasta.gz", + [ + "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad" + ] ], - "timestamp": "2023-12-11T22:52:24.309192" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T12:18:32.339508" } } \ No newline at end of file diff --git a/modules/nf-core/pigz/uncompress/meta.yml b/modules/nf-core/pigz/uncompress/meta.yml index c2d16cd4..3f583b22 100644 --- a/modules/nf-core/pigz/uncompress/meta.yml +++ b/modules/nf-core/pigz/uncompress/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: "pigz_uncompress" description: write your description here @@ -12,31 +11,32 @@ tools: homepage: "https://zlib.net/pigz/" documentation: "https://zlib.net/pigz/pigz.pdf" + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - - zip: - type: file - description: Gzipped file - pattern: "*.{gzip}" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test']` + - zip: + type: file + description: Gzipped file + pattern: "*.{gzip}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test']` - file: - type: file - description: File to compress - pattern: "*" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test']` + - ${uncompressed_filename}: + type: file + description: File to compress + pattern: "*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@lrauschning" diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test b/modules/nf-core/pigz/uncompress/tests/main.nf.test index b0790982..62ab27e2 100644 --- a/modules/nf-core/pigz/uncompress/tests/main.nf.test +++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test @@ -17,7 +17,7 @@ nextflow_process { process { """ input[0] = [ [ id:'test'], - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] """ } diff --git a/modules/nf-core/tcoffee/irmsd/environment.yml b/modules/nf-core/tcoffee/irmsd/environment.yml index 4065fe70..26a17e70 100644 --- a/modules/nf-core/tcoffee/irmsd/environment.yml +++ b/modules/nf-core/tcoffee/irmsd/environment.yml @@ -1,8 +1,6 @@ -name: "tcoffee_irmsd" channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::t-coffee=13.46.0.919e8c6b - conda-forge::pigz=2.8 diff --git a/modules/nf-core/tcoffee/irmsd/main.nf b/modules/nf-core/tcoffee/irmsd/main.nf index 95e6b61e..be6b6a11 100644 --- a/modules/nf-core/tcoffee/irmsd/main.nf +++ b/modules/nf-core/tcoffee/irmsd/main.nf @@ -8,8 +8,8 @@ process TCOFFEE_IRMSD { 
'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }" input: - tuple val(meta), file (msa) - tuple val(meta2), file(template), file(structures) + tuple val(meta), path (msa) + tuple val(meta2), path(template), path(structures) output: tuple val(meta), path ("${prefix}.irmsd"), emit: irmsd @@ -44,6 +44,8 @@ process TCOFFEE_IRMSD { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${msa.baseName}" """ + # Otherwise, tcoffee will crash when calling its version + export TEMP='./' touch ${prefix}.irmsd cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/tcoffee/irmsd/meta.yml b/modules/nf-core/tcoffee/irmsd/meta.yml index f55fca74..cf930ea0 100644 --- a/modules/nf-core/tcoffee/irmsd/meta.yml +++ b/modules/nf-core/tcoffee/irmsd/meta.yml @@ -6,50 +6,46 @@ keywords: - evaluation tools: - "tcoffee": - description: "A collection of tools for Multiple Alignments of DNA, RNA, Protein Sequence" + description: "A collection of tools for Multiple Alignments of DNA, RNA, Protein + Sequence" homepage: "http://www.tcoffee.org/Projects/tcoffee/" documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" tool_dev_url: "https://github.com/cbcrg/tcoffee" doi: "10.1006/jmbi.2000.4042" licence: ["GPL v3"] + identifier: "" - "pigz": description: "Parallel implementation of the gzip algorithm." homepage: "https://zlib.net/pigz/" documentation: "https://zlib.net/pigz/pigz.pdf" + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', ... ] - - msa: - type: file - description: fasta file containing the alignment to be evaluated. May be gzipped or uncompressed. - pattern: "*.{aln,fa,fasta,fas}{.gz,}" - - template: - type: file - description: Template file matching the structures to the sequences in the alignment. 
- pattern: "*" - - structures: - type: directory - description: Directory containing the structures file matching the sequences in the alignment in PDB format - pattern: "*" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', ... ] + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - irmsd: - type: file - description: File containing the irmsd of the alignment - pattern: "*" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.irmsd: + type: file + description: File containing the irmsd of the alignment + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@luisas" diff --git a/modules/nf-core/tcoffee/irmsd/tests/main.nf.test b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test index 55a574a4..dfb68629 100644 --- a/modules/nf-core/tcoffee/irmsd/tests/main.nf.test +++ b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test @@ -10,31 +10,43 @@ nextflow_process { tag "untar" tag "pigz/compress" - test("seatoxin") { + setup { - setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) + ] - run("UNTAR") { - script "../../../untar/main.nf" - process { - """ - input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) - ] + """ + } + } - """ - } + 
run("PIGZ_COMPRESS") { + script "../../../pigz/compress/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + + """ } } + } + + test("seatoxin") { when { process { """ input[0] = [ [ id:'test' ], // meta map - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) ] - input[1] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/templates/seatoxin-ref_template.txt", checkIfExists: true) ,file(dir).listFiles().collect()]} + input[1] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/templates/seatoxin-ref_template.txt", checkIfExists: true) ,file(dir).listFiles().collect()]} """ } @@ -51,38 +63,37 @@ nextflow_process { test("seatoxin - compressed") { - setup { - - run("UNTAR") { - script "../../../untar/main.nf" - process { - """ - input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) - ] - - """ - } + when { + process { + """ + input[0] = PIGZ_COMPRESS.out.archive + input[1] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/templates/seatoxin-ref_template.txt", checkIfExists: true) ,file(dir).listFiles().collect()]} + """ } - run("PIGZ_COMPRESS") { - script "../../../pigz/compress/main.nf" - process { - """ - input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) - ] + } - """ - } - } + 
then { + assertAll( + { assert process.success }, + { assert path(process.out.irmsd.get(0).get(1)).getText().contains("1ahl") } + ) } + } + + test("seatoxin - uncompressed - stub") { + + options "-stub" + when { process { """ - input[0] = PIGZ_COMPRESS.out.archive - input[1] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/templates/seatoxin-ref_template.txt", checkIfExists: true) ,file(dir).listFiles().collect()]} + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + input[1] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/templates/seatoxin-ref_template.txt", checkIfExists: true) ,file(dir).listFiles().collect()]} """ } @@ -91,7 +102,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.irmsd.get(0).get(1)).getText().contains("1ahl") } + { assert snapshot(process.out).match()} ) } diff --git a/modules/nf-core/tcoffee/irmsd/tests/main.nf.test.snap b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test.snap index c036642f..909d678c 100644 --- a/modules/nf-core/tcoffee/irmsd/tests/main.nf.test.snap +++ b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "seatoxin": { + "seatoxin - uncompressed - stub": { "content": [ { "0": [ @@ -7,25 +7,29 @@ { "id": "test" }, - "setoxin.irmsd:md5,a8f49fb2621cdc9fe39690a813ad0ca5" + "setoxin.irmsd:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ - "versions.yml:md5,60646e38ef71127e3736a06c91c2983f" + "versions.yml:md5,95ade8ac867efd9e18f850bb55b7c9b6" ], "irmsd": [ [ { "id": "test" }, - "setoxin.irmsd:md5,a8f49fb2621cdc9fe39690a813ad0ca5" + "setoxin.irmsd:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ - "versions.yml:md5,60646e38ef71127e3736a06c91c2983f" + 
"versions.yml:md5,95ade8ac867efd9e18f850bb55b7c9b6" ] } ], - "timestamp": "2023-12-13T12:26:46.827121" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T10:15:36.874290895" } } \ No newline at end of file diff --git a/modules/nf-core/tcoffee/seqreformat/environment.yml b/modules/nf-core/tcoffee/seqreformat/environment.yml index 84afe8aa..7cc504fc 100644 --- a/modules/nf-core/tcoffee/seqreformat/environment.yml +++ b/modules/nf-core/tcoffee/seqreformat/environment.yml @@ -1,7 +1,5 @@ -name: tcoffee_seqreformat channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::t-coffee=13.46.0.919e8c6b diff --git a/modules/nf-core/tcoffee/seqreformat/main.nf b/modules/nf-core/tcoffee/seqreformat/main.nf index 774ae2be..04617b01 100644 --- a/modules/nf-core/tcoffee/seqreformat/main.nf +++ b/modules/nf-core/tcoffee/seqreformat/main.nf @@ -38,6 +38,8 @@ process TCOFFEE_SEQREFORMAT { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" """ + # Otherwise, tcoffee will crash when calling its version + export TEMP='./' touch "${prefix}.txt" cat <<-END_VERSIONS > versions.yml @@ -46,5 +48,3 @@ process TCOFFEE_SEQREFORMAT { END_VERSIONS """ } - - diff --git a/modules/nf-core/tcoffee/seqreformat/meta.yml b/modules/nf-core/tcoffee/seqreformat/meta.yml index 9c7d9ce9..17111df3 100644 --- a/modules/nf-core/tcoffee/seqreformat/meta.yml +++ b/modules/nf-core/tcoffee/seqreformat/meta.yml @@ -6,41 +6,39 @@ keywords: - genomics tools: - "tcoffee": - description: "A collection of tools for Computing, Evaluating and Manipulating Multiple Alignments of DNA, RNA, Protein Sequences and Structures." + description: "A collection of tools for Computing, Evaluating and Manipulating + Multiple Alignments of DNA, RNA, Protein Sequences and Structures." 
homepage: "http://www.tcoffee.org/Projects/tcoffee/" documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" tool_dev_url: "https://github.com/cbcrg/tcoffee" doi: "10.1006/jmbi.2000.4042" licence: ["GPL v3"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test' ]` - - - fasta: - type: file - description: Input file to be reformatted - pattern: "*" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - infile: + type: file + description: Input file to be reformatted output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test' ]` - - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - formatted_file: - type: file - description: Formatted file - pattern: "*" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test' ]` + - ${prefix}.txt: + type: file + description: Formatted file + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@luisas" - "@JoseEspinosa" diff --git a/modules/nf-core/tcoffee/seqreformat/tests/main.nf.test b/modules/nf-core/tcoffee/seqreformat/tests/main.nf.test index 7a5492c5..1510e920 100644 --- a/modules/nf-core/tcoffee/seqreformat/tests/main.nf.test +++ b/modules/nf-core/tcoffee/seqreformat/tests/main.nf.test @@ -9,13 +9,13 @@ nextflow_process { tag "tcoffee" tag "tcoffee/seqreformat" - test("sarscov2 - bam") { + test("seatoxin") { when { process { """ input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) ] """ } @@ -25,8 +25,36 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.formatted_file).match("formatted_file")}, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot( + process.out.formatted_file, + process.out.versions + ).match() + } + ) + } + } + + test("seatoxin - stub") { + options "-stub" + when { + process { + """ + input[0] = [ [ id:'test' ], + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.formatted_file, + process.out.versions + ).match() + } ) } } diff --git a/modules/nf-core/tcoffee/seqreformat/tests/main.nf.test.snap b/modules/nf-core/tcoffee/seqreformat/tests/main.nf.test.snap index 150102ee..185f24d8 100644 --- a/modules/nf-core/tcoffee/seqreformat/tests/main.nf.test.snap +++ b/modules/nf-core/tcoffee/seqreformat/tests/main.nf.test.snap @@ -1,23 +1,42 @@ { 
- "formatted_file": { + "seatoxin - stub": { "content": [ [ [ { "id": "test" }, - "test.txt:md5,fcd4691daf120c88ec5def7ac06fb562" + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] + ], + [ + "versions.yml:md5,68fb841e6e44274d430a1382bb0bbd14" ] ], - "timestamp": "2023-11-28T11:56:22.705185493" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:53:55.165479364" }, - "versions": { + "seatoxin": { "content": [ + [ + [ + { + "id": "test" + }, + "test.txt:md5,fcd4691daf120c88ec5def7ac06fb562" + ] + ], [ "versions.yml:md5,68fb841e6e44274d430a1382bb0bbd14" ] ], - "timestamp": "2023-11-28T11:56:22.717235196" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:53:33.814075197" } } \ No newline at end of file diff --git a/modules/nf-core/tcoffee/tcs/environment.yml b/modules/nf-core/tcoffee/tcs/environment.yml index 50480cd8..615d140e 100644 --- a/modules/nf-core/tcoffee/tcs/environment.yml +++ b/modules/nf-core/tcoffee/tcs/environment.yml @@ -1,10 +1,8 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "tcoffee_tcs" channels: - conda-forge - bioconda - - defaults dependencies: - "bioconda::t-coffee=13.46.0.919e8c6b" - conda-forge::pigz=2.8 diff --git a/modules/nf-core/tcoffee/tcs/main.nf b/modules/nf-core/tcoffee/tcs/main.nf index 0c50f52a..9080a317 100644 --- a/modules/nf-core/tcoffee/tcs/main.nf +++ b/modules/nf-core/tcoffee/tcs/main.nf @@ -8,7 +8,7 @@ process TCOFFEE_TCS { 'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }" input: - tuple val(meta), path(msa) + tuple val(meta) , path(msa) tuple val(meta2), path(lib) output: @@ -63,6 +63,8 @@ process TCOFFEE_TCS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ + # Otherwise, tcoffee will crash when calling its version + export TEMP='./' touch ${prefix}.tcs touch 
${prefix}.scores diff --git a/modules/nf-core/tcoffee/tcs/meta.yml b/modules/nf-core/tcoffee/tcs/meta.yml index a28efad0..2846d4ba 100644 --- a/modules/nf-core/tcoffee/tcs/meta.yml +++ b/modules/nf-core/tcoffee/tcs/meta.yml @@ -1,57 +1,72 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "tcoffee_tcs" -description: Compute the TCS score for a MSA or for a MSA plus a library file. Outputs the tcs as it is and a csv with just the total TCS score. +description: Compute the TCS score for a MSA or for a MSA plus a library file. Outputs + the tcs as it is and a csv with just the total TCS score. keywords: - alignment - MSA - evaluation tools: - "tcoffee": - description: "A collection of tools for Multiple Alignments of DNA, RNA, Protein Sequence" + description: "A collection of tools for Multiple Alignments of DNA, RNA, Protein + Sequence" homepage: "http://www.tcoffee.org/Projects/tcoffee/" documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" tool_dev_url: "https://github.com/cbcrg/tcoffee" doi: "10.1006/jmbi.2000.4042" licence: ["GPL v3"] + identifier: "" - "pigz": description: "Parallel implementation of the gzip algorithm." homepage: "https://zlib.net/pigz/" documentation: "https://zlib.net/pigz/pigz.pdf" + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', ... ] - - msa: - type: file - description: fasta file containing the alignment to be evaluated. May be gzipped or uncompressed. - pattern: "*.{aln,fa,fasta,fas}{.gz,}" - - lib: - type: file - description: lib file containing the alignment library of the given msa. - pattern: "*{.tc_lib,*_lib}" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', ... ] + - msa: + type: file + description: fasta file containing the alignment to be evaluated. May be gzipped + or uncompressed. 
+ pattern: "*.{aln,fa,fasta,fas}{.gz,}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - lib: + type: file + description: lib file containing the alignment library of the given msa. + pattern: "*{.tc_lib,*_lib}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - tcs: - type: file - description: The msa represented in tcs format, prepended with TCS scores - pattern: "*.tcs" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.tcs": + type: file + description: The msa represented in tcs format, prepended with TCS scores + pattern: "*.tcs" - scores: - type: file - description: a file containing the score of the alignment in csv format - pattern: "*.scores" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.scores": + type: file + description: a file containing the score of the alignment in csv format + pattern: "*.scores" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@alessiovignoli" diff --git a/modules/nf-core/tcoffee/tcs/tests/lib.config b/modules/nf-core/tcoffee/tcs/tests/lib.config index 56712f63..610250ce 100644 --- a/modules/nf-core/tcoffee/tcs/tests/lib.config +++ b/modules/nf-core/tcoffee/tcs/tests/lib.config @@ -1,5 +1,7 @@ process { + withName: "TCOFFEE_ALIGN"{ ext.args = { "-output fasta_aln -out_lib sample_lib1.tc_lib" } } + } diff --git a/modules/nf-core/tcoffee/tcs/tests/main.nf.test b/modules/nf-core/tcoffee/tcs/tests/main.nf.test index 35636fff..d66a2cda 100644 --- a/modules/nf-core/tcoffee/tcs/tests/main.nf.test +++ b/modules/nf-core/tcoffee/tcs/tests/main.nf.test @@ -10,14 +10,14 @@ nextflow_process { tag "pigz/compress" tag "tcoffee/align" - test("tcs - msa") { + test("tcs - seatoxin") { when { process { """ input[0] = [ - [ id:'test'], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + [ id:'test'], + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) ] input[1] = [ [ id:'test'], @@ -30,13 +30,15 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert path(process.out.tcs.get(0).get(1)).getText().contains("1ahl") }, + { assert snapshot(process.out.versions).match() + } ) } } - test("tcs - msa - compressed") { + test("tcs - seatoxin - compressed") { setup { run("PIGZ_COMPRESS") { @@ -44,12 +46,12 @@ nextflow_process { process { """ input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + 
file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) ] """ } - } + } } when { @@ -67,12 +69,14 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert path(process.out.tcs.get(0).get(1)).getText().contains("1ahl") }, + { assert snapshot(process.out.versions).match() + } ) } } - test("tcs - msa - lib") { + test("tcs - seatoxin - lib") { config "./lib.config" @@ -82,33 +86,63 @@ nextflow_process { process { """ input[0] = [ [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) ] input[1] = [[:],[]] input[2] = [[:],[],[]] input[3] = true """ } - } + } } when { process { """ input[0] = [ - [ id:'test'], - file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + [ id:'test'], + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) ] input[1] = TCOFFEE_ALIGN.out.lib.map { it -> [[ id:'test' ], it[1]] } """ } } + then { + assertAll( + { assert process.success }, + { assert path(process.out.tcs.get(0).get(1)).getText().contains("1ahl") }, + { assert snapshot(process.out.versions).match() + } + ) + } + } + + test("tcs - seatoxin - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + input[1] = [ + [ id:'test'], + [] + ] + """ + } + } + then { assertAll( { assert process.success }, { assert snapshot(process.out).match() } ) } + } } diff --git a/modules/nf-core/tcoffee/tcs/tests/main.nf.test.snap b/modules/nf-core/tcoffee/tcs/tests/main.nf.test.snap index 
8f6fca13..80da7518 100644 --- a/modules/nf-core/tcoffee/tcs/tests/main.nf.test.snap +++ b/modules/nf-core/tcoffee/tcs/tests/main.nf.test.snap @@ -1,54 +1,17 @@ { - "tcs - msa - lib": { + "tcs - seatoxin - compressed": { "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.tcs:md5,d6467c4c358a9fe2d21b5d6d3e128cdb" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.scores:md5,25576b8e2fef74953d28f4e3df689d93" - ] - ], - "2": [ - "versions.yml:md5,99775735c64e1ae150252a3b09576e91" - ], - "scores": [ - [ - { - "id": "test" - }, - "test.scores:md5,25576b8e2fef74953d28f4e3df689d93" - ] - ], - "tcs": [ - [ - { - "id": "test" - }, - "test.tcs:md5,d6467c4c358a9fe2d21b5d6d3e128cdb" - ] - ], - "versions": [ - "versions.yml:md5,99775735c64e1ae150252a3b09576e91" - ] - } + [ + "versions.yml:md5,99775735c64e1ae150252a3b09576e91" + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-03-19T17:52:51.096817" + "timestamp": "2024-09-16T13:13:16.017532678" }, - "tcs - msa": { + "tcs - seatoxin - stub": { "content": [ { "0": [ @@ -56,7 +19,7 @@ { "id": "test" }, - "test.tcs:md5,d6467c4c358a9fe2d21b5d6d3e128cdb" + "test.tcs:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -64,7 +27,7 @@ { "id": "test" }, - "test.scores:md5,25576b8e2fef74953d28f4e3df689d93" + "test.scores:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "2": [ @@ -75,7 +38,7 @@ { "id": "test" }, - "test.scores:md5,25576b8e2fef74953d28f4e3df689d93" + "test.scores:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "tcs": [ @@ -83,7 +46,7 @@ { "id": "test" }, - "test.tcs:md5,d6467c4c358a9fe2d21b5d6d3e128cdb" + "test.tcs:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ @@ -92,58 +55,33 @@ } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-03-19T12:47:34.828638" + "timestamp": "2024-09-16T09:59:35.62776554" }, - "tcs - msa - compressed": { + "tcs - seatoxin - 
lib": { "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.tcs:md5,d6467c4c358a9fe2d21b5d6d3e128cdb" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.scores:md5,25576b8e2fef74953d28f4e3df689d93" - ] - ], - "2": [ - "versions.yml:md5,99775735c64e1ae150252a3b09576e91" - ], - "scores": [ - [ - { - "id": "test" - }, - "test.scores:md5,25576b8e2fef74953d28f4e3df689d93" - ] - ], - "tcs": [ - [ - { - "id": "test" - }, - "test.tcs:md5,d6467c4c358a9fe2d21b5d6d3e128cdb" - ] - ], - "versions": [ - "versions.yml:md5,99775735c64e1ae150252a3b09576e91" - ] - } + [ + "versions.yml:md5,99775735c64e1ae150252a3b09576e91" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T13:13:32.306127259" + }, + "tcs - seatoxin": { + "content": [ + [ + "versions.yml:md5,99775735c64e1ae150252a3b09576e91" + ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-03-19T17:37:52.408687" + "timestamp": "2024-09-16T13:13:00.253922777" } } \ No newline at end of file diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml index 0c9cbb10..c7794856 100644 --- a/modules/nf-core/untar/environment.yml +++ b/modules/nf-core/untar/environment.yml @@ -1,11 +1,7 @@ -name: untar - channels: - conda-forge - bioconda - - defaults - dependencies: - conda-forge::grep=3.11 - - conda-forge::sed=4.7 + - conda-forge::sed=4.8 - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 8a75bb95..9bd8f554 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -4,8 +4,8 @@ process UNTAR { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" input: tuple val(meta), path(archive) @@ -52,8 +52,29 @@ process UNTAR { stub: prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) """ - mkdir $prefix - touch ${prefix}/file.txt + mkdir ${prefix} + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch \${i} + else + mkdir -p \${i} + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml index a9a2110f..290346b3 100644 --- a/modules/nf-core/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -10,30 +10,33 @@ tools: Extract tar.gz files. documentation: https://www.gnu.org/software/tar/manual/ licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - archive: - type: file - description: File to be untar - pattern: "*.{tar}.{gz}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - untar: - type: directory - description: Directory containing contents of archive - pattern: "*/" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - $prefix: + type: directory + description: Directory containing contents of archive + pattern: "*/" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test index 2a7c97bf..c957517a 100644 --- a/modules/nf-core/untar/tests/main.nf.test +++ b/modules/nf-core/untar/tests/main.nf.test @@ -6,6 +6,7 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "untar" + test("test_untar") { when { @@ -19,10 +20,9 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.untar).match("test_untar") }, + { assert snapshot(process.out).match() }, ) } - } test("test_untar_onlyfiles") { @@ -38,10 +38,48 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.untar).match("test_untar_onlyfiles") }, + { assert snapshot(process.out).match() }, ) } + } + + test("test_untar - stub") { + + options "-stub" + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } } + test("test_untar_onlyfiles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert 
snapshot(process.out).match() }, + ) + } + } } diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap index 64550292..ceb91b79 100644 --- a/modules/nf-core/untar/tests/main.nf.test.snap +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -1,42 +1,158 @@ { "test_untar_onlyfiles": { "content": [ - [ - [ + { + "0": [ [ - - ], + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:28.231047" + }, + "test_untar_onlyfiles - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ [ - "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" ] - ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-28T11:49:41.320643" + "timestamp": "2024-07-10T12:04:45.773103" + }, + "test_untar - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + 
"versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:36.777441" }, "test_untar": { "content": [ - [ - [ + { + "0": [ [ - - ], + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ [ - "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", - "opts.k2d:md5,a033d00cf6759407010b21700938f543", - "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" ] - ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-28T11:49:33.795172" + "timestamp": "2024-07-10T12:04:19.377674" } } \ No newline at end of file From 86ad6e789584558041d000ce057bb3d33e3bdc43 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Wed, 27 Nov 2024 16:17:48 +0100 Subject: [PATCH 23/23] allow compressed msa for evaluation --- assets/schema_evaluate.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/schema_evaluate.json b/assets/schema_evaluate.json index 8f432787..fcba3984 100644 --- a/assets/schema_evaluate.json +++ b/assets/schema_evaluate.json @@ -46,9 +46,9 @@ "msa": { "type": "string", "format": "file-path", - "pattern": "^\\S+\\.aln$", + "pattern": "^\\S+\\.aln(\\.gz)?$", "description": "aln file containing the MSA", - "errorMessage": "Must end with .aln", + "errorMessage": "Must end with .aln or .aln.gz", "meta": ["msa"] }, "reference": {