Added fasta_ltrretriever_lai

Plant-Food-Research-Open · Feb 22, 2024 · d6ba080 · d6ba080
1 parent 92c5da1
commit d6ba080
Show file tree

Hide file tree

Showing 58 changed files with 3,168 additions and 7 deletions.
diff --git a/conf/modules.config b/conf/modules.config
@@ -184,8 +184,8 @@ process {
         ]
     }
 
-    withName: EDTA_LTRHARVEST {
-        ext.prefix = { "${meta.id}_edta_ltrharvest" }
+    withName: LTRHARVEST {
+        ext.prefix = { "${meta.id}_ltrharvest" }
     }
 
     withName: LTRFINDER {
@@ -196,7 +196,7 @@ process {
         ext.prefix = { "${meta.id}_ltrharvest_ltrfinder.tabout" }
     }
 
-    withName: LTRRETRIEVER {
+    withName: LTRRETRIEVER_LTRRETRIEVER {
         publishDir = [
             path: { "${params.outdir}/lai" },
             mode: params.publish_dir_mode,
@@ -212,7 +212,7 @@ process {
         ]
     }
 
-    withName: LAI {
+    withName: LTRRETRIEVER_LAI {
         publishDir = [
             path: { "${params.outdir}/lai" },
             mode: params.publish_dir_mode,

diff --git a/modules.json b/modules.json
@@ -5,11 +5,26 @@
         "git@github.com:PlantandFoodResearch/nxf-modules.git": {
             "modules": {
                 "pfr": {
+                    "cat/cat": {
+                        "branch": "main",
+                        "git_sha": "4b9da80b1e4c16067babd97554bea42d7cd9ca85",
+                        "installed_by": ["fasta_ltrretriever_lai"]
+                    },
                     "custom/checkgff3fastacorrespondence": {
                         "branch": "main",
                         "git_sha": "1a76f884082c786760559c462063a5d1de94ca83",
                         "installed_by": ["gff3_validate"]
                     },
+                    "custom/restoregffids": {
+                        "branch": "main",
+                        "git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
+                        "installed_by": ["fasta_ltrretriever_lai"]
+                    },
+                    "custom/shortenfastaids": {
+                        "branch": "main",
+                        "git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
+                        "installed_by": ["fasta_ltrretriever_lai"]
+                    },
                     "gt/gff3": {
                         "branch": "main",
                         "git_sha": "bfa4874d3942bdff70cb8df17322834125cafb28",
@@ -24,11 +39,36 @@
                         "branch": "main",
                         "git_sha": "cb5fb0be78a98fd1e32b7c90d6adf8c3bf44133e",
                         "installed_by": ["modules"]
+                    },
+                    "ltrfinder": {
+                        "branch": "main",
+                        "git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060",
+                        "installed_by": ["fasta_ltrretriever_lai"]
+                    },
+                    "ltrharvest": {
+                        "branch": "main",
+                        "git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060",
+                        "installed_by": ["fasta_ltrretriever_lai"]
+                    },
+                    "ltrretriever/lai": {
+                        "branch": "main",
+                        "git_sha": "154661d7c1769532ff7b5f11259644ec200dd47d",
+                        "installed_by": ["fasta_ltrretriever_lai"]
+                    },
+                    "ltrretriever/ltrretriever": {
+                        "branch": "main",
+                        "git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060",
+                        "installed_by": ["fasta_ltrretriever_lai"]
                     }
                 }
             },
             "subworkflows": {
                 "pfr": {
+                    "fasta_ltrretriever_lai": {
+                        "branch": "main",
+                        "git_sha": "154661d7c1769532ff7b5f11259644ec200dd47d",
+                        "installed_by": ["subworkflows"]
+                    },
                     "gff3_validate": {
                         "branch": "main",
                         "git_sha": "f9b96bf8142a01f0649ff90570fb10aa973504b9",

diff --git a/modules/pfr/cat/cat/environment.yml b/modules/pfr/cat/cat/environment.yml
@@ -0,0 +1,7 @@
+# Conda environment for the CAT_CAT module. The pigz version pinned here is the
+# same version used in the container tags declared in main.nf (pigz:2.3.4).
+name: cat_cat
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::pigz=2.3.4
diff --git a/modules/pfr/cat/cat/main.nf b/modules/pfr/cat/cat/main.nf
@@ -0,0 +1,79 @@
+// Concatenate the staged input files into a single output file, compressing or
+// decompressing on the fly when the input and output compression states differ.
+//
+// Inputs : tuple of a meta map (only `meta.id` is read here) and the file(s) to join.
+// Outputs: `file_out` (the concatenated file, named `prefix`) and `versions.yml`.
+// Fails  : with an explicit error when the output name equals one of the input names,
+//          because the shell redirection would otherwise overwrite an input.
+process CAT_CAT {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pigz:2.3.4' :
+        'biocontainers/pigz:2.3.4' }"
+
+    input:
+    tuple val(meta), path(files_in)
+
+    output:
+    tuple val(meta), path("${prefix}"), emit: file_out
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def file_list = files_in.collect { it.toString() }
+
+    // choose appropriate concatenation tool depending on input and output format
+
+    // | input     | output     | command1 | command2 |
+    // |-----------|------------|----------|----------|
+    // | gzipped   | gzipped    | cat      |          |
+    // | ungzipped | ungzipped  | cat      |          |
+    // | gzipped   | ungzipped  | zcat     |          |
+    // | ungzipped | gzipped    | cat      | pigz     |
+
+    // Use input file ending as default.
+    // `prefix` is deliberately left undeclared (no `def`): the output block reads it.
+    prefix       = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
+    // Compression state is inferred from file names only; the first input decides
+    // how every input is read.
+    def out_zip  = prefix.endsWith('.gz')
+    def in_zip   = file_list[0].endsWith('.gz')
+    def command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
+    def command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : ''
+    if(file_list.contains(prefix.trim())) {
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
+    """
+    $command1 \\
+        $args \\
+        ${file_list.join(' ')} \\
+        $command2 \\
+        > ${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+
+    stub:
+    def file_list   = files_in.collect { it.toString() }
+    // Derive the default name with the same helper as the script block so that stub
+    // and real runs emit the same file name (e.g. `.fasta.gz` rather than just `.gz`).
+    prefix          = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
+    if(file_list.contains(prefix.trim())) {
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
+    """
+    touch $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+}
+
+// Return the extension of `filename`, including the leading dot.
+// For .gz files the preceding extension(s) are kept as well, e.g. `genome.fasta.gz`
+// gives `.fasta.gz`. The pattern allows up to two preceding extensions of 1-5 word
+// characters each, so `a.ref.fasta.gz` gives `.ref.fasta.gz`.
+// For any other name the text from the last dot onwards is returned; a name without
+// any dot yields an empty string instead of raising an index error.
+def getFileSuffix(filename) {
+    def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/
+    if (match) {
+        return match[0][1]
+    }
+    // lastIndexOf returns -1 when there is no dot; substring(-1) would throw.
+    def lastDot = filename.lastIndexOf('.')
+    return lastDot >= 0 ? filename.substring(lastDot) : ''
+}
+
diff --git a/modules/pfr/cat/cat/meta.yml b/modules/pfr/cat/cat/meta.yml
@@ -0,0 +1,36 @@
+name: cat_cat
+description: A module for concatenation of gzipped or uncompressed files
+keywords:
+  - concatenate
+  - gzip
+  - cat
+tools:
+  - cat:
+      description: Just concatenation
+      documentation: https://man7.org/linux/man-pages/man1/cat.1.html
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - files_in:
+      type: file
+      description: List of files to concatenate, either all gzipped or all uncompressed (the extension of the first file decides how every input is read)
+      pattern: "*"
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - file_out:
+      type: file
+      description: Concatenated file. Will be gzipped if file_out ends with ".gz"
+      pattern: "${file_out}"
+authors:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
+maintainers:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
diff --git a/modules/pfr/cat/cat/tests/main.nf.test b/modules/pfr/cat/cat/tests/main.nf.test
@@ -0,0 +1,177 @@
+nextflow_process {
+
+    name "Test Process CAT_CAT"
+    script "../main.nf"
+    process "CAT_CAT"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cat"
+    tag "cat/cat"
+
+    // Expects the process to fail with the module's name-conflict error.
+    // The default output name is `<meta.id><suffix of first input>`, i.e. `genome.fasta`
+    // here; presumably the staged test file is also named `genome.fasta` — TODO confirm
+    // against the test data.
+    test("test_cat_name_conflict") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'genome', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert !process.success },
+                { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }
+            )
+        }
+    }
+
+    // Two uncompressed inputs with no extra config: the output keeps the first input's
+    // suffix, so the plain `cat` path of the module is exercised.
+    test("test_cat_unzipped_unzipped") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+
+    // Two gzipped inputs with no extra config: the output keeps the gzipped suffix of
+    // the first input, so the files are joined with plain `cat`.
+    test("test_cat_zipped_zipped") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            // NOTE(review): `lines` is not referenced by the assertions below, and it is
+            // read before `process.success` is checked — a failed process would surface
+            // here rather than in the success assertion.
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    // Gzipped inputs, run with a dedicated config file. The config is not visible here;
+    // presumably it sets an output prefix without `.gz` so the `zcat` path is taken —
+    // TODO confirm.
+    test("test_cat_zipped_unzipped") {
+        config './nextflow_zipped_unzipped.config'
+
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    // Uncompressed inputs, run with a dedicated config file. The config is not visible
+    // here; presumably it sets an output prefix ending in `.gz` so the `pigz` path is
+    // taken — TODO confirm.
+    test("test_cat_unzipped_zipped") {
+        config './nextflow_unzipped_zipped.config'
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
+                            file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            // NOTE(review): `lines` is not referenced by the assertions below, and it is
+            // read before `process.success` is checked — a failed process would surface
+            // here rather than in the success assertion.
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    // Single uncompressed input, run with the same config as the unzipped-to-zipped test.
+    // Instead of snapshotting the whole output channel, this test snapshots the first
+    // six decompressed lines and the total line count of the gzipped result.
+    test("test_cat_one_file_unzipped_zipped") {
+        config './nextflow_unzipped_zipped.config'
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            // Read before the success assertion; a failed process would surface here.
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") },
+                { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")}
+            )
+        }
+    }
+}
+