Skip to content

Commit

Permalink
Added fasta_ltrretriever_lai
Browse files Browse the repository at this point in the history
  • Loading branch information
GallVp committed Feb 22, 2024
1 parent 92c5da1 commit d6ba080
Show file tree
Hide file tree
Showing 58 changed files with 3,168 additions and 7 deletions.
8 changes: 4 additions & 4 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,8 @@ process {
]
}

withName: EDTA_LTRHARVEST {
ext.prefix = { "${meta.id}_edta_ltrharvest" }
withName: LTRHARVEST {
ext.prefix = { "${meta.id}_ltrharvest" }
}

withName: LTRFINDER {
Expand All @@ -196,7 +196,7 @@ process {
ext.prefix = { "${meta.id}_ltrharvest_ltrfinder.tabout" }
}

withName: LTRRETRIEVER {
withName: LTRRETRIEVER_LTRRETRIEVER {
publishDir = [
path: { "${params.outdir}/lai" },
mode: params.publish_dir_mode,
Expand All @@ -212,7 +212,7 @@ process {
]
}

withName: LAI {
withName: LTRRETRIEVER_LAI {
publishDir = [
path: { "${params.outdir}/lai" },
mode: params.publish_dir_mode,
Expand Down
40 changes: 40 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,26 @@
"git@github.com:PlantandFoodResearch/nxf-modules.git": {
"modules": {
"pfr": {
"cat/cat": {
"branch": "main",
"git_sha": "4b9da80b1e4c16067babd97554bea42d7cd9ca85",
"installed_by": ["fasta_ltrretriever_lai"]
},
"custom/checkgff3fastacorrespondence": {
"branch": "main",
"git_sha": "1a76f884082c786760559c462063a5d1de94ca83",
"installed_by": ["gff3_validate"]
},
"custom/restoregffids": {
"branch": "main",
"git_sha": "e9f6bdd634bdbcd52c5568ba82f16176ec06631f",
"installed_by": ["fasta_ltrretriever_lai"]
},
"custom/shortenfastaids": {
"branch": "main",
"git_sha": "5e0e41b51d7fc7f68ae43692b6fe19b95d7f3a8c",
"installed_by": ["fasta_ltrretriever_lai"]
},
"gt/gff3": {
"branch": "main",
"git_sha": "bfa4874d3942bdff70cb8df17322834125cafb28",
Expand All @@ -24,11 +39,36 @@
"branch": "main",
"git_sha": "cb5fb0be78a98fd1e32b7c90d6adf8c3bf44133e",
"installed_by": ["modules"]
},
"ltrfinder": {
"branch": "main",
"git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060",
"installed_by": ["fasta_ltrretriever_lai"]
},
"ltrharvest": {
"branch": "main",
"git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060",
"installed_by": ["fasta_ltrretriever_lai"]
},
"ltrretriever/lai": {
"branch": "main",
"git_sha": "154661d7c1769532ff7b5f11259644ec200dd47d",
"installed_by": ["fasta_ltrretriever_lai"]
},
"ltrretriever/ltrretriever": {
"branch": "main",
"git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060",
"installed_by": ["fasta_ltrretriever_lai"]
}
}
},
"subworkflows": {
"pfr": {
"fasta_ltrretriever_lai": {
"branch": "main",
"git_sha": "154661d7c1769532ff7b5f11259644ec200dd47d",
"installed_by": ["subworkflows"]
},
"gff3_validate": {
"branch": "main",
"git_sha": "f9b96bf8142a01f0649ff90570fb10aa973504b9",
Expand Down
7 changes: 7 additions & 0 deletions modules/pfr/cat/cat/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment for the CAT_CAT module; main.nf loads it via
# conda "${moduleDir}/environment.yml".
name: cat_cat
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# pigz is the only packaged dependency. The pinned version (2.3.4) matches the
# pigz:2.3.4 container tags used in main.nf; keep the two in sync when bumping.
- conda-forge::pigz=2.3.4
79 changes: 79 additions & 0 deletions modules/pfr/cat/cat/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// CAT_CAT: concatenate one or more input files into a single output file.
//
// Input : tuple of (meta map, list of files). Compression of the inputs is
//         judged from the FIRST file only (".gz" suffix).
// Output: tuple of (meta, concatenated file named `prefix`) on `file_out`,
//         plus `versions.yml` on `versions`.
//
// The output name defaults to "<meta.id><suffix of first input>" and can be
// overridden with `task.ext.prefix`. Whether the output is gzipped is decided
// purely by whether that name ends in ".gz".
process CAT_CAT {
    tag "$meta.id"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pigz:2.3.4' :
        'biocontainers/pigz:2.3.4' }"

    input:
    tuple val(meta), path(files_in)

    output:
    tuple val(meta), path("${prefix}"), emit: file_out
    path "versions.yml"               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def args2 = task.ext.args2 ?: ''
    def file_list = files_in.collect { it.toString() }

    // choose appropriate concatenation tool depending on input and output format

    // | input     | output     | command1 | command2 |
    // |-----------|------------|----------|----------|
    // | gzipped   | gzipped    | cat      |          |
    // | ungzipped | ungzipped  | cat      |          |
    // | gzipped   | ungzipped  | zcat     |          |
    // | ungzipped | gzipped    | cat      | pigz     |

    // Use input file ending as default.
    // `prefix` is intentionally assigned without `def`: the output block above
    // refers to "${prefix}", so it must not be a closure-local variable.
    prefix   = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
    out_zip  = prefix.endsWith('.gz')
    in_zip   = file_list[0].endsWith('.gz')
    command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
    command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : ''

    // Redirecting into a file that is also an input would truncate it before it is read.
    if(file_list.contains(prefix.trim())) {
        error "The name of the input file can't be the same as for the output prefix in the " +
        "module CAT_CAT (currently `$prefix`). Please choose a different one."
    }
    """
    $command1 \\
        $args \\
        ${file_list.join(' ')} \\
        $command2 \\
        > ${prefix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
    END_VERSIONS
    """

    stub:
    def file_list = files_in.collect { it.toString() }
    // Derive the default prefix exactly as the script branch does, so a stub run
    // produces the same output file name as a real run (e.g. "<id>.fasta.gz"
    // rather than "<id>.gz" for a "*.fasta.gz" input).
    prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
    if(file_list.contains(prefix.trim())) {
        error "The name of the input file can't be the same as for the output prefix in the " +
        "module CAT_CAT (currently `$prefix`). Please choose a different one."
    }
    """
    touch $prefix

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
    END_VERSIONS
    """
}

// Return the extension of `filename`, including the leading dot.
//
// For names ending in ".<ext>.gz" the extension(s) preceding ".gz" are included
// as well, e.g. "genome.fasta.gz" -> ".fasta.gz". Otherwise everything from the
// last dot onwards is returned, e.g. "genome.fasta" -> ".fasta", "reads.gz" -> ".gz".
// A name without any dot yields an empty string (previously this case threw a
// StringIndexOutOfBoundsException because lastIndexOf returned -1).
def getFileSuffix(filename) {
    def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/
    if (match) {
        return match[0][1]
    }
    def lastDot = filename.lastIndexOf('.')
    return lastDot >= 0 ? filename.substring(lastDot) : ''
}

36 changes: 36 additions & 0 deletions modules/pfr/cat/cat/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Module metadata for CAT_CAT: describes the wrapped tool and the module's
# input and output channels.
name: cat_cat
description: A module for concatenation of gzipped or uncompressed files
keywords:
- concatenate
- gzip
- cat
# NOTE(review): only `cat` is listed below, although main.nf also invokes
# `zcat` and `pigz` — confirm whether those should be documented here too.
tools:
- cat:
description: Just concatenation
documentation: https://man7.org/linux/man-pages/man1/cat.1.html
licence: ["GPL-3.0-or-later"]
# Corresponds to `tuple val(meta), path(files_in)` in main.nf.
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- files_in:
type: file
description: List of compressed / uncompressed files
pattern: "*"
# Corresponds to the `versions` and `file_out` emits in main.nf.
output:
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- file_out:
type: file
description: Concatenated file. Will be gzipped if file_out ends with ".gz"
pattern: "${file_out}"
authors:
- "@erikrikarddaniel"
- "@FriederikeHanssen"
maintainers:
- "@erikrikarddaniel"
- "@FriederikeHanssen"
177 changes: 177 additions & 0 deletions modules/pfr/cat/cat/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
// nf-test suite for the CAT_CAT process.
//
// Covers the name-clash guard plus every input/output compression combination:
// plain -> plain, gz -> gz, gz -> plain, plain -> gz, and a single plain file -> gz.
// The two *.config files referenced below are expected to set ext.prefix so that
// the output name (and therefore the output compression) differs from the input.
nextflow_process {

    name "Test Process CAT_CAT"
    script "../main.nf"
    process "CAT_CAT"
    tag "modules"
    tag "modules_nfcore"
    tag "cat"
    tag "cat/cat"

    // meta.id 'genome' + '.fasta' collides with the staged input 'genome.fasta',
    // so the process must abort with the explanatory error message.
    test("test_cat_name_conflict") {
        when {
            params {
                outdir = "${outputDir}"
            }
            process {
                """
                input[0] =
                    [
                        [ id:'genome', single_end:true ],
                        [
                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
                            file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
                        ]
                    ]
                """
            }
        }
        then {
            assertAll(
                { assert !process.success },
                { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }
            )
        }
    }

    test("test_cat_unzipped_unzipped") {
        when {
            params {
                outdir = "${outputDir}"
            }
            process {
                """
                input[0] =
                    [
                        [ id:'test', single_end:true ],
                        [
                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
                            file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
                        ]
                    ]
                """
            }
        }
        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }


    test("test_cat_zipped_zipped") {
        when {
            params {
                outdir = "${outputDir}"
            }
            process {
                """
                input[0] =
                    [
                        [ id:'test', single_end:true ],
                        [
                            file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
                            file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
                        ]
                    ]
                """
            }
        }
        then {
            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
            assertAll(
                { assert process.success },
                // The decoded lines were previously read but never checked; assert that
                // the concatenated output really is readable, non-empty gzip.
                { assert lines.size() > 0 },
                { assert snapshot(process.out).match() }
            )
        }
    }

    test("test_cat_zipped_unzipped") {
        config './nextflow_zipped_unzipped.config'

        when {
            params {
                outdir = "${outputDir}"
            }
            process {
                """
                input[0] =
                    [
                        [ id:'test', single_end:true ],
                        [
                            file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
                            file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
                        ]
                    ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    test("test_cat_unzipped_zipped") {
        config './nextflow_unzipped_zipped.config'
        when {
            params {
                outdir = "${outputDir}"
            }
            process {
                """
                input[0] =
                    [
                        [ id:'test', single_end:true ],
                        [
                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
                            file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
                        ]
                    ]
                """
            }
        }
        then {
            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
            assertAll(
                { assert process.success },
                // The decoded lines were previously read but never checked; assert that
                // the pigz-compressed output really is readable, non-empty gzip.
                { assert lines.size() > 0 },
                { assert snapshot(process.out).match() }
            )
        }
    }

    test("test_cat_one_file_unzipped_zipped") {
        config './nextflow_unzipped_zipped.config'
        when {
            params {
                outdir = "${outputDir}"
            }
            process {
                """
                input[0] =
                    [
                        [ id:'test', single_end:true ],
                        [
                            file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
                        ]
                    ]
                """
            }
        }
        then {
            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
            assertAll(
                { assert process.success },
                // Snapshot the decompressed content (first lines + total count) rather
                // than the raw output file.
                { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") },
                { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")}
            )
        }
    }
}

Loading

0 comments on commit d6ba080

Please sign in to comment.