Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for this module: a single pinned Picard build from bioconda.
channels:
  - conda-forge
  - bioconda
dependencies:
  - "bioconda::picard=3.4.0"
64 changes: 64 additions & 0 deletions modules/nf-core/picard/collectvariantcallingmetrics/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Runs Picard CollectVariantCallingMetrics on one VCF and emits the resulting
// detail and summary metrics files together with a versions.yml.
process PICARD_COLLECTVARIANTCALLINGMETRICS {
    tag "${meta.id}"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://depot.galaxyproject.org/singularity/picard:3.4.0--hdfd78af_0'
        : 'biocontainers/picard:3.4.0--hdfd78af_0'}"

    input:
    // index, dict and dbsnp_index are staged into the work directory but are not
    // referenced on the command line (presumably Picard discovers them next to
    // their parent files - confirm).
    // intervals_file and fasta may be passed as [] to omit the matching option.
    tuple val(meta), path(vcf), path(index), path(intervals_file), path(fasta), path(dict), path(dbsnp), path(dbsnp_index)

    output:
    tuple val(meta), path("*.variant_calling_detail_metrics"), emit: detail_metrics
    tuple val(meta), path("*.variant_calling_summary_metrics"), emit: summary_metrics
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Optional arguments collapse to an empty string when the input is not supplied.
    def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : ""
    def intervals = intervals_file ? "--TARGET_INTERVALS ${intervals_file}" : ""

    // Java heap size in MB: 80% of the task memory, or 3072 MB when no memory is set.
    def avail_mem = 3072
    if (!task.memory) {
        log.info('[Picard CollectVariantCallingMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.')
    }
    else {
        avail_mem = (task.memory.mega * 0.8).intValue()
    }
    // The optional arguments are placed before the unconditional --TMP_DIR so the
    // command never ends on an empty line or a dangling line continuation.
    """
    picard \\
        -Xmx${avail_mem}M \\
        CollectVariantCallingMetrics \\
        ${args} \\
        --THREAD_COUNT ${task.cpus} \\
        --INPUT ${vcf} \\
        --OUTPUT ${prefix} \\
        --DBSNP ${dbsnp} \\
        ${reference} \\
        ${intervals} \\
        --TMP_DIR .

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$(echo \$(picard CollectVariantCallingMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Creates empty placeholders matching the declared output globs.
    """
    touch ${prefix}.variant_calling_detail_metrics
    touch ${prefix}.variant_calling_summary_metrics

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$(echo \$(picard CollectVariantCallingMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
    END_VERSIONS
    """
}
88 changes: 88 additions & 0 deletions modules/nf-core/picard/collectvariantcallingmetrics/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata: describes the single input tuple and the three output channels.
name: "picard_collectvariantcallingmetrics"
description: Collects per-sample and aggregate (spanning all samples) metrics from the provided VCF file
keywords:
  - vcf
  - metrics
  - variant calling
  - statistics
tools:
  - "picard":
      description: "A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) data and formats such as SAM/BAM/CRAM and VCF."
      homepage: "https://broadinstitute.github.io/picard/"
      documentation: "https://broadinstitute.github.io/picard/"
      tool_dev_url: "https://github.com/broadinstitute/picard"
      licence: ["MIT"]
      identifier: biotools:picard_tools

input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1' ]`
    - vcf:
        type: file
        description: Input VCF file for analysis
        pattern: "*.{vcf,vcf.gz}"
        ontologies:
          - edam: "http://edamontology.org/format_3016" # VCF
    - index:
        type: file
        description: Index file for the input VCF file
        pattern: "*.{idx,tbi}"
    - intervals_file:
        type: file
        description: Optional file specifying target intervals (BED or Picard interval_list)
        pattern: "*.{bed,bed.gz,interval_list}"
    - fasta:
        type: file
        description: Reference sequence file
        pattern: "*.{fasta,fa,fasta.gz,fa.gz}"
        ontologies:
          - edam: "http://edamontology.org/format_1929" # FASTA
    - dict:
        type: file
        description: Reference sequence dictionary file
        pattern: "*.dict"
    - dbsnp:
        type: file
        description: Reference dbSNP file in dbSNP or VCF format
        pattern: "*.{vcf,vcf.gz}"
        ontologies:
          - edam: "http://edamontology.org/format_3016" # VCF
    - dbsnp_index:
        type: file
        description: Index file for the reference dbSNP file
        pattern: "*.{idx,tbi}"
output:
  detail_metrics:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "*.variant_calling_detail_metrics":
          type: file
          description: Detailed variant calling metrics file
          pattern: "*.variant_calling_detail_metrics"
  summary_metrics:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "*.variant_calling_summary_metrics":
          type: file
          description: Summary variant calling metrics file
          pattern: "*.variant_calling_summary_metrics"
  versions:
    - versions.yml:
        type: file
        description: File containing software versions
        pattern: "versions.yml"

authors:
  - "@georgiakes"
maintainers:
  - "@georgiakes"
130 changes: 130 additions & 0 deletions modules/nf-core/picard/collectvariantcallingmetrics/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// nf-test suite for the PICARD_COLLECTVARIANTCALLINGMETRICS process.
// Four cases: a real run and a stub run without target intervals, then a real
// run and a stub run with an interval list supplied.
nextflow_process {

name "Test Process PICARD_COLLECTVARIANTCALLINGMETRICS"
script "../main.nf"
process "PICARD_COLLECTVARIANTCALLINGMETRICS"

tag "modules"
tag "modules_nfcore"
tag "picard"
tag "picard/collectvariantcallingmetrics"

// Real run; the fourth tuple element (intervals_file) is an empty list, so no
// interval file is passed to the process.
test("homo_sapiens - vcf_gz") {

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true),
[],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)
]
"""
}
}

// Only lines at indices 5..7 of each metrics file are hashed for the snapshot
// (presumably to skip a header that varies between runs - confirm).
then {
assertAll(
{ assert process.success },
{ assert snapshot(
path(process.out.detail_metrics[0][1]).readLines()[5..7].join('\n').md5(),
path(process.out.summary_metrics[0][1]).readLines()[5..7].join('\n').md5(),
process.out.versions
).match() }
)
}
}

// Stub run with the same inputs as above; the whole output channel set is snapshotted.
test("homo_sapiens - vcf_gz - stub") {

options "-stub"

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true),
[],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}
// Real run with an interval list supplied as the fourth tuple element.
test("homo_sapiens - vcf_gz - intervals"){

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.interval_list',checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)
]
"""
}
}

// Same partial-content snapshot as the first test: md5 of lines 5..7 plus versions.
then {
assertAll(
{ assert process.success },
{ assert snapshot(
path(process.out.detail_metrics[0][1]).readLines()[5..7].join('\n').md5(),
path(process.out.summary_metrics[0][1]).readLines()[5..7].join('\n').md5(),
process.out.versions
).match() }
)
}
}

// Stub run with the interval list supplied.
test("homo_sapiens - vcf_gz - intervals - stub"){

options "-stub"

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.interval_list', checkIfExists: true ),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}
}
Loading
Loading