nf-core · georgiakes · Dec 10, 2025 · Dec 3, 2025 · Dec 4, 2025 · Dec 9, 2025
diff --git a/modules/nf-core/picard/collectvariantcallingmetrics/environment.yml b/modules/nf-core/picard/collectvariantcallingmetrics/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::picard=3.4.0" # same Picard version as the container tags in main.nf
diff --git a/modules/nf-core/picard/collectvariantcallingmetrics/main.nf b/modules/nf-core/picard/collectvariantcallingmetrics/main.nf
@@ -0,0 +1,64 @@
+process PICARD_COLLECTVARIANTCALLINGMETRICS {
+    tag "${meta.id}"
+    label 'process_low'
+    // Picard 3.4.0 is supplied by the conda environment or by the picard:3.4.0 container image.
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/picard:3.4.0--hdfd78af_0'
+        : 'biocontainers/picard:3.4.0--hdfd78af_0'}"
+
+    input:
+    tuple val(meta), path(vcf), path(index), path(intervals_file), path(fasta), path(dict), path(dbsnp), path(dbsnp_index)
+    // index, dict and dbsnp_index are only staged into the work dir; the command never names them.
+    output:
+    tuple val(meta), path("*.variant_calling_detail_metrics"),  emit: detail_metrics
+    tuple val(meta), path("*.variant_calling_summary_metrics"), emit: summary_metrics
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+    // reference/intervals expand to '' when unset, so the command ends on the always-present --TMP_DIR.
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : ""
+    def intervals = intervals_file ? "--TARGET_INTERVALS ${intervals_file}" : ""
+    // JVM heap in MB: 80% of task.memory, or 3072 when no memory is configured.
+    def avail_mem = 3072
+    if (!task.memory) {
+        log.info('[Picard CollectVariantCallingMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.')
+    }
+    else {
+        avail_mem = (task.memory.mega * 0.8).intValue()
+    }
+    """
+    picard \\
+        -Xmx${avail_mem}M \\
+        CollectVariantCallingMetrics \\
+        ${args} \\
+        --THREAD_COUNT ${task.cpus} \\
+        --INPUT ${vcf} \\
+        --OUTPUT ${prefix} \\
+        --DBSNP ${dbsnp} \\
+        ${reference} \\
+        ${intervals} \\
+        --TMP_DIR .
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        picard: \$(echo \$(picard CollectVariantCallingMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
+    END_VERSIONS
+    """
+    // Stub: touches empty files with both metric suffixes; the version is still read from picard.
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.variant_calling_detail_metrics
+    touch ${prefix}.variant_calling_summary_metrics
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        picard: \$(echo \$(picard CollectVariantCallingMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/picard/collectvariantcallingmetrics/meta.yml b/modules/nf-core/picard/collectvariantcallingmetrics/meta.yml
@@ -0,0 +1,88 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "picard_collectvariantcallingmetrics"
+description: Collects per-sample and aggregate (spanning all samples) metrics from the provided VCF file
+keywords:
+  - vcf
+  - metrics
+  - variant calling
+  - statistics
+tools:
+  - "picard":
+      description: "A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) data and formats such as SAM/BAM/CRAM and VCF."
+      homepage: "https://broadinstitute.github.io/picard/"
+      documentation: "https://broadinstitute.github.io/picard/"
+      tool_dev_url: "https://github.com/broadinstitute/picard"
+      licence: ["MIT"]
+      identifier: biotools:picard_tools
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - vcf:
+        type: file
+        description: Input VCF file for analysis
+        pattern: "*.{vcf,vcf.gz}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3016" # VCF
+    - index:
+        type: file
+        description: Index file for the input VCF file
+        pattern: "*.{idx,tbi}"
+    - intervals_file:
+        type: file
+        description: Optional file specifying target intervals (interval_list or BED)
+        pattern: "*.{bed,bed.gz,interval_list}"
+    - fasta:
+        type: file
+        description: Reference sequence file
+        pattern: "*.{fasta,fa,fasta.gz,fa.gz}"
+        ontologies:
+          - edam: "http://edamontology.org/format_1929" # FASTA
+    - dict:
+        type: file
+        description: Reference sequence dictionary file
+        pattern: "*.dict"
+    - dbsnp:
+        type: file
+        description: Reference dbSNP file in dbSNP or VCF format
+        pattern: "*.{vcf,vcf.gz}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3016" # VCF
+    - dbsnp_index:
+        type: file
+        description: Index file for the reference dbSNP file
+        pattern: "*.{idx,tbi}"
+output:
+  detail_metrics:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.variant_calling_detail_metrics":
+          type: file
+          description: Detailed variant calling metrics file
+          pattern: "*.variant_calling_detail_metrics"
+  summary_metrics:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.variant_calling_summary_metrics":
+          type: file
+          description: Summary variant calling metrics file
+          pattern: "*.variant_calling_summary_metrics"
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+
+authors:
+  - "@georgiakes"
+maintainers:
+  - "@georgiakes"
diff --git a/modules/nf-core/picard/collectvariantcallingmetrics/tests/main.nf.test b/modules/nf-core/picard/collectvariantcallingmetrics/tests/main.nf.test
@@ -0,0 +1,130 @@
+nextflow_process {
+
+    name "Test Process PICARD_COLLECTVARIANTCALLINGMETRICS"
+    script "../main.nf"
+    process "PICARD_COLLECTVARIANTCALLINGMETRICS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "picard"
+    tag "picard/collectvariantcallingmetrics"
+
+    test("homo_sapiens - vcf_gz") {
+        // Real run with an empty intervals slot ([]); snapshots md5 sums of metrics lines 5..7 plus the versions output.
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true),
+                    [],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)
+                ]
+                """
+            }
+        }
+        // NOTE(review): lines 5..7 presumably skip run-specific header lines of the metrics files - confirm.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    path(process.out.detail_metrics[0][1]).readLines()[5..7].join('\n').md5(),
+                    path(process.out.summary_metrics[0][1]).readLines()[5..7].join('\n').md5(),
+                    process.out.versions
+                ).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - vcf_gz - stub") {
+        // Stub run (-stub) with an empty intervals slot ([]); snapshots the whole process.out.
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true),
+                    [],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+    test("homo_sapiens - vcf_gz - intervals") {
+        // Real run with genome.interval_list in the intervals slot; snapshots md5 sums of metrics lines 5..7 plus versions.
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.interval_list', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    path(process.out.detail_metrics[0][1]).readLines()[5..7].join('\n').md5(),
+                    path(process.out.summary_metrics[0][1]).readLines()[5..7].join('\n').md5(),
+                    process.out.versions
+                ).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - vcf_gz - intervals - stub") {
+        // Stub run (-stub) with genome.interval_list in the intervals slot; snapshots the whole process.out.
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.interval_list', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}