From 566f67a9c982a705d15794401eb98b5ecaf0436c Mon Sep 17 00:00:00 2001 From: Fernando Duarte <123090819+FernandoDuarteF@users.noreply.github.com> Date: Tue, 13 May 2025 19:31:22 +0000 Subject: [PATCH 1/4] Input declaration and added plink/vcf --- assets/samplesheet.csv | 5 +- assets/schema_input.json | 16 +- modules.json | 5 + modules/nf-core/plink/vcf/environment.yml | 7 + modules/nf-core/plink/vcf/main.nf | 54 +++++++ modules/nf-core/plink/vcf/meta.yml | 68 +++++++++ modules/nf-core/plink/vcf/tests/main.nf.test | 57 +++++++ .../nf-core/plink/vcf/tests/main.nf.test.snap | 144 ++++++++++++++++++ .../nf-core/plink/vcf/tests/nextflow.config | 5 + .../local/utils_nfcore_gwas_pipeline/main.nf | 23 +-- workflows/gwas.nf | 15 ++ 11 files changed, 369 insertions(+), 30 deletions(-) create mode 100644 modules/nf-core/plink/vcf/environment.yml create mode 100644 modules/nf-core/plink/vcf/main.nf create mode 100644 modules/nf-core/plink/vcf/meta.yml create mode 100644 modules/nf-core/plink/vcf/tests/main.nf.test create mode 100644 modules/nf-core/plink/vcf/tests/main.nf.test.snap create mode 100644 modules/nf-core/plink/vcf/tests/nextflow.config diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab..7cac86e 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,2 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +id,vcf,pheno,cov +test,https://github.com/nf-core/test-datasets/blob/gwas/data/data_shrink_combined_4500/chr1_to_22_and_X.vcf.bgz,https://github.com/nf-core/test-datasets/blob/gwas/data/data_phenotypes_and_covariates/example1.pheno,https://github.com/nf-core/test-datasets/blob/gwas/data/data_phenotypes_and_covariates/example1.covar diff --git a/assets/schema_input.json b/assets/schema_input.json index 6011b20..0e2b67e 100644 --- 
a/assets/schema_input.json +++ b/assets/schema_input.json @@ -7,27 +7,31 @@ "items": { "type": "object", "properties": { - "sample": { + "id": { "type": "string", "pattern": "^\\S+$", "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["id"] }, - "fastq_1": { + "vcf": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "errorMessage": "VCF file must be provided and must exist" }, - "fastq_2": { + "pheno": { + "type": "string", + "format": "file-path", + "exists": true, + "errorMessage": "Phenotype file must be provided and must exist" + }, + "cov": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "errorMessage": "Covariate file, if provided, must exist" } }, - "required": ["sample", "fastq_1"] + "required": ["id", "vcf", "pheno"] } } diff --git a/modules.json b/modules.json index 5ea2d55..9d3d943 100644 --- a/modules.json +++ b/modules.json @@ -9,6 +9,11 @@ "branch": "master", "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", "installed_by": ["modules"] + }, + "plink/vcf": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/plink/vcf/environment.yml b/modules/nf-core/plink/vcf/environment.yml new file mode 100644 index 0000000..fe7bb86 --- /dev/null +++ b/modules/nf-core/plink/vcf/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::plink=1.90b6.21 diff --git a/modules/nf-core/plink/vcf/main.nf b/modules/nf-core/plink/vcf/main.nf new file mode 100644 index 0000000..8419be1 --- /dev/null +++ 
b/modules/nf-core/plink/vcf/main.nf @@ -0,0 +1,54 @@ +process PLINK_VCF { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/plink:1.90b6.21--h779adbc_1' : + 'biocontainers/plink:1.90b6.21--h779adbc_1' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.bed"), emit: bed, optional: true + tuple val(meta), path("*.bim"), emit: bim, optional: true + tuple val(meta), path("*.fam"), emit: fam, optional: true + + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + plink \\ + --vcf ${vcf} \\ + $args \\ + --threads $task.cpus \\ + --out ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + plink: \$(echo \$(plink --version 2>&1) | sed 's/^PLINK v//' | sed 's/..-bit.*//' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bed + touch ${prefix}.bim + touch ${prefix}.fam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + plink: \$(echo \$(plink --version 2>&1) | sed 's/^PLINK v//' | sed 's/..-bit.*//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/plink/vcf/meta.yml b/modules/nf-core/plink/vcf/meta.yml new file mode 100644 index 0000000..d6eefd9 --- /dev/null +++ b/modules/nf-core/plink/vcf/meta.yml @@ -0,0 +1,68 @@ +name: plink_vcf +description: Analyses variant calling files using plink +keywords: + - plink + - vcf + - variant + - call +tools: + - plink: + description: | + Whole genome association analysis toolset, designed to perform a range + of basic, large-scale analyses in a computationally efficient manner + homepage: "https://www.cog-genomics.org/plink" + tool_dev_url: 
"https://www.cog-genomics.org/plink/1.9/dev" + licence: ["GPL"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: Variant calling file (vcf) + pattern: "*.{vcf}" +output: + - bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bed": + type: file + description: PLINK binary biallelic genotype table + pattern: "*.{bed}" + - bim: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bim": + type: file + description: PLINK extended MAP file + pattern: "*.{bim}" + - fam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fam": + type: file + description: PLINK sample information file + pattern: "*.{fam}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Mxrcon" + - "@abhi18av" +maintainers: + - "@Mxrcon" + - "@abhi18av" diff --git a/modules/nf-core/plink/vcf/tests/main.nf.test b/modules/nf-core/plink/vcf/tests/main.nf.test new file mode 100644 index 0000000..f091ac3 --- /dev/null +++ b/modules/nf-core/plink/vcf/tests/main.nf.test @@ -0,0 +1,57 @@ + +nextflow_process { + + name "Test Process PLINK_VCF" + script "../main.nf" + process "PLINK_VCF" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "plink" + tag "plink/vcf" + + test("test-plink-vcf") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-plink-vcf-stub") { 
+ options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/plink/vcf/tests/main.nf.test.snap b/modules/nf-core/plink/vcf/tests/main.nf.test.snap new file mode 100644 index 0000000..bd25aad --- /dev/null +++ b/modules/nf-core/plink/vcf/tests/main.nf.test.snap @@ -0,0 +1,144 @@ +{ + "test-plink-vcf-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bim:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,e8954e602ae53e958694bbc72e67abca" + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bim": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bim:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e8954e602ae53e958694bbc72e67abca" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T15:53:07.531154" + }, + "test-plink-vcf": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,55c3ab2636212911b5f952ef6f5d855c" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bim:md5,54164b6f103e152de05712c6bb317db8" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.fam:md5,0c6d4ffd228a3c5248b5a73eef84b9cc" + ] + ], + "3": [ + "versions.yml:md5,e8954e602ae53e958694bbc72e67abca" + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,55c3ab2636212911b5f952ef6f5d855c" + ] + ], + "bim": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bim:md5,54164b6f103e152de05712c6bb317db8" + ] + ], + "fam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fam:md5,0c6d4ffd228a3c5248b5a73eef84b9cc" + ] + ], + "versions": [ + "versions.yml:md5,e8954e602ae53e958694bbc72e67abca" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T15:53:02.837603" + } +} \ No newline at end of file diff --git a/modules/nf-core/plink/vcf/tests/nextflow.config b/modules/nf-core/plink/vcf/tests/nextflow.config new file mode 100644 index 0000000..bb80765 --- /dev/null +++ b/modules/nf-core/plink/vcf/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: PLINK_VCF { + ext.args = ' --make-bed --biallelic-only strict --vcf-half-call missing --double-id --recode ped --id-delim \'=\' --allow-extra-chr' + } +} diff --git a/subworkflows/local/utils_nfcore_gwas_pipeline/main.nf b/subworkflows/local/utils_nfcore_gwas_pipeline/main.nf index 97f52c5..a72e36e 100644 --- a/subworkflows/local/utils_nfcore_gwas_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_gwas_pipeline/main.nf @@ -69,22 +69,9 @@ workflow PIPELINE_INITIALISATION { Channel .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) - .map { - meta, fastq_1, fastq_2 -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() .map { samplesheet -> validateInputSamplesheet(samplesheet) } - .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] - } .set { ch_samplesheet } emit: @@ -150,15 +137,9 @@ workflow PIPELINE_COMPLETION { // Validate 
channels from input samplesheet // def validateInputSamplesheet(input) { - def (metas, fastqs) = input[1..2] - - // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 - if (!endedness_ok) { - error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") - } + def (meta, vcf, pheno, cov) = input - return [ metas[0], fastqs ] + return [ meta, vcf, pheno, cov ] } // // Generate methods description for MultiQC diff --git a/workflows/gwas.nf b/workflows/gwas.nf index c35b58d..3651464 100644 --- a/workflows/gwas.nf +++ b/workflows/gwas.nf @@ -8,6 +8,7 @@ include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_gwas_pipeline' +include { PLINK_VCF } from '../modules/nf-core/plink/vcf/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -24,6 +25,20 @@ workflow GWAS { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() + // Prob multiMap is not necessary but I'll use it in case + // it's helpful in the future + ch_inputs = ch_samplesheet + | multiMap { + meta, vcf, pheno, cov -> + vcf : [ meta,vcf ] + pheno : [ meta, pheno] + cov : cov ? 
[ meta,cov ] : null + } + + PLINK_VCF ( + ch_inputs.vcf + ) + // // Collate and save software versions // From 8d846278c9b50127b3b35174f0dee96a071b518c Mon Sep 17 00:00:00 2001 From: Fernando Duarte <123090819+FernandoDuarteF@users.noreply.github.com> Date: Tue, 13 May 2025 20:22:03 +0000 Subject: [PATCH 2/4] Module plink/vcf can be run --- assets/samplesheet.csv | 2 +- subworkflows/local/utils_nfcore_gwas_pipeline/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 7cac86e..efbba4d 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,2 +1,2 @@ id,vcf,pheno,cov -test,https://github.com/nf-core/test-datasets/blob/gwas/data/data_shrink_combined_4500/chr1_to_22_and_X.vcf.bgz,https://github.com/nf-core/test-datasets/blob/gwas/data/data_phenotypes_and_covariates/example1.pheno,https://github.com/nf-core/test-datasets/blob/gwas/data/data_phenotypes_and_covariates/example1.covar +test,https://raw.githubusercontent.com/nf-core/test-datasets/gwas/data/data_shrink_combined_4500/chr1_to_22_and_X.vcf.bgz,https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/gwas/data/data_phenotypes_and_covariates/example1.pheno,https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/gwas/data/data_phenotypes_and_covariates/example1.covar diff --git a/subworkflows/local/utils_nfcore_gwas_pipeline/main.nf b/subworkflows/local/utils_nfcore_gwas_pipeline/main.nf index a72e36e..82b1bb1 100644 --- a/subworkflows/local/utils_nfcore_gwas_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_gwas_pipeline/main.nf @@ -138,7 +138,7 @@ workflow PIPELINE_COMPLETION { // def validateInputSamplesheet(input) { def (meta, vcf, pheno, cov) = input - + return [ meta, vcf, pheno, cov ] } // From 1b1e2c6bd844fd9db325460306d9fa798a68933e Mon Sep 17 00:00:00 2001 From: Fernando Duarte <123090819+FernandoDuarteF@users.noreply.github.com> Date: Wed, 14 May 2025 16:39:11 +0000 Subject: [PATCH 3/4] 
Added plink association --- conf/modules.config | 7 + modules.json | 5 + modules/nf-core/plink/gwas/environment.yml | 7 + modules/nf-core/plink/gwas/main.nf | 94 +++++++++ modules/nf-core/plink/gwas/meta.yml | 108 ++++++++++ modules/nf-core/plink/gwas/tests/main.nf.test | 198 ++++++++++++++++++ .../plink/gwas/tests/main.nf.test.snap | 134 ++++++++++++ .../nf-core/plink/gwas/tests/nextflow.config | 7 + .../plink/gwas/tests/pheno_name.config | 7 + modules/nf-core/plink/vcf/main.nf | 5 +- workflows/gwas.nf | 28 ++- 11 files changed, 596 insertions(+), 4 deletions(-) create mode 100644 modules/nf-core/plink/gwas/environment.yml create mode 100644 modules/nf-core/plink/gwas/main.nf create mode 100644 modules/nf-core/plink/gwas/meta.yml create mode 100644 modules/nf-core/plink/gwas/tests/main.nf.test create mode 100644 modules/nf-core/plink/gwas/tests/main.nf.test.snap create mode 100644 modules/nf-core/plink/gwas/tests/nextflow.config create mode 100644 modules/nf-core/plink/gwas/tests/pheno_name.config diff --git a/conf/modules.config b/conf/modules.config index f0b0d55..dce24e0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -27,4 +27,11 @@ process { ] } + withName: 'PLINK_GWAS' { + ext.args = '--allow-no-sex --linear' + publishDir = [ + path: { "${params.outdir}/plink_gwas" }, + mode: params.publish_dir_mode + ] + } } diff --git a/modules.json b/modules.json index 9d3d943..f12b644 100644 --- a/modules.json +++ b/modules.json @@ -10,6 +10,11 @@ "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", "installed_by": ["modules"] }, + "plink/gwas": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, "plink/vcf": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", diff --git a/modules/nf-core/plink/gwas/environment.yml b/modules/nf-core/plink/gwas/environment.yml new file mode 100644 index 0000000..fe7bb86 --- /dev/null +++ b/modules/nf-core/plink/gwas/environment.yml @@ 
-0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::plink=1.90b6.21 diff --git a/modules/nf-core/plink/gwas/main.nf b/modules/nf-core/plink/gwas/main.nf new file mode 100644 index 0000000..7e42cfe --- /dev/null +++ b/modules/nf-core/plink/gwas/main.nf @@ -0,0 +1,94 @@ +process PLINK_GWAS { + tag "$meta.id" + label 'process_medium' + + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/plink:1.90b6.21--h031d066_5': + 'biocontainers/plink:1.90b6.21--h031d066_5' }" + + input: + tuple val(meta), path(bed), path(bim), path(fam) + tuple val(meta2), path(vcf) + tuple val(meta3), path(bcf) + tuple val(meta4), path(phe) + + output: + tuple val(meta), path("*.assoc"), emit: assoc, optional:true + tuple val(meta), path("*.qassoc"), emit: qassoc, optional:true + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*.nosex"), emit: nosex , optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = "" + // define input string based on provided input files + // in hierarchical order + def input_command = "" + def outmeta = "" + if (bed){ + input_command = "--bed ${bed} --bim ${bim} --fam ${fam}" + prefix = task.ext.prefix ?: "${meta.id}" + } else if (vcf) { + input_command = "--vcf ${vcf} --pheno ${phe}" + prefix = task.ext.prefix ?: "${meta2.id}" + meta = meta2 + } else if (bcf) { + input_command = "--bcf ${bcf} --pheno ${phe}" + prefix = task.ext.prefix ?: "${meta3.id}" + meta = meta3 + } else { + log.error 'ERROR: the input should be either plink native binary format, VCF or BCF' + } + + """ + plink \\ + $input_command \\ + --threads $task.cpus \\ + --assoc \\ + 
$args \\ + --out $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + plink: \$(echo \$(plink --version) | sed 's/^PLINK v//;s/64.*//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = "" + // define input string based on provided input files + // in hierarchical order + def input_command = "" + def outmeta = "" + if (bed){ + input_command = "--bed ${bed} --bim ${bim} --fam ${fam}" + prefix = task.ext.prefix ?: "${meta.id}" + } else if (vcf) { + input_command = "--vcf ${vcf} --pheno ${phe}" + prefix = task.ext.prefix ?: "${meta2.id}" // prefix names the touched stub files, so it must stay a bare id + meta = meta2 + } else if (bcf) { + input_command = "--bcf ${bcf} --pheno ${phe}" + prefix = task.ext.prefix ?: "${meta3.id}" + meta = meta3 + } else { + log.error 'ERROR: the input should be either plink native binary format, VCF or BCF' + } + """ + touch ${prefix}.assoc + touch ${prefix}.nosex + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + plink: \$(echo \$(plink --version) | sed 's/^PLINK v//;s/64.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/plink/gwas/meta.yml b/modules/nf-core/plink/gwas/meta.yml new file mode 100644 index 0000000..67a6272 --- /dev/null +++ b/modules/nf-core/plink/gwas/meta.yml @@ -0,0 +1,108 @@ +name: "plink_gwas" +description: Generate GWAS association studies +keywords: + - association + - GWAS + - case/control +tools: + - "plink": + description: "Whole genome association analysis toolset, designed to perform a + range of basic, large-scale analyses in a computationally efficient manner." + homepage: "https://www.cog-genomics.org/plink" + documentation: "https://www.cog-genomics.org/plink/1.9/data#recode" + tool_dev_url: "https://www.cog-genomics.org/plink/1.9/dev" + licence: ["GPL"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + meta is associated to the PLINK native file input + - bed: + type: file + description: PLINK binary biallelic genotype table file + pattern: "*.{bed}" + - bim: + type: file + description: PLINK extended MAP file + pattern: "*.{bim}" + - fam: + type: file + description: PLINK sample information file + pattern: "*.{fam}" + - - meta2: + type: map + description: | + Groovy Map containing sample information, + e.g. [ id:'test', single_end:false ] + meta2 is associated to VCF file input + - vcf: + type: file + description: Variant calling file (vcf) + pattern: "*.{vcf}" + - - meta3: + type: map + description: | + Groovy Map containing sample information, + e.g. [ id:'test', single_end:false ] + meta3 is associated to BCF file input + - bcf: + type: file + description: PLINK variant information + sample ID + genotype call binary file + pattern: "*.{bcf}" + - - meta4: + type: map + description: | + Groovy Map containing sample information, + e.g. [ id:'test', single_end:false ] + meta4 is associated to phenotype file input + - phe: + type: file + description: PLINK file containing phenotype information. This phenotype information + can be read from the third column with the --pheno option or from a specific + column with the --pheno-name option + pattern: "*.{phe}" +output: + - assoc: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.assoc": + type: file + description: PLINK GWAS association file + pattern: "*.{assoc}" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: PLINK GWAS association log file + pattern: "*.{log}" + - nosex: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.nosex": + type: file + description: PLINK GWAS association file that retains phenotypes for samples + with ambiguous sex. Produced with the option --allow-no-sex + pattern: "*.{nosex}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@LorenzoS96" +maintainers: + - "@LorenzoS96" diff --git a/modules/nf-core/plink/gwas/tests/main.nf.test b/modules/nf-core/plink/gwas/tests/main.nf.test new file mode 100644 index 0000000..38236e1 --- /dev/null +++ b/modules/nf-core/plink/gwas/tests/main.nf.test @@ -0,0 +1,198 @@ +nextflow_process { + + name "test Plink GWAS function" + script "../main.nf" + process "PLINK_GWAS" + tag "modules" + tag "modules_nfcore" + tag "plink" + tag "plink/gwas" + + test("plink - VCF") { + + config "./nextflow.config" + + when { + params { + outdir = "test" + } + process { + """ + input[0] = [ [id:"null"], [], [], []] + input[1] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.vcf.gz', checkIfExists: true) + ] + input[2] = [ [id:"null"], []] + input[3] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.phe', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.log.get(0).get(1) ==~ ".*/*.log" }, + { assert snapshot( + process.out.assoc, + process.out.nosex, + process.out.versions + ).match() + } + ) + } + } + + test("plink - VCF with phenotype name") { + + config "./pheno_name.config" + + when { + params { + outdir = "test" + } + process { + """ + input[0] = [ [id:"null"], [], [], []] + input[1] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.vcf.gz', checkIfExists: true) + ] + input[2] = [ [id:"null"], []] + input[3] = [ + [id:"test"], + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/popgen/plink_simulated_phenoname.phe', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.log.get(0).get(1) ==~ ".*/*.log" }, + { assert snapshot( + process.out.assoc, + process.out.nosex, + process.out.versions + ).match() + } + ) + } + } + + test("plink - binary") { + + config "./nextflow.config" + + when { + params { + outdir = "test" + } + process { + """ + input[0] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + input[1] = [ [id:"null"], []] + input[2] = [ [id:"null"], []] + input[3] = [ [id:"null"], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.log.get(0).get(1) ==~ ".*/*.log" }, + { assert snapshot( + process.out.assoc, + process.out.versions + ).match() + } + ) + } + } + + test("plink - BCF") { + + config "./nextflow.config" + + when { + params { + outdir = "test" + } + process { + """ + input[0] = [ [id:"null"], [], [], []] + input[1] = [ [id:"null"], []] + input[2] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bcf.gz', checkIfExists: true) + ] + input[3] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.phe', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.log.get(0).get(1) ==~ ".*/*.log" }, + { assert snapshot( + process.out.assoc, + process.out.nosex, + process.out.versions + ).match() + } + ) + } + } + + test("plink - BCF with phenotype name") { + + config "./pheno_name.config" + + when { + params { + outdir = 
"test" + } + process { + """ + input[0] = [ [id:"null"], [], [], []] + input[1] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.vcf.gz', checkIfExists: true) + ] + input[2] = [ [id:"null"], []] + input[3] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_phenoname.phe', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.log.get(0).get(1) ==~ ".*/*.log" }, + { assert snapshot( + process.out.assoc, + process.out.nosex, + process.out.versions + ).match() + } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/plink/gwas/tests/main.nf.test.snap b/modules/nf-core/plink/gwas/tests/main.nf.test.snap new file mode 100644 index 0000000..3a0c750 --- /dev/null +++ b/modules/nf-core/plink/gwas/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "plink - BCF": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.assoc:md5,ab8f91519a605cd9f2b2f89e18c3df9b" + ] + ], + [ + [ + { + "id": "test" + }, + "test.nosex:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ], + [ + "versions.yml:md5,6201877470ebd48d6b5cb97ca0641641" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T18:03:08.363934" + }, + "plink - BCF with phenotype name": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.assoc:md5,ab8f91519a605cd9f2b2f89e18c3df9b" + ] + ], + [ + [ + { + "id": "test" + }, + "test.nosex:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ], + [ + "versions.yml:md5,6201877470ebd48d6b5cb97ca0641641" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T18:01:33.223256" + }, + "plink - VCF with phenotype name": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.assoc:md5,ab8f91519a605cd9f2b2f89e18c3df9b" + ] + ], + [ + [ + { + "id": "test" + }, + "test.nosex:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ], + 
[ + "versions.yml:md5,6201877470ebd48d6b5cb97ca0641641" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T18:01:15.485141" + }, + "plink - binary": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.assoc:md5,ab8f91519a605cd9f2b2f89e18c3df9b" + ] + ], + [ + "versions.yml:md5,6201877470ebd48d6b5cb97ca0641641" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T18:01:22.136918" + }, + "plink - VCF": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.assoc:md5,ab8f91519a605cd9f2b2f89e18c3df9b" + ] + ], + [ + [ + { + "id": "test" + }, + "test.nosex:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ], + [ + "versions.yml:md5,6201877470ebd48d6b5cb97ca0641641" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T18:01:09.222434" + } +} \ No newline at end of file diff --git a/modules/nf-core/plink/gwas/tests/nextflow.config b/modules/nf-core/plink/gwas/tests/nextflow.config new file mode 100644 index 0000000..ed5ce1c --- /dev/null +++ b/modules/nf-core/plink/gwas/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: PLINK_GWAS { + ext.args = '--1 --allow-no-sex' + } + +} diff --git a/modules/nf-core/plink/gwas/tests/pheno_name.config b/modules/nf-core/plink/gwas/tests/pheno_name.config new file mode 100644 index 0000000..5cf1d78 --- /dev/null +++ b/modules/nf-core/plink/gwas/tests/pheno_name.config @@ -0,0 +1,7 @@ +process { + + withName: PLINK_GWAS { + ext.args = '--1 --pheno-name Phenotype --allow-no-sex' + } + +} diff --git a/modules/nf-core/plink/vcf/main.nf b/modules/nf-core/plink/vcf/main.nf index 8419be1..baff00c 100644 --- a/modules/nf-core/plink/vcf/main.nf +++ b/modules/nf-core/plink/vcf/main.nf @@ -1,6 +1,6 @@ process PLINK_VCF { tag "$meta.id" - label 'process_medium' + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? @@ -9,6 +9,7 @@ process PLINK_VCF { input: tuple val(meta), path(vcf) + tuple val(meta2), path(pheno) output: tuple val(meta), path("*.bed"), emit: bed, optional: true @@ -23,10 +24,12 @@ process PLINK_VCF { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def phenotype = pheno ? "--pheno ${pheno} --make-bed" : '' """ plink \\ --vcf ${vcf} \\ + $phenotype \\ $args \\ --threads $task.cpus \\ --out ${prefix} diff --git a/workflows/gwas.nf b/workflows/gwas.nf index 3651464..6f01928 100644 --- a/workflows/gwas.nf +++ b/workflows/gwas.nf @@ -9,6 +9,7 @@ include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pi include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_gwas_pipeline' include { PLINK_VCF } from '../modules/nf-core/plink/vcf/main' +include { PLINK_GWAS } from '../modules/nf-core/plink/gwas/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -30,13 +31,34 @@ workflow GWAS { ch_inputs = ch_samplesheet | multiMap { meta, vcf, pheno, cov -> - vcf : [ meta,vcf ] + vcf : [ meta, vcf ] pheno : [ meta, pheno] - cov : cov ? [ meta,cov ] : null + cov : [ meta, cov ] // Might be worth adding ? 
: null } PLINK_VCF ( - ch_inputs.vcf + ch_inputs.vcf, + ch_inputs.pheno + ) + + // Update input with plink binary conversion from vcf + new_ch_input = PLINK_VCF.out.bed + | combine(PLINK_VCF.out.bim, by:0) + | combine(PLINK_VCF.out.fam, by:0) + | combine(ch_inputs.pheno, by:0) + | combine(ch_inputs.cov, by:0) + | multiMap { + meta, bed, bim, fam, pheno, cov -> + plink: [ meta, bed, bim, fam ] + pheno: [ meta, pheno ] + cov: [ meta, cov ] + } + + PLINK_GWAS ( + new_ch_input.plink, + [[],[]], // vcf if not converted to plink binary before + [[],[]], // bcf if not converted to plink binary before + [[],[]], ) // From b52cdb916f2e851fbb2b3a09d984292e42a52268 Mon Sep 17 00:00:00 2001 From: Fernando Duarte <123090819+FernandoDuarteF@users.noreply.github.com> Date: Fri, 23 May 2025 11:12:41 +0100 Subject: [PATCH 4/4] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 696459d..f423837 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,8 @@ ## Introduction +REMOVE THIS LINE + **nf-core/gwas** is a bioinformatics pipeline that ...