diff --git a/modules/nf-core/glimpse2/splitreference/main.nf b/modules/nf-core/glimpse2/splitreference/main.nf index 019072b8689d..0311e09dabc2 100644 --- a/modules/nf-core/glimpse2/splitreference/main.nf +++ b/modules/nf-core/glimpse2/splitreference/main.nf @@ -18,8 +18,7 @@ process GLIMPSE2_SPLITREFERENCE { 'biocontainers/glimpse-bio:2.0.1--h46b9e50_1' }" input: - tuple val(meta) , path(reference), path(reference_index), val(input_region), val(output_region) - tuple val(meta2), path(map) + tuple val(meta) , path(reference), path(reference_index), val(input_region), val(output_region), path(map) output: diff --git a/modules/nf-core/glimpse2/splitreference/meta.yml b/modules/nf-core/glimpse2/splitreference/meta.yml index 161dab0d5781..ed6b63ee03ad 100644 --- a/modules/nf-core/glimpse2/splitreference/meta.yml +++ b/modules/nf-core/glimpse2/splitreference/meta.yml @@ -42,11 +42,6 @@ input: type: string description: Target imputed region, excluding left and right buffers (e.g. chr20:1000000-2000000). pattern: "chrXX:leftBufferPosition-rightBufferPosition" - - - meta2: - type: map - description: | - Groovy Map containing genomic map information - e.g. `[ map:'GRCh38' ]` - map: type: file description: File containing the genetic map. 
diff --git a/modules/nf-core/glimpse2/splitreference/tests/main.nf.test b/modules/nf-core/glimpse2/splitreference/tests/main.nf.test index e95febde9a3c..9c353df2f643 100644 --- a/modules/nf-core/glimpse2/splitreference/tests/main.nf.test +++ b/modules/nf-core/glimpse2/splitreference/tests/main.nf.test @@ -20,9 +20,9 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), "chr22:16600000-16800000", - "chr22:16600000-16800000" + "chr22:16600000-16800000", + [] ] - input[1]= [[ id:'map'],[]] """ } } @@ -48,10 +48,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), "chr22:16600000-16800000", - "chr22:16600000-16800000" - ] - input[1]= [ - [ id:'map'], + "chr22:16600000-16800000", file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.glimpse.map", checkIfExists:true) ] """ @@ -80,9 +77,9 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), "chr22:16600000-16800000", - "chr22:16600000-16800000" + "chr22:16600000-16800000", + [] ] - input[1]= [[ id:'map'],[]] """ } } diff --git a/subworkflows/nf-core/bam_vcf_impute_glimpse2/main.nf b/subworkflows/nf-core/bam_vcf_impute_glimpse2/main.nf new file mode 100644 index 000000000000..db11d1f534b3 --- /dev/null +++ b/subworkflows/nf-core/bam_vcf_impute_glimpse2/main.nf @@ -0,0 +1,123 @@ +include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk/main' +include { 
GLIMPSE2_SPLITREFERENCE } from '../../../modules/nf-core/glimpse2/splitreference/main' +include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase/main' +include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index/main.nf' + +workflow BAM_VCF_IMPUTE_GLIMPSE2 { + + take: + ch_input // channel (mandatory): [ meta, vcf, csi, list, infos ] + ch_ref // channel (mandatory): [ meta, vcf, csi, region ] + ch_chunks // channel (optional) : [ meta, regionin, regionout ] + ch_map // channel (optional) : [ meta, map ] + ch_fasta // channel (optional) : [ meta, fasta, index ] + chunk // val (optional) : boolean to activate/deactivate chunking step + chunk_model // val (optional) : chunking model to use ("sequential" or "recursive") + splitreference // val (optional) : boolean to activate/deactivate split reference step + + main: + + ch_versions = channel.empty() + + if ( chunk == true ){ + // Error if pre-defined chunks are provided when chunking is activated + ch_chunks + .filter { _meta, regionin, regionout -> regionin.size() == 0 && regionout.size() == 0 } + .ifEmpty { + error "ERROR: Cannot provide pre-defined chunks (regionin) when chunk=true. Please either set chunk=false to use provided chunks, or remove input chunks to enable automatic chunking." 
+ } + + // Chunk reference panel + ch_ref_map = ch_ref + .combine(ch_map, by: 0) + GLIMPSE2_CHUNK ( ch_ref_map, chunk_model ) + ch_versions = ch_versions.mix( GLIMPSE2_CHUNK.out.versions.first() ) + + ch_chunks = GLIMPSE2_CHUNK.out.chunk_chr + .splitCsv(header: [ + 'ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm', + 'WindowMb', 'NbTotVariants', 'NbComVariants' + ], sep: "\t", skip: 0) + .map { meta, it -> [meta, it["RegionBuf"], it["RegionCnk"]]} + } + + ch_chunks + .filter { _meta, regionin, regionout -> regionin.size() > 0 && regionout.size() > 0 } + .ifEmpty { error "ERROR: ch_chunks channel is empty. Please provide a valid channel or set chunk parameter to true." } + + if ( splitreference == true ) { + // Split reference panel in bin files + split_input = ch_ref + .combine(ch_chunks, by: 0) + .combine(ch_map, by: 0) + .map{ meta, ref, index, _region, regionin, regionout, gmap -> [ + meta + ["regionin": regionin, "regionout": regionout], + ref, index, regionin, regionout, gmap + ] } + + GLIMPSE2_SPLITREFERENCE( split_input ) + ch_versions = ch_versions.mix( GLIMPSE2_SPLITREFERENCE.out.versions.first() ) + + ch_chunks_panel_map = GLIMPSE2_SPLITREFERENCE.out.bin_ref + .map{ meta, bin_ref -> [ meta, [], [], bin_ref, [], [] ] } // Everything is provided by the bin file + } else { + ch_chunks_panel_map = ch_chunks + .combine(ch_ref, by:0) + .combine(ch_map, by:0) + .map{ meta, regionin, regionout, ref, ref_index, _region, gmap -> [ + meta + ["regionin": regionin, "regionout": regionout], + regionin, regionout, ref, ref_index, gmap + ] } + } + + ch_chunks_panel_map.ifEmpty{ + error "ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input." 
+ } + + ch_phase_input = ch_input + .combine(ch_chunks_panel_map) + .map{ metaI, input, index, list, infos, metaCPM, regionin, regionout, panel, panel_index, gmap -> [ + metaI + metaCPM, // combined metadata + input, index, list, infos, // input files + regionin, regionout, // chunk regions + panel, panel_index, gmap // panel and map files + ] } + + // Impute with Glimpse2 + GLIMPSE2_PHASE(ch_phase_input, ch_fasta) + ch_versions = ch_versions.mix( GLIMPSE2_PHASE.out.versions.first() ) + + // Index phased file + BCFTOOLS_INDEX_1(GLIMPSE2_PHASE.out.phased_variants) + ch_versions = ch_versions.mix( BCFTOOLS_INDEX_1.out.versions.first() ) + + // Ligate all phased files in one and index it + ligate_input = GLIMPSE2_PHASE.out.phased_variants + .join( BCFTOOLS_INDEX_1.out.csi ) + .map{ meta, vcf, index -> + def keysToKeep = meta.keySet() - ['regionin', 'regionout'] + [ meta.subMap(keysToKeep), vcf, index ] + } + .groupTuple() + + GLIMPSE2_LIGATE( ligate_input ) + ch_versions = ch_versions.mix( GLIMPSE2_LIGATE.out.versions.first() ) + + BCFTOOLS_INDEX_2( GLIMPSE2_LIGATE.out.merged_variants ) + ch_versions = ch_versions.mix( BCFTOOLS_INDEX_2.out.versions.first() ) + + // Join imputed and index files + ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants + .join( + BCFTOOLS_INDEX_2.out.tbi + .mix(BCFTOOLS_INDEX_2.out.csi) + ) + + emit: + ch_chunks = ch_chunks // channel: [ val(meta), regionin, regionout ] + ch_vcf_index = ch_vcf_index // channel: [ val(meta), vcf, index ] (index is tbi or csi) + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/multiple_impute_glimpse2/meta.yml b/subworkflows/nf-core/bam_vcf_impute_glimpse2/meta.yml similarity index 56% rename from subworkflows/nf-core/multiple_impute_glimpse2/meta.yml rename to subworkflows/nf-core/bam_vcf_impute_glimpse2/meta.yml index 6fea6251cc15..41edf1abe46c 100644 --- a/subworkflows/nf-core/multiple_impute_glimpse2/meta.yml +++ b/subworkflows/nf-core/bam_vcf_impute_glimpse2/meta.yml @@ -1,4 +1,4 @@ 
-name: "multiple_impute_glimpse2" +name: "bam_vcf_impute_glimpse2" description: Impute VCF/BCF files, but also CRAM and BAM files with Glimpse2 keywords: - glimpse @@ -18,8 +18,9 @@ input: description: | Target dataset in CRAM, BAM or VCF/BCF format. Index file of the input file. + File containing the list of files to be imputed and their sample names (for CRAM/BAM input). File with sample names and ploidy information. - Structure: [ meta, file, index, txt ] + Structure: [ meta, file, index, bamlist, ploidy ] - ch_ref: type: file description: | @@ -28,10 +29,15 @@ input: Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20). The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended). Structure: [ meta, vcf, csi, region ] + - ch_chunks: + type: string + description: | + Channel containing the chunking regions for each chromosome. + Structure: [ meta, region with buffer, region without buffer ] - ch_map: type: file description: | - File containing the genetic map. + Genetic map file for each chromosome. Structure: [ meta, gmap ] - ch_fasta: type: file @@ -39,21 +45,30 @@ input: Reference genome in fasta format. Reference genome index in fai format Structure: [ meta, fasta, fai ] + - chunk: + type: boolean + description: Whether to perform chunking of the input data before imputation. + - chunk_model: + type: string + description: | + Chunking model to use. + Options: "sequential", "recursive" + - splitreference: + type: boolean + description: Whether to split the reference panel and convert it to binary files before imputation. + output: - - chunk_chr: - type: file + - ch_chunks: + type: string description: | - Tab delimited output txt file containing buffer and imputation regions. - Structure: [meta, txt] - - merged_variants: + Channel containing the chunking regions for each chromosome. 
+ Structure: [ meta, region with buffer, region without buffer ] + - ch_vcf_index: type: file description: | Output VCF/BCF file for the merged regions. - Phased information (HS field) is updated accordingly for the full region. - Structure: [ val(meta), bcf ] - - merged_variants_index: - type: file - description: Index file of the ligated phased variants files. + Index file of the output VCF/BCF file. + Structure: [ val(meta), variants, index ] - versions: type: file description: File containing software versions diff --git a/subworkflows/nf-core/bam_vcf_impute_glimpse2/tests/main.nf.test b/subworkflows/nf-core/bam_vcf_impute_glimpse2/tests/main.nf.test new file mode 100644 index 000000000000..15ed651546b1 --- /dev/null +++ b/subworkflows/nf-core/bam_vcf_impute_glimpse2/tests/main.nf.test @@ -0,0 +1,461 @@ +nextflow_workflow { + + name "Test Workflow BAM_VCF_IMPUTE_GLIMPSE2" + config "./nextflow.config" + script "../main.nf" + workflow "BAM_VCF_IMPUTE_GLIMPSE2" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_vcf_impute_glimpse2" + tag "subworkflows/bam_vcf_impute_glimpse2" + + tag "glimpse2/chunk" + tag "glimpse2/splitreference" + tag "glimpse2/phase" + tag "glimpse2/ligate" + tag "glimpse2" + tag "bcftools/index" + tag "bcftools" + + test("homo_sapiens - vcf no list and no sample, panel vcf region, no chunks, no map, no fasta, chunk recursive + splitreference") { + when { + workflow { + """ + input[0] = Channel.of([ + [id:'input_vcf'], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi", checkIfExists: true), + [], [] + ]) // input + input[1] = Channel.of([ + [panel:'ref_panel', chr: "22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), + "chr22" + ]) // reference + input[2] = Channel.of([ + [panel: 'ref_panel', chr: "22"], + [], [] + ]) // chunks + input[3] = Channel.of([ + [panel: 'ref_panel', chr: "22"], + [] + ]) // map + input[4] = Channel.of([ + [id_genome:'ref_fasta'], [], [] + ]).collect() // genome + input[5] = true // perform chunking + input[6] = "recursive" // chunking model + input[7] = true // splitreference + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.ch_chunks, + workflow.out.ch_vcf_index.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString(), + path(it[1]).vcf.summary, + path(it[1]).vcf.header.getGenotypeSamples().sort(), + path(it[1]).vcf.variantsMD5 + ]}, + workflow.out.versions.collect{ path(it).yaml } + ).match() } + ) + } + } + test("homo_sapiens - vcf no list with sample, panel vcf region, chunks, map, no fasta, no chunking + splitreference") { + when { + workflow { + """ + sample = Channel.of('NA12878 2') + .collectFile(name: 'sampleinfos.txt') + input[0] = Channel.of([ + [id:'input_vcf'], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi", checkIfExists: true), + [] // no list + ]) + .combine(sample) + input[1] = Channel.of([ + [panel: 'ref_panel', chr: "22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), + "chr22:16570065-16609999", + ]) + input[2] = Channel.of( + [[panel: 'ref_panel', chr: "22"], "chr22:16570065-16597215", "chr22:16570065-16592216"], + [[panel: 'ref_panel', chr: "22"], "chr22:16587172-16609999", 
"chr22:16592229-16609999"] + ) // chunks + input[3] = Channel.of([ + [panel: 'ref_panel', chr: "22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.glimpse.map", checkIfExists: true) + ]) // map + input[4] = Channel.of([ + [id_genome:'ref_fasta'], [], [] + ]).collect() // genome + input[5] = false // do not perform chunking + input[6] = "recursive" // chunking model + input[7] = true // splitreference + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.ch_chunks, + workflow.out.ch_vcf_index.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString(), + path(it[1]).vcf.summary, + path(it[1]).vcf.header.getGenotypeSamples().sort(), + path(it[1]).vcf.variantsMD5 + ]}, + workflow.out.versions.collect{ path(it).yaml } + ).match() } + ) + } + } + test("homo_sapiens - vcf no list with sample, panel vcf region, chunks, map, no fasta, no chunking + no splitreference") { + when { + workflow { + """ + sample = Channel.of('NA12878 2') + .collectFile(name: 'sampleinfos.txt') + input[0] = Channel.of([ + [id:'input_vcf'], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi", checkIfExists: true), + [] // no list + ]) + .combine(sample) + input[1] = Channel.of([ + [panel: 'ref_panel', chr: "22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), + "chr22:16570065-16609999", + ]) + input[2] = Channel.of( + [[panel: 'ref_panel', chr: "22"], "chr22:16570065-16597215", "chr22:16570065-16592216"], + [[panel: 'ref_panel', chr: "22"], "chr22:16587172-16609999", 
"chr22:16592229-16609999"] + ) // chunks + input[3] = Channel.of([ + [panel: 'ref_panel', chr: "22"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phaseimpute/hum_data/reference_genome/GRCh38_chr22.glimpse.map", checkIfExists: true) + ]) // map + input[4] = Channel.of([ + [id_genome:'ref_fasta'], [], [] + ]).collect() // genome + input[5] = false // do not perform chunking + input[6] = "recursive" // chunking model + input[7] = false // no splitreference + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.ch_chunks, + workflow.out.ch_vcf_index.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString(), + path(it[1]).vcf.summary, + path(it[1]).vcf.header.getGenotypeSamples().sort(), + path(it[1]).vcf.variantsMD5 + ]}, + workflow.out.versions.collect{ path(it).yaml } + ).match() } + ) + } + } + test("homo_sapiens - bam no list and no sample, panel vcf region, no chunks, no map, no fasta, chunk sequential + splitreference") { + when { + workflow { + """ + input[0] = Channel.of([ + [id:'input_bam'], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr22.1X.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr22.1X.bam.bai", checkIfExists: true), + [], [] + ]) + input[1] = Channel.of([ + [panel: 'ref_panel', chr: "22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), + "chr22" + ]) + input[2] = Channel.of([ + [panel: 'ref_panel', chr: "22"], + [], [] + ]) // chunks + input[3] = Channel.of([ + [panel: 'ref_panel', chr: "22"], + [] + ]) // map + input[4] = Channel.of([ + [id_genome:'ref_fasta'], [], [] + ]).collect() // genome + input[5] = true // perform chunking + 
input[6] = "sequential" // chunking model + input[7] = true // splitreference + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.ch_chunks, + workflow.out.ch_vcf_index.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString(), + path(it[1]).vcf.summary, + path(it[1]).vcf.header.getGenotypeSamples().sort(), + path(it[1]).vcf.variantsMD5 + ]}, + workflow.out.versions.collect{ path(it).yaml } + ).match() } + ) + } + } + test("homo_sapiens - bam list and sample, panel vcf region, chunks, map, no fasta, no chunking + splitreference") { + when { + workflow { + """ + bamlist = Channel.of( + "NA12878.chr21_22.1X.bam\tSample1", + "NA19401.chr21_22.1X.bam\tSample2" + ).collectFile(name: 'bamlist.txt', newLine: true) + sample = Channel.of('NA12878 2') + .collectFile(name: 'sampleinfos.txt') + input[0] = Channel.of([ + [id: "allid"], [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA19401.chr21_22.1X.bam", checkIfExists: true) + ], [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA19401.chr21_22.1X.bam.bai", checkIfExists: true) + ] + ]).combine(bamlist).combine(sample) + input[1] = Channel.of([ + [panel: 'ref_panel', chr: "22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), + "chr22" + ],[ + [panel: 'ref_panel', chr: "21"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr21.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/popgen/1000GP.chr21.vcf.gz.csi", checkIfExists:true), + "chr21" + ]) // reference + input[2] = Channel.of( + [[panel: 'ref_panel', chr: "22"], "chr22:16570065-16597215", "chr22:16570065-16592216"], + [[panel: 'ref_panel', chr: "22"], "chr22:16587172-16609999", "chr22:16592229-16609999"], + [[panel: 'ref_panel', chr: "21"], "chr21:16570065-16597215", "chr21:16570065-16592216"], + [[panel: 'ref_panel', chr: "21"], "chr21:16587172-16609999", "chr21:16592229-16609999"] + ) // chunks + input[3] = Channel.of([ + [panel: 'ref_panel', chr: "22"], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.glimpse.map", checkIfExists:true) + ],[ + [panel: 'ref_panel', chr: "21"], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr21.glimpse.map", checkIfExists:true) + ]) // map + input[4] = Channel.of([ + [id_genome:'ref_fasta'], [], [] + ]).collect() // genome + input[5] = false // perform chunking + input[6] = "sequential" // chunking model + input[7] = true // splitreference + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.ch_chunks, + workflow.out.ch_vcf_index.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString(), + path(it[1]).vcf.summary, + path(it[1]).vcf.header.getGenotypeSamples().sort(), + path(it[1]).vcf.variantsMD5 + ]}, + workflow.out.versions.collect{ path(it).yaml } + ).match() } + ) + } + } + test("homo_sapiens - empty channels, chunk sequential + splitreference - stub") { + options "-stub" + when { + workflow { + """ + input[0] = Channel.of([ + [id: "allid"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true), + [], [] + ]) + input[1] = 
Channel.of([ + [panel: 'ref_panel', chr: "22"], [], [], "chr22" + ],[ + [panel: 'ref_panel', chr: "21"], [], [], "chr21" + ]) // reference + input[2] = Channel.of( + [[panel: 'ref_panel', chr: "22"], [], []], + [[panel: 'ref_panel', chr: "21"], [], []] + ) // chunks + input[3] = Channel.of([ + [panel: 'ref_panel', chr: "22"], [] + ],[ + [panel: 'ref_panel', chr: "21"], [] + ]) // map + input[4] = Channel.of([ + [id_genome:'ref_fasta'], [], [] + ]).collect() // genome + input[5] = true // perform chunking + input[6] = "sequential" // chunking model + input[7] = true // splitreference + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out, + workflow.out.versions.collect{ path(it).yaml } + ).match() } + ) + } + } + + test("homo_sapiens - error chunks provided and chunk is true") { + options "-stub" + when { + workflow { + """ + input[0] = Channel.of([ [id: "allid"], [], [], [], [] ]) + input[1] = Channel.of([ + [panel: 'ref_panel', chr: "22"], [], [], "chr22" + ],[ + [panel: 'ref_panel', chr: "21"], [], [], "chr21" + ]) // reference + input[2] = Channel.of( + [[panel: 'ref_panel', chr: "22"], "chr22:16570065-16597215", "chr22:16570065-16592216"], + [[panel: 'ref_panel', chr: "22"], "chr22:16587172-16609999", "chr22:16592229-16609999"], + [[panel: 'ref_panel', chr: "21"], "chr21:16570065-16597215", "chr21:16570065-16592216"], + [[panel: 'ref_panel', chr: "21"], "chr21:16587172-16609999", "chr21:16592229-16609999"] + ) // chunks + input[3] = Channel.of([ + [panel: 'ref_panel', chr: "22"], [] + ],[ + [panel: 'ref_panel', chr: "21"], [] + ]) // map + input[4] = Channel.of([ + [id_genome:'ref_fasta'], [], [] + ]).collect() // genome + input[5] = true // perform chunking + input[6] = "sequential" // chunking model + input[7] = true // splitreference + """ + } + } + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorMessage.contains("ERROR: Cannot provide pre-defined chunks (regionin) when chunk=true. 
Please either set chunk=false to use provided chunks, or remove input chunks to enable automatic chunking.") } + ) + } + } + + test("homo_sapiens - error no chunks provided and chunk is false") { + options "-stub" + when { + workflow { + """ + input[0] = Channel.of([ [id: "allid"], [], [], [], [] ]) + input[1] = Channel.of([ + [panel: 'ref_panel', chr: "22"], [], [], "chr22" + ],[ + [panel: 'ref_panel', chr: "21"], [], [], "chr21" + ]) // reference + input[2] = Channel.of( + [[panel: 'ref_panel', chr: "22"], [], []], + [[panel: 'ref_panel', chr: "21"], [], []] + ) // chunks + input[3] = Channel.of([ + [panel: 'ref_panel', chr: "22"], [] + ],[ + [panel: 'ref_panel', chr: "21"], [] + ]) // map + input[4] = Channel.of([ + [id_genome:'ref_fasta'], [], [] + ]).collect() // genome + input[5] = false // do not perform chunking + input[6] = "sequential" // chunking model + input[7] = true // splitreference + """ + } + } + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorMessage.contains("ERROR: ch_chunks channel is empty. 
Please provide a valid channel or set chunk parameter to true.") } + ) + } + } + + test("homo_sapiens - error empty joint") { + options "-stub" + when { + workflow { + """ + input[0] = Channel.of([ + [id: "allid"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr22.1X.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr22.1X.bam.bai", checkIfExists: true), + [], [] + ]) + input[1] = Channel.of([ + [panel: 'ref_panel', chr: "22"], [], [], "chr22" + ],[ + [panel: 'ref_panel', chr: "21"], [], [], "chr21" + ]) // reference + input[2] = Channel.of( + [[panel: 'ref_panel 2', chr: "22"], "chr22:16570065-16597215", "chr22:16570065-16592216"], + [[panel: 'ref_panel', chr: "21"], "chr21:16570065-16597215", "chr21:16570065-16592216"] + ) // chunks + input[3] = Channel.of([ + [panel: 'ref_panel', chr: "22"], [] + ],[ + [panel: 'ref_panel 1', chr: "21"], [] + ]) // map + input[4] = Channel.of([ + [id_genome:'ref_fasta'], [], [] + ]).collect() // genome + input[5] = false // do not perform chunking + input[6] = "sequential" // chunking model + input[7] = false // no splitreference + """ + } + } + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorMessage.contains("ERROR: join operation resulted in an empty channel. 
Please provide a valid ch_chunks and ch_map channel as input.") } + ) + } + } +} diff --git a/subworkflows/nf-core/bam_vcf_impute_glimpse2/tests/main.nf.test.snap b/subworkflows/nf-core/bam_vcf_impute_glimpse2/tests/main.nf.test.snap new file mode 100644 index 000000000000..d7e3e862c1e0 --- /dev/null +++ b/subworkflows/nf-core/bam_vcf_impute_glimpse2/tests/main.nf.test.snap @@ -0,0 +1,524 @@ +{ + "homo_sapiens - vcf no list and no sample, panel vcf region, no chunks, no map, no fasta, chunk recursive + splitreference": { + "content": [ + [ + [ + { + "panel": "ref_panel", + "chr": "22" + }, + "chr22:16570065-16609999", + "chr22:16570065-16592222" + ], + [ + { + "panel": "ref_panel", + "chr": "22" + }, + "chr22:16570065-16609999", + "chr22:16592223-16609999" + ] + ], + [ + [ + { + "id": "input_vcf", + "panel": "ref_panel", + "chr": "22" + }, + "input_vcf_22.vcf.gz", + "input_vcf_22.vcf.gz.csi", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=false]", + [ + "NA12878" + ], + "b9ccc0359ccca0714fca43367538de47" + ] + ], + [ + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_CHUNK": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2": { + "bcftools": 1.22 + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_1": { + "bcftools": 1.22 + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_SPLITREFERENCE": { + "glimpse2": "2.0.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-21T12:07:17.825740137" + }, + "homo_sapiens - bam no list and no sample, panel vcf region, no chunks, no map, no fasta, chunk sequential + splitreference": { + "content": [ + [ + [ + { + "panel": "ref_panel", + "chr": "22" + }, + "chr22:1-16609999", + "chr22:1-16590520" + ], + [ + { + "panel": "ref_panel", + "chr": "22" + }, + 
"chr22:16570065-1248956422", + "chr22:16590521-1248956422" + ] + ], + [ + [ + { + "id": "input_bam", + "panel": "ref_panel", + "chr": "22" + }, + "input_bam_22.vcf.gz", + "input_bam_22.vcf.gz.csi", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=true]", + [ + "NA12878.chr22.1X" + ], + "661a9923af273601dbf0e7a3a666ebd0" + ] + ], + [ + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_CHUNK": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2": { + "bcftools": 1.22 + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_1": { + "bcftools": 1.22 + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_SPLITREFERENCE": { + "glimpse2": "2.0.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-21T12:08:29.707313034" + }, + "homo_sapiens - empty channels, chunk sequential + splitreference - stub": { + "content": [ + { + "0": [ + [ + { + "panel": "ref_panel", + "chr": "21" + }, + "0", + "0" + ], + [ + { + "panel": "ref_panel", + "chr": "22" + }, + "0", + "0" + ] + ], + "1": [ + [ + { + "id": "allid", + "panel": "ref_panel", + "chr": "21" + }, + "allid_21.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "allid_21.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "allid", + "panel": "ref_panel", + "chr": "22" + }, + "allid_22.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "allid_22.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,302281bbff76712870c031340d3819ff", + "versions.yml:md5,3500318c881e3f24e9dd2fddc593142a", + "versions.yml:md5,9422ca169a15fd68878396c2837bc185", + "versions.yml:md5,c6ff81aa44fb2fdcc08db6062731d0d9", + "versions.yml:md5,cb51e18b4296c3791ade5f6f2737bd24", + "versions.yml:md5,ed6ebcf6ca30c2cdc39ec1dd535a2695" + ], + 
"ch_chunks": [ + [ + { + "panel": "ref_panel", + "chr": "21" + }, + "0", + "0" + ], + [ + { + "panel": "ref_panel", + "chr": "22" + }, + "0", + "0" + ] + ], + "ch_vcf_index": [ + [ + { + "id": "allid", + "panel": "ref_panel", + "chr": "21" + }, + "allid_21.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "allid_21.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "allid", + "panel": "ref_panel", + "chr": "22" + }, + "allid_22.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "allid_22.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,302281bbff76712870c031340d3819ff", + "versions.yml:md5,3500318c881e3f24e9dd2fddc593142a", + "versions.yml:md5,9422ca169a15fd68878396c2837bc185", + "versions.yml:md5,c6ff81aa44fb2fdcc08db6062731d0d9", + "versions.yml:md5,cb51e18b4296c3791ade5f6f2737bd24", + "versions.yml:md5,ed6ebcf6ca30c2cdc39ec1dd535a2695" + ] + }, + [ + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_CHUNK": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2": { + "bcftools": 1.22 + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_1": { + "bcftools": 1.22 + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_SPLITREFERENCE": { + "glimpse2": "2.0.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-21T12:10:17.321588787" + }, + "homo_sapiens - vcf no list with sample, panel vcf region, chunks, map, no fasta, no chunking + no splitreference": { + "content": [ + [ + [ + { + "panel": "ref_panel", + "chr": "22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "panel": "ref_panel", + "chr": "22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + [ + [ + { + "id": "input_vcf", + "panel": "ref_panel", + "chr": "22" + }, + 
"input_vcf_22.vcf.gz", + "input_vcf_22.vcf.gz.csi", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=false]", + [ + "NA12878" + ], + "a0d696cbdb219c168b6d54fc28c5f7dd" + ] + ], + [ + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2": { + "bcftools": 1.22 + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_1": { + "bcftools": 1.22 + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-21T12:08:13.222110779" + }, + "homo_sapiens - vcf no list with sample, panel vcf region, chunks, map, no fasta, no chunking + splitreference": { + "content": [ + [ + [ + { + "panel": "ref_panel", + "chr": "22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "panel": "ref_panel", + "chr": "22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + [ + [ + { + "id": "input_vcf", + "panel": "ref_panel", + "chr": "22" + }, + "input_vcf_22.vcf.gz", + "input_vcf_22.vcf.gz.csi", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=false]", + [ + "NA12878" + ], + "a0d696cbdb219c168b6d54fc28c5f7dd" + ] + ], + [ + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2": { + "bcftools": 1.22 + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_1": { + "bcftools": 1.22 + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_SPLITREFERENCE": { + "glimpse2": "2.0.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-21T12:07:58.812913099" + }, + "homo_sapiens - bam list and sample, panel vcf region, chunks, map, no fasta, no chunking + splitreference": { + 
"content": [ + [ + [ + { + "panel": "ref_panel", + "chr": "21" + }, + "chr21:16570065-16597215", + "chr21:16570065-16592216" + ], + [ + { + "panel": "ref_panel", + "chr": "21" + }, + "chr21:16587172-16609999", + "chr21:16592229-16609999" + ], + [ + { + "panel": "ref_panel", + "chr": "22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "panel": "ref_panel", + "chr": "22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + [ + [ + { + "id": "allid", + "panel": "ref_panel", + "chr": "21" + }, + "allid_21.vcf.gz", + "allid_21.vcf.gz.csi", + "VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=836, phased=true, phasedAutodetect=true]", + [ + "Sample1", + "Sample2" + ], + "56a7bd192123c52ea77f8b3a59e50342" + ], + [ + { + "id": "allid", + "panel": "ref_panel", + "chr": "22" + }, + "allid_22.vcf.gz", + "allid_22.vcf.gz.csi", + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=903, phased=true, phasedAutodetect=true]", + [ + "Sample1", + "Sample2" + ], + "13cdb23c1074393f9fe53feae397a3df" + ] + ], + [ + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2": { + "bcftools": 1.22 + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_1": { + "bcftools": 1.22 + } + }, + { + "BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_SPLITREFERENCE": { + "glimpse2": "2.0.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-12-08T19:39:08.135355223" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_vcf_impute_glimpse2/tests/nextflow.config b/subworkflows/nf-core/bam_vcf_impute_glimpse2/tests/nextflow.config new file mode 100644 index 000000000000..9cd201b85a2a --- /dev/null +++ b/subworkflows/nf-core/bam_vcf_impute_glimpse2/tests/nextflow.config @@ -0,0 +1,41 @@ +process { + withName: "GLIMPSE2_CHUNK" { + 
ext.prefix = { "${meta.panel}_${meta.chr}" } + tag = { "${meta.panel} ${meta.chr}" } + ext.args = [ + "--window-mb 0.01", + "--window-cm 0.01", + "--window-count 200", + "--buffer-mb 0.005", + "--buffer-cm 0.005", + "--buffer-count 30", + "--seed 1" + ].join(' ') + } + + withName: "GLIMPSE2_SPLITREFERENCE" { + ext.prefix = { "${meta.panel}_${meta.chr}_${meta.regionout}" } + tag = { "${meta.panel} ${meta.chr} ${meta.regionout}" } + ext.args = "--seed 1" + } + + withName: "GLIMPSE2_PHASE" { + ext.prefix = { "${meta.id}_${meta.chr}_${meta.regionout}" } + tag = { "${meta.id} ${meta.chr} ${meta.regionout}" } + cpus = 1 + ext.args = "--keep-monomorphic-ref-sites --seed 1" + } + + withName: "BCFTOOLS_INDEX_1" { + tag = { "${meta.id} ${meta.chr} ${meta.regionout}" } + } + + withName: "GLIMPSE2_LIGATE" { + ext.prefix = { "${meta.id}_${meta.chr}" } + tag = { "${meta.id} ${meta.chr}" } + } + + withName: "BCFTOOLS_INDEX_2" { + tag = { "${meta.id} ${meta.chr}" } + } +} diff --git a/subworkflows/nf-core/multiple_impute_glimpse2/README.md b/subworkflows/nf-core/multiple_impute_glimpse2/README.md new file mode 100644 index 000000000000..695001a4f198 --- /dev/null +++ b/subworkflows/nf-core/multiple_impute_glimpse2/README.md @@ -0,0 +1,4 @@ +> [!WARNING] +> This subworkflow has been deprecated.
Please use `nf-core/modules/subworkflows/bam_vcf_impute_glimpse2` +> +> **Reason:** Subworkflow naming rules were introduced necessitating this move, see https://nf-co.re/docs/guidelines/components/subworkflows#name-format-of-subworkflow-files diff --git a/subworkflows/nf-core/multiple_impute_glimpse2/main.nf b/subworkflows/nf-core/multiple_impute_glimpse2/main.nf deleted file mode 100644 index c9c0022beacc..000000000000 --- a/subworkflows/nf-core/multiple_impute_glimpse2/main.nf +++ /dev/null @@ -1,75 +0,0 @@ -include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk/main' -include { GLIMPSE2_SPLITREFERENCE } from '../../../modules/nf-core/glimpse2/splitreference/main' -include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase/main' -include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main' -include { BCFTOOLS_INDEX as INDEX_PHASE } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_INDEX as INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index/main.nf' - -workflow MULTIPLE_IMPUTE_GLIMPSE2 { - - take: - ch_input // channel (mandatory): [ meta, vcf, csi, infos ] - ch_ref // channel (mandatory): [ meta, vcf, csi, region ] - ch_map // channel (optional) : [ meta, map ] - ch_fasta // channel (optional) : [ meta, fasta, index ] - chunk_model // string: model used to chunk the reference panel - - main: - - ch_versions = channel.empty() - - // Chunk reference panel - ch_ref_map = ch_ref.combine(ch_map, by: 0) - GLIMPSE2_CHUNK ( ch_ref_map, chunk_model ) - ch_versions = ch_versions.mix( GLIMPSE2_CHUNK.out.versions.first() ) - - chunk_output = GLIMPSE2_CHUNK.out.chunk_chr - .splitCsv(header: [ - 'ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm', - 'WindowMb', 'NbTotVariants', 'NbComVariants' - ], sep: "\t", skip: 0) - .map { meta, it -> [meta, it["RegionBuf"], it["RegionCnk"]]} - - // Split reference panel in bin files - split_input = ch_ref.map{ meta, ref, index, _region -> [meta, ref, 
index]} - .combine(chunk_output, by: 0) - - GLIMPSE2_SPLITREFERENCE( split_input, ch_map ) - ch_versions = ch_versions.mix( GLIMPSE2_SPLITREFERENCE.out.versions.first() ) - - phase_input = ch_input.combine( GLIMPSE2_SPLITREFERENCE.out.bin_ref ) - .map{ input_meta, input_file, input_index, input_infos, - _panel_meta, panel_bin -> - [input_meta, input_file, input_index, [], input_infos, - [], [], panel_bin, [], []] - }/* Remove unnecessary meta maps - add null index as we use a bin file, - add null value for input and output region as we use a bin file */ - - // Phase input files for each reference bin files + indexing - GLIMPSE2_PHASE ( phase_input, ch_fasta ) // [meta, vcf, index, sample_infos, regionin, regionout, regionindex, ref, ref_index, map], [ meta, fasta, index ] - ch_versions = ch_versions.mix( GLIMPSE2_PHASE.out.versions.first() ) - - INDEX_PHASE ( GLIMPSE2_PHASE.out.phased_variants ) - ch_versions = ch_versions.mix( INDEX_PHASE.out.versions.first() ) - - // Ligate all phased files in one and index it - ligate_input = GLIMPSE2_PHASE.out.phased_variants - .groupTuple() - .combine( INDEX_PHASE.out.csi - .groupTuple() - .collect(), by: 0 ) - - GLIMPSE2_LIGATE ( ligate_input ) - ch_versions = ch_versions.mix( GLIMPSE2_LIGATE.out.versions.first() ) - - INDEX_LIGATE ( GLIMPSE2_LIGATE.out.merged_variants ) - ch_versions = ch_versions.mix( INDEX_LIGATE.out.versions.first() ) - - emit: - chunk_chr = GLIMPSE2_CHUNK.out.chunk_chr // channel: [ val(meta), txt ] - merged_variants = GLIMPSE2_LIGATE.out.merged_variants // channel: [ val(meta), bcf ] - merged_variants_index = INDEX_LIGATE.out.csi // channel: [ val(meta), csi ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/multiple_impute_glimpse2/tests/main.nf.test b/subworkflows/nf-core/multiple_impute_glimpse2/tests/main.nf.test deleted file mode 100644 index 989bdb8a1ca7..000000000000 --- a/subworkflows/nf-core/multiple_impute_glimpse2/tests/main.nf.test +++ /dev/null @@ 
-1,173 +0,0 @@ -nextflow_workflow { - - name "Test Workflow MULTIPLE_IMPUTE_GLIMPSE2" - config "./nextflow.config" - script "../main.nf" - workflow "MULTIPLE_IMPUTE_GLIMPSE2" - - tag "subworkflows" - tag "subworkflows_nfcore" - tag "multiple_impute_glimpse2" - tag "subworkflows/multiple_impute_glimpse2" - - tag "glimpse2/chunk" - tag "glimpse2/splitreference" - tag "glimpse2/phase" - tag "glimpse2/ligate" - tag "glimpse2" - tag "bcftools/index" - tag "bcftools" - - test("homo_sapiens - vcf [] - panel vcf region - [] - [] - recursive") { - when { - workflow { - """ - input[0] = Channel.of([ - [id:'input_vcf'], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true), - [] - ]) - input[1] = Channel.of([ - [id:'ref_panel', chr: "22"], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), - "chr22", - ]) - input[2] = Channel.of([ - [id: 'ref_panel', chr: "22"], - [] - ]) - input[3] = Channel.of([[id:'ref_fasta'], [], []]).collect() - input[4] = "recursive" - """ - } - } - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.chunk_chr, - workflow.out.merged_variants.collect{ path(it[1]).vcf.variantsMD5}, - file(workflow.out.merged_variants_index[0][1]).name, // The .csi index file sometimes change - workflow.out.versions - ).match() } - ) - } - } - test("homo_sapiens - vcf sample - panel vcf - [] - [] - recursive") { - when { - workflow { - """ - sample = Channel.of('NA12878 2') - .collectFile(name: 'sampleinfos.txt') - input[0] = Channel.of([ - [id:'input_vcf'], - file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi", checkIfExists: true), - ]).combine(sample) - input[1] = Channel.of([ - [id:'ref_panel', chr: "22"], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), - "chr22:16570065-16609999", - ]) - input[2] = Channel.of([ - [id:'ref_panel', chr: "22"], - [] - ]).collect() - input[3] = Channel.of([ - [id:'ref_fasta'], - [], - [] - ]).collect() - input[4] = "recursive" - """ - } - } - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.chunk_chr, - workflow.out.merged_variants.collect{ path(it[1]).vcf.variantsMD5}, - file(workflow.out.merged_variants_index[0][1]).name, - workflow.out.versions - ).match() } - ) - } - } - test("homo_sapiens - bam [] - panel vcf - [] - [] - sequential") { - when { - workflow { - """ - input[0] = Channel.of([ - [id:'input_bam'], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true), - [] - ]) - input[1] = Channel.of([ - [id:'ref_panel', chr: "22"], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), - "chr22", - ]) - input[2] = Channel.of([ - [id: 'ref_panel', chr: "22"], - [] - ]) - input[3] = Channel.of([[id:'ref_fasta'], [], []]).collect() - input[4] = "sequential" - """ - } - } - then { - assertAll( - { assert workflow.success }, - { 
assert snapshot( - workflow.out.chunk_chr, - workflow.out.merged_variants.collect{ path(it[1]).vcf.variantsMD5}, - file(workflow.out.merged_variants_index[0][1]).name, - workflow.out.versions - ).match() } - ) - } - } - test("homo_sapiens - bam [] - panel vcf - [] - [] - sequential -- stub") { - tag "stub" - options "-stub" - when { - workflow { - """ - input[0] = input[0] = Channel.of([ - [id:'input_bam'], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true), - [] - ]) - input[1] = Channel.of([ - [id:'ref_panel', chr: "22"], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true), - "chr22", - ]) - input[2] = Channel.of([ - [id: 'ref_panel', chr: "22"], - [] - ]) - input[3] = Channel.of([[id:'ref_fasta'], [], []]).collect() - input[4] = "sequential" - """ - } - } - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out - ).match() } - ) - } - } -} diff --git a/subworkflows/nf-core/multiple_impute_glimpse2/tests/main.nf.test.snap b/subworkflows/nf-core/multiple_impute_glimpse2/tests/main.nf.test.snap deleted file mode 100644 index 232c67643f96..000000000000 --- a/subworkflows/nf-core/multiple_impute_glimpse2/tests/main.nf.test.snap +++ /dev/null @@ -1,169 +0,0 @@ -{ - "homo_sapiens - vcf sample - panel vcf - [] - [] - recursive": { - "content": [ - [ - [ - { - "id": "ref_panel", - "chr": "22" - }, - "ref_panel.txt:md5,c363e235162ca0f7d22e5604c192e256" - ] - ], - [ - "9794b042f8c50f7d9a4dbe448de5ed5e" - ], - "input_vcf.vcf.gz.csi", - [ - "versions.yml:md5,048d5415737cdc568fcc4fbdc5df11e2", - "versions.yml:md5,1301ab7e8d92b232e306aef24d94e252", - 
"versions.yml:md5,1362d3f6834c21239d705f550b11c2af", - "versions.yml:md5,2373902d5432eba6898c90d4c4685f90", - "versions.yml:md5,642ed64b13825472557113ba2e4b1566", - "versions.yml:md5,e81cfbe9cf12832b8312f966b6c9beb1" - ] - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.0" - }, - "timestamp": "2025-11-27T11:11:34.893641629" - }, - "homo_sapiens - bam [] - panel vcf - [] - [] - sequential": { - "content": [ - [ - [ - { - "id": "ref_panel", - "chr": "22" - }, - "ref_panel.txt:md5,4714831df331b665e634eb1a86a84156" - ] - ], - [ - "49df7da6c9745ba9a86112193ff172a9" - ], - "input_bam.vcf.gz.csi", - [ - "versions.yml:md5,048d5415737cdc568fcc4fbdc5df11e2", - "versions.yml:md5,1301ab7e8d92b232e306aef24d94e252", - "versions.yml:md5,1362d3f6834c21239d705f550b11c2af", - "versions.yml:md5,2373902d5432eba6898c90d4c4685f90", - "versions.yml:md5,642ed64b13825472557113ba2e4b1566", - "versions.yml:md5,e81cfbe9cf12832b8312f966b6c9beb1" - ] - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.0" - }, - "timestamp": "2025-11-26T21:19:09.9592341" - }, - "homo_sapiens - bam [] - panel vcf - [] - [] - sequential -- stub": { - "content": [ - { - "0": [ - [ - { - "id": "ref_panel", - "chr": "22" - }, - "ref_panel.txt:md5,2fc283dec1e755af6119f2ef6485ceee" - ] - ], - "1": [ - [ - { - "id": "input_bam" - }, - "input_bam.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "2": [ - [ - { - "id": "input_bam" - }, - "input_bam.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - "versions.yml:md5,048d5415737cdc568fcc4fbdc5df11e2", - "versions.yml:md5,1301ab7e8d92b232e306aef24d94e252", - "versions.yml:md5,1362d3f6834c21239d705f550b11c2af", - "versions.yml:md5,2373902d5432eba6898c90d4c4685f90", - "versions.yml:md5,642ed64b13825472557113ba2e4b1566", - "versions.yml:md5,e81cfbe9cf12832b8312f966b6c9beb1" - ], - "chunk_chr": [ - [ - { - "id": "ref_panel", - "chr": "22" - }, - "ref_panel.txt:md5,2fc283dec1e755af6119f2ef6485ceee" - ] - ], - "merged_variants": [ - [ - { 
- "id": "input_bam" - }, - "input_bam.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "merged_variants_index": [ - [ - { - "id": "input_bam" - }, - "input_bam.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,048d5415737cdc568fcc4fbdc5df11e2", - "versions.yml:md5,1301ab7e8d92b232e306aef24d94e252", - "versions.yml:md5,1362d3f6834c21239d705f550b11c2af", - "versions.yml:md5,2373902d5432eba6898c90d4c4685f90", - "versions.yml:md5,642ed64b13825472557113ba2e4b1566", - "versions.yml:md5,e81cfbe9cf12832b8312f966b6c9beb1" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" - }, - "timestamp": "2025-09-15T07:30:40.001652658" - }, - "homo_sapiens - vcf [] - panel vcf region - [] - [] - recursive": { - "content": [ - [ - [ - { - "id": "ref_panel", - "chr": "22" - }, - "ref_panel.txt:md5,c363e235162ca0f7d22e5604c192e256" - ] - ], - [ - "9794b042f8c50f7d9a4dbe448de5ed5e" - ], - "input_vcf.vcf.gz.csi", - [ - "versions.yml:md5,048d5415737cdc568fcc4fbdc5df11e2", - "versions.yml:md5,1301ab7e8d92b232e306aef24d94e252", - "versions.yml:md5,1362d3f6834c21239d705f550b11c2af", - "versions.yml:md5,2373902d5432eba6898c90d4c4685f90", - "versions.yml:md5,642ed64b13825472557113ba2e4b1566", - "versions.yml:md5,e81cfbe9cf12832b8312f966b6c9beb1" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" - }, - "timestamp": "2025-09-15T07:29:48.094975599" - } -} \ No newline at end of file diff --git a/subworkflows/nf-core/multiple_impute_glimpse2/tests/nextflow.config b/subworkflows/nf-core/multiple_impute_glimpse2/tests/nextflow.config deleted file mode 100644 index e89bc79602dd..000000000000 --- a/subworkflows/nf-core/multiple_impute_glimpse2/tests/nextflow.config +++ /dev/null @@ -1,12 +0,0 @@ -process { - withName: "GLIMPSE2_CHUNK" { - ext.args = [ - "--window-mb 0.01", - "--window-cm 0.01", - "--window-count 200", - "--buffer-mb 0.005", - "--buffer-cm 0.005", - "--buffer-count 30" - ].join(' ') - } -}