nf-core · LouisLeNezet · Dec 11, 2025 · Dec 4, 2025 · Dec 8, 2025 · Dec 8, 2025
diff --git a/modules/nf-core/glimpse2/splitreference/main.nf b/modules/nf-core/glimpse2/splitreference/main.nf
@@ -18,8 +18,7 @@ process GLIMPSE2_SPLITREFERENCE {
         'biocontainers/glimpse-bio:2.0.1--h46b9e50_1' }"
 
     input:
-        tuple val(meta) , path(reference), path(reference_index), val(input_region), val(output_region)
-        tuple val(meta2), path(map)
+        tuple val(meta) , path(reference), path(reference_index), val(input_region), val(output_region), path(map)
 
 
     output:

diff --git a/modules/nf-core/glimpse2/splitreference/meta.yml b/modules/nf-core/glimpse2/splitreference/meta.yml
@@ -42,11 +42,6 @@ input:
         type: string
         description: Target imputed region, excluding left and right buffers (e.g. chr20:1000000-2000000).
         pattern: "chrXX:leftBufferPosition-rightBufferPosition"
-  - - meta2:
-        type: map
-        description: |
-          Groovy Map containing genomic map information
-          e.g. `[ map:'GRCh38' ]`
     - map:
         type: file
         description: File containing the genetic map.

diff --git a/modules/nf-core/glimpse2/splitreference/tests/main.nf.test b/modules/nf-core/glimpse2/splitreference/tests/main.nf.test
@@ -20,9 +20,9 @@ nextflow_process {
                     file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
                     file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
                     "chr22:16600000-16800000",
-                    "chr22:16600000-16800000"
+                    "chr22:16600000-16800000",
+                    []
                 ]
-                input[1]= [[ id:'map'],[]]
                 """
             }
         }
@@ -48,10 +48,7 @@ nextflow_process {
                     file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
                     file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
                     "chr22:16600000-16800000",
-                    "chr22:16600000-16800000"
-                ]
-                input[1]= [
-                    [ id:'map'],
+                    "chr22:16600000-16800000",
                     file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.glimpse.map", checkIfExists:true)
                 ]
                 """
@@ -80,9 +77,9 @@ nextflow_process {
                     file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
                     file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
                     "chr22:16600000-16800000",
-                    "chr22:16600000-16800000"
+                    "chr22:16600000-16800000",
+                    []
                 ]
-                input[1]= [[ id:'map'],[]]
                 """
             }
         }

diff --git a/subworkflows/nf-core/bam_vcf_impute_glimpse2/main.nf b/subworkflows/nf-core/bam_vcf_impute_glimpse2/main.nf
@@ -0,0 +1,123 @@
+include { GLIMPSE2_CHUNK                     } from '../../../modules/nf-core/glimpse2/chunk/main'
+include { GLIMPSE2_SPLITREFERENCE            } from '../../../modules/nf-core/glimpse2/splitreference/main'
+include { GLIMPSE2_PHASE                     } from '../../../modules/nf-core/glimpse2/phase/main'
+include { GLIMPSE2_LIGATE                    } from '../../../modules/nf-core/glimpse2/ligate/main'
+include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index/main.nf'
+include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index/main.nf'
+
+workflow BAM_VCF_IMPUTE_GLIMPSE2 { // Optionally chunk and split the reference panel, then phase with GLIMPSE2, ligate the chunks and index the result
+
+    take:
+    ch_input       // channel (mandatory): [ meta, file, index, list, infos ] (meta.yml: target in CRAM, BAM or VCF/BCF format)
+    ch_ref         // channel (mandatory): [ meta, vcf, csi, region ]
+    ch_chunks      // channel (optional) : [ meta, regionin, regionout ] (regions are expected to be empty when chunk is true)
+    ch_map         // channel (optional) : [ meta, map ]
+    ch_fasta       // channel (optional) : [ meta, fasta, index ]
+    chunk          // val (optional)     : boolean to activate/deactivate chunking step
+    chunk_model    // val (optional)     : chunking model forwarded to GLIMPSE2_CHUNK (meta.yml lists "sequential", "recursive")
+    splitreference // val (optional)     : boolean to activate/deactivate split reference step
+
+    main:
+
+    ch_versions = channel.empty()
+
+    if ( chunk == true ){
+        // Raise an error when no ch_chunks element has both regions empty, i.e. pre-defined chunks were supplied (an empty ch_chunks channel also triggers it)
+        ch_chunks
+            .filter { _meta, regionin, regionout -> regionin.size() == 0 && regionout.size() == 0 }
+            .ifEmpty {
+                error "ERROR: Cannot provide pre-defined chunks (regionin) when chunk=true. Please either set chunk=false to use provided chunks, or remove input chunks to enable automatic chunking."
+            }
+
+        // Chunk reference panel: attach the genetic map to each panel entry that has the same meta
+        ch_ref_map = ch_ref
+            .combine(ch_map, by: 0)
+        GLIMPSE2_CHUNK ( ch_ref_map, chunk_model )
+        ch_versions = ch_versions.mix( GLIMPSE2_CHUNK.out.versions.first() )
+
+        ch_chunks = GLIMPSE2_CHUNK.out.chunk_chr // from here on ch_chunks holds the computed regions instead of the incoming channel
+            .splitCsv(header: [
+                'ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm',
+                'WindowMb', 'NbTotVariants', 'NbComVariants'
+            ], sep: "\t", skip: 0)
+            .map { meta, it -> [meta, it["RegionBuf"], it["RegionCnk"]]} // keep only the RegionBuf and RegionCnk columns as regionin / regionout
+    }
+
+    ch_chunks
+        .filter { _meta, regionin, regionout -> regionin.size() > 0 && regionout.size() > 0 } // elements with both regions defined
+        .ifEmpty { error "ERROR: ch_chunks channel is empty. Please provide a valid channel or set chunk parameter to true." }
+
+    if ( splitreference == true ) {
+        // Split reference panel in bin files: one GLIMPSE2_SPLITREFERENCE input per panel/chunk/map combination matched on meta
+        split_input = ch_ref
+            .combine(ch_chunks, by: 0)
+            .combine(ch_map, by: 0)
+            .map{ meta, ref, index, _region, regionin, regionout, gmap -> [
+                meta + ["regionin": regionin, "regionout": regionout], // record the chunk regions in the meta map
+                ref, index, regionin, regionout, gmap
+            ] }
+
+        GLIMPSE2_SPLITREFERENCE( split_input )
+        ch_versions = ch_versions.mix( GLIMPSE2_SPLITREFERENCE.out.versions.first() )
+
+        ch_chunks_panel_map = GLIMPSE2_SPLITREFERENCE.out.bin_ref
+            .map{ meta, bin_ref -> [ meta, [], [], bin_ref, [], [] ] } // Everything is provided by the bin file
+    } else { // no splitting: pass regions, reference panel, its index and the map as separate elements
+        ch_chunks_panel_map = ch_chunks
+            .combine(ch_ref, by:0)
+            .combine(ch_map, by:0)
+            .map{ meta, regionin, regionout, ref, ref_index, _region, gmap -> [
+                meta + ["regionin": regionin, "regionout": regionout], // record the chunk regions in the meta map
+                regionin, regionout, ref, ref_index, gmap
+            ] }
+    }
+
+    ch_chunks_panel_map.ifEmpty{
+        error "ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input."
+    }
+
+    ch_phase_input = ch_input
+        .combine(ch_chunks_panel_map)
+        .map{ metaI, input, index, list, infos, metaCPM, regionin, regionout, panel, panel_index, gmap -> [
+            metaI + metaCPM, // combined metadata; NOTE(review): on duplicate keys (e.g. id) the metaCPM value replaces the metaI one
+            input, index, list, infos, // input files
+            regionin, regionout, // chunk regions
+            panel, panel_index, gmap // panel and map files
+        ] }
+
+    // Impute with Glimpse2
+    GLIMPSE2_PHASE(ch_phase_input, ch_fasta)
+    ch_versions = ch_versions.mix( GLIMPSE2_PHASE.out.versions.first() )
+
+    // Index phased file
+    BCFTOOLS_INDEX_1(GLIMPSE2_PHASE.out.phased_variants)
+    ch_versions = ch_versions.mix( BCFTOOLS_INDEX_1.out.versions.first() )
+
+    // Ligate all phased files in one and index it
+    ligate_input = GLIMPSE2_PHASE.out.phased_variants
+        .join( BCFTOOLS_INDEX_1.out.csi )
+        .map{ meta, vcf, index ->
+            def keysToKeep = meta.keySet() - ['regionin', 'regionout'] // drop the chunk-specific keys so chunks share one grouping key
+            [ meta.subMap(keysToKeep), vcf, index ]
+        }
+        .groupTuple() // gather the per-chunk files that share the same remaining meta
+
+    GLIMPSE2_LIGATE( ligate_input )
+    ch_versions = ch_versions.mix( GLIMPSE2_LIGATE.out.versions.first() )
+
+    BCFTOOLS_INDEX_2( GLIMPSE2_LIGATE.out.merged_variants )
+    ch_versions = ch_versions.mix( BCFTOOLS_INDEX_2.out.versions.first() )
+
+    // Join imputed and index files (the index is taken from either the tbi or the csi output)
+    ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants
+        .join(
+            BCFTOOLS_INDEX_2.out.tbi
+                .mix(BCFTOOLS_INDEX_2.out.csi)
+        )
+
+    emit:
+    ch_chunks     = ch_chunks          // channel: [ val(meta), regionin, regionout ]
+    ch_vcf_index  = ch_vcf_index       // channel: [ val(meta), vcf, index ] (index is tbi or csi)
+
+    versions      = ch_versions        // channel: [ versions.yml ]
+}
diff --git a/...nf-core/multiple_impute_glimpse2/meta.yml → .../nf-core/bam_vcf_impute_glimpse2/meta.yml b/...nf-core/multiple_impute_glimpse2/meta.yml → .../nf-core/bam_vcf_impute_glimpse2/meta.yml
@@ -1,4 +1,4 @@
-name: "multiple_impute_glimpse2"
+name: "bam_vcf_impute_glimpse2"
 description: Impute VCF/BCF files, but also CRAM and BAM files with Glimpse2
 keywords:
   - glimpse
@@ -18,8 +18,9 @@ input:
       description: |
         Target dataset in CRAM, BAM or VCF/BCF format.
         Index file of the input file.
+        File containing the list of files to be imputed and their sample names (for CRAM/BAM input).
         File with sample names and ploidy information.
-        Structure: [ meta, file, index, txt ]
+        Structure: [ meta, file, index, bamlist, ploidy ]
   - ch_ref:
       type: file
       description: |
@@ -28,32 +29,46 @@ input:
         Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20).
         The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended).
         Structure: [ meta, vcf, csi, region ]
+  - ch_chunks:
+      type: string
+      description: |
+        Channel containing the chunking regions for each chromosome.
+        Structure: [ meta, region with buffer, region without buffer ]
   - ch_map:
       type: file
       description: |
-        File containing the genetic map.
+        Genetic map file for each chromosome.
         Structure: [ meta, gmap ]
   - ch_fasta:
       type: file
       description: |
         Reference genome in fasta format.
         Reference genome index in fai format
         Structure: [ meta, fasta, fai ]
+  - chunk:
+      type: boolean
+      description: Whether to compute the chunking regions from the reference panel (GLIMPSE2_CHUNK) before imputation.
+  - chunk_model:
+      type: string
+      description: |
+        Chunking model to use.
+        Options: "sequential", "recursive"
+  - splitreference:
+      type: boolean
+      description: Whether to split the reference panel and convert it to binary files before imputation.
+
 output:
-  - chunk_chr:
-      type: file
+  - ch_chunks:
+      type: string
       description: |
-        Tab delimited output txt file containing buffer and imputation regions.
-        Structure: [meta, txt]
-  - merged_variants:
+        Channel containing the chunking regions for each chromosome.
+        Structure: [ meta, region with buffer, region without buffer ]
+  - ch_vcf_index:
       type: file
       description: |
         Output VCF/BCF file for the merged regions.
-        Phased information (HS field) is updated accordingly for the full region.
-        Structure: [ val(meta), bcf ]
-  - merged_variants_index:
-      type: file
-      description: Index file of the ligated phased variants files.
+        Index file of the output VCF/BCF file.
+        Structure: [ val(meta), variants, index ]
   - versions:
       type: file
       description: File containing software versions