Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions modules/nf-core/glimpse2/splitreference/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ process GLIMPSE2_SPLITREFERENCE {
'biocontainers/glimpse-bio:2.0.1--h46b9e50_1' }"

input:
tuple val(meta) , path(reference), path(reference_index), val(input_region), val(output_region)
tuple val(meta2), path(map)
tuple val(meta) , path(reference), path(reference_index), val(input_region), val(output_region), path(map)


output:
Expand Down
5 changes: 0 additions & 5 deletions modules/nf-core/glimpse2/splitreference/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@ input:
type: string
description: Target imputed region, excluding left and right buffers (e.g. chr20:1000000-2000000).
pattern: "chrXX:leftBufferPosition-rightBufferPosition"
- - meta2:
type: map
description: |
Groovy Map containing genomic map information
e.g. `[ map:'GRCh38' ]`
- map:
type: file
description: File containing the genetic map.
Expand Down
13 changes: 5 additions & 8 deletions modules/nf-core/glimpse2/splitreference/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ nextflow_process {
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
"chr22:16600000-16800000",
"chr22:16600000-16800000"
"chr22:16600000-16800000",
[]
]
input[1]= [[ id:'map'],[]]
"""
}
}
Expand All @@ -48,10 +48,7 @@ nextflow_process {
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
"chr22:16600000-16800000",
"chr22:16600000-16800000"
]
input[1]= [
[ id:'map'],
"chr22:16600000-16800000",
file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.glimpse.map", checkIfExists:true)
]
"""
Expand Down Expand Up @@ -80,9 +77,9 @@ nextflow_process {
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists:true),
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists:true),
"chr22:16600000-16800000",
"chr22:16600000-16800000"
"chr22:16600000-16800000",
[]
]
input[1]= [[ id:'map'],[]]
"""
}
}
Expand Down
123 changes: 123 additions & 0 deletions subworkflows/nf-core/bam_vcf_impute_glimpse2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk/main'
include { GLIMPSE2_SPLITREFERENCE } from '../../../modules/nf-core/glimpse2/splitreference/main'
include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase/main'
include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main'
include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index/main.nf'
include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index/main.nf'

// Subworkflow wiring together GLIMPSE2_CHUNK (optional), GLIMPSE2_SPLITREFERENCE
// (optional), GLIMPSE2_PHASE and GLIMPSE2_LIGATE, with a BCFTOOLS_INDEX call after
// the phase step and another after the ligate step.
// Emits the chunk channel that was used, the ligated variants joined with their
// index, and the collected versions files.
workflow BAM_VCF_IMPUTE_GLIMPSE2 {

take:
ch_input // channel (mandatory): [ meta, vcf, csi, list, infos ]
ch_ref // channel (mandatory): [ meta, vcf, csi, region ]
ch_chunks // channel (optional) : [ meta, regionin, regionout ]
ch_map // channel (optional) : [ meta, map ]
ch_fasta // channel (optional) : [ meta, fasta, index ]
chunk // val (optional) : boolean to activate/deactivate chunking step
chunk_model // val (optional) : model file for chunking
splitreference // val (optional) : boolean to activate/deactivate split reference step

main:

ch_versions = channel.empty()

if ( chunk == true ){
// Error if pre-defined chunks are provided when chunking is activated
// The filter keeps only entries whose regionin and regionout are both empty;
// the error fires when nothing passes the filter. The result of this chain is
// not assigned: it exists only for the check.
// NOTE(review): a completely empty ch_chunks would presumably also trigger this
// error, so callers seem expected to pass placeholder entries with empty regions
// — confirm against callers.
ch_chunks
.filter { _meta, regionin, regionout -> regionin.size() == 0 && regionout.size() == 0 }
.ifEmpty {
error "ERROR: Cannot provide pre-defined chunks (regionin) when chunk=true. Please either set chunk=false to use provided chunks, or remove input chunks to enable automatic chunking."
}

// Chunk reference panel
// Attach the map to each reference entry by matching on the first tuple element (meta).
ch_ref_map = ch_ref
.combine(ch_map, by: 0)
GLIMPSE2_CHUNK ( ch_ref_map, chunk_model )
ch_versions = ch_versions.mix( GLIMPSE2_CHUNK.out.versions.first() )

// Replace ch_chunks with the computed chunks: read the chunk output as
// tab-separated rows using the column names below, and keep RegionBuf / RegionCnk
// in the regionin / regionout positions.
ch_chunks = GLIMPSE2_CHUNK.out.chunk_chr
.splitCsv(header: [
'ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm',
'WindowMb', 'NbTotVariants', 'NbComVariants'
], sep: "\t", skip: 0)
.map { meta, it -> [meta, it["RegionBuf"], it["RegionCnk"]]}
}

// Sanity check (result not assigned): raise an error when no chunk entry has both
// regionin and regionout non-empty.
ch_chunks
.filter { _meta, regionin, regionout -> regionin.size() > 0 && regionout.size() > 0 }
.ifEmpty { error "ERROR: ch_chunks channel is empty. Please provide a valid channel or set chunk parameter to true." }

if ( splitreference == true ) {
// Split reference panel in bin files
// Match reference, chunks and map on meta; the chunk regions are also copied into
// meta under the keys "regionin" and "regionout". The reference's own region
// (_region) is dropped.
split_input = ch_ref
.combine(ch_chunks, by: 0)
.combine(ch_map, by: 0)
.map{ meta, ref, index, _region, regionin, regionout, gmap -> [
meta + ["regionin": regionin, "regionout": regionout],
ref, index, regionin, regionout, gmap
] }

GLIMPSE2_SPLITREFERENCE( split_input )
ch_versions = ch_versions.mix( GLIMPSE2_SPLITREFERENCE.out.versions.first() )

// Pad with empty lists so the tuple has the same six positions as the else branch:
// [ meta, regionin, regionout, panel, panel_index, gmap ].
ch_chunks_panel_map = GLIMPSE2_SPLITREFERENCE.out.bin_ref
.map{ meta, bin_ref -> [ meta, [], [], bin_ref, [], [] ] } // Everything is provided by the bin file
} else {
// No splitting: pass regions, reference panel, its index and the map through,
// matched on meta, with the chunk regions also copied into meta.
ch_chunks_panel_map = ch_chunks
.combine(ch_ref, by:0)
.combine(ch_map, by:0)
.map{ meta, regionin, regionout, ref, ref_index, _region, gmap -> [
meta + ["regionin": regionin, "regionout": regionout],
regionin, regionout, ref, ref_index, gmap
] }
}

// Result not assigned: only used to raise an error when the combined channel is empty.
ch_chunks_panel_map.ifEmpty{
error "ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input."
}

// combine without `by`: every input entry is paired with every chunk/panel/map entry.
ch_phase_input = ch_input
.combine(ch_chunks_panel_map)
.map{ metaI, input, index, list, infos, metaCPM, regionin, regionout, panel, panel_index, gmap -> [
metaI + metaCPM, // combined metadata
input, index, list, infos, // input files
regionin, regionout, // chunk regions
panel, panel_index, gmap // panel and map files
] }

// Impute with Glimpse2
GLIMPSE2_PHASE(ch_phase_input, ch_fasta)
ch_versions = ch_versions.mix( GLIMPSE2_PHASE.out.versions.first() )

// Index phased file
BCFTOOLS_INDEX_1(GLIMPSE2_PHASE.out.phased_variants)
ch_versions = ch_versions.mix( BCFTOOLS_INDEX_1.out.versions.first() )

// Ligate all phased files in one and index it
// The regionin/regionout keys are removed from meta so that entries differing only
// in those keys share the same meta, which groupTuple then uses to collect them.
ligate_input = GLIMPSE2_PHASE.out.phased_variants
.join( BCFTOOLS_INDEX_1.out.csi )
.map{ meta, vcf, index ->
def keysToKeep = meta.keySet() - ['regionin', 'regionout']
[ meta.subMap(keysToKeep), vcf, index ]
}
.groupTuple()

GLIMPSE2_LIGATE( ligate_input )
ch_versions = ch_versions.mix( GLIMPSE2_LIGATE.out.versions.first() )

BCFTOOLS_INDEX_2( GLIMPSE2_LIGATE.out.merged_variants )
ch_versions = ch_versions.mix( BCFTOOLS_INDEX_2.out.versions.first() )

// Join imputed and index files
// Both the tbi and csi outputs are mixed, so the join picks up whichever one is emitted.
ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants
.join(
BCFTOOLS_INDEX_2.out.tbi
.mix(BCFTOOLS_INDEX_2.out.csi)
)

emit:
ch_chunks = ch_chunks // channel: [ val(meta), regionin, regionout ]
ch_vcf_index = ch_vcf_index // channel: [ val(meta), vcf, index ] (index is tbi or csi)

versions = ch_versions // channel: [ versions.yml ]
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: "multiple_impute_glimpse2"
name: "bam_vcf_impute_glimpse2"
description: Impute VCF/BCF files, but also CRAM and BAM files with Glimpse2
keywords:
- glimpse
Expand All @@ -18,8 +18,9 @@ input:
description: |
Target dataset in CRAM, BAM or VCF/BCF format.
Index file of the input file.
File containing the list of files to be imputed and their sample names (for CRAM/BAM input).
File with sample names and ploidy information.
Structure: [ meta, file, index, txt ]
Structure: [ meta, file, index, bamlist, ploidy ]
- ch_ref:
type: file
description: |
Expand All @@ -28,32 +29,46 @@ input:
Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20).
The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended).
Structure: [ meta, vcf, csi, region ]
- ch_chunks:
type: string
description: |
Channel containing the chunking regions for each chromosome.
Structure: [ meta, region with buffer, region without buffer ]
- ch_map:
type: file
description: |
File containing the genetic map.
Genetic map file for each chromosome.
Structure: [ meta, gmap ]
- ch_fasta:
type: file
description: |
Reference genome in fasta format.
Reference genome index in fai format.
Structure: [ meta, fasta, fai ]
- chunk:
type: boolean
description: Whether to perform chunking of the input data before imputation.
- chunk_model:
type: string
description: |
Chunking model to use.
Options: "sequential", "recursive"
- splitreference:
type: boolean
description: Whether to split the reference panel and convert it to binary files before imputation.

output:
- chunk_chr:
type: file
- ch_chunks:
type: string
description: |
Tab delimited output txt file containing buffer and imputation regions.
Structure: [meta, txt]
- merged_variants:
Channel containing the chunking regions for each chromosome.
Structure: [ meta, region with buffer, region without buffer ]
- ch_vcf_index:
type: file
description: |
Output VCF/BCF file for the merged regions.
Phased information (HS field) is updated accordingly for the full region.
Structure: [ val(meta), bcf ]
- merged_variants_index:
type: file
description: Index file of the ligated phased variants files.
Index file of the output VCF/BCF file.
Structure: [ val(meta), variants, index ]
- versions:
type: file
description: File containing software versions
Expand Down
Loading
Loading