From 1abc2fffed9c7394eec6d38243b20081960edc7b Mon Sep 17 00:00:00 2001 From: Francesco Tabaro Date: Mon, 25 Nov 2024 16:26:40 +0100 Subject: [PATCH] implement suggestions --- subworkflows/local/prepare_genome.nf | 108 +++++++++++---------------- 1 file changed, 43 insertions(+), 65 deletions(-) diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 526ca9f5..b053c219 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -31,7 +31,7 @@ workflow PREPARE_GENOME { take: genome // string: genome name genomes // map: genome attributes - prepare_tool_index // string : tool to prepare index for + prepare_tool_index // string: tool to prepare index for fasta // path: path to genome fasta file gtf // file: /path/to/genome.gtf gff // file: /path/to/genome.gff @@ -51,7 +51,7 @@ workflow PREPARE_GENOME { // ch_fasta = Channel.empty() if (fasta.endsWith('.gz')) { - ch_fasta = GUNZIP_FASTA([[:], fasta]).gunzip.map { it[1] } + ch_fasta = GUNZIP_FASTA([[:], fasta]).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) } else { @@ -63,30 +63,20 @@ workflow PREPARE_GENOME { // if (gtf) { if (gtf.endsWith('.gz')) { - ch_gtf = GUNZIP_GTF([[:], gtf]).gunzip.map { it[1] } + ch_gtf = GUNZIP_GTF([[:], gtf]).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) - } - else { + } else { ch_gtf = Channel.value(file(gtf, checkIfExists: true)) } - } - else if (gff) { + } else if (gff) { if (gff.endsWith('.gz')) { - ch_gff = GUNZIP_GFF([[:], file(gff, checkIfExists: true)]).gunzip.map { it[1] } - ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) - } - else { + ch_gff = GUNZIP_GFF([[:], file(gff, checkIfExists: true)]).gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions).map { [ [:], it ] } + } else { ch_gff = Channel.value(file(gff, checkIfExists: true)) } - // - // Detect gff file name stripped of extension and .gz - // - extension = (gff - '.gz').tokenize('.')[-1] - id = file(gff).baseName.toString() - '.gz' - ".${extension}" - - ch_gtf = GFFREAD(ch_gff.map { [[id: id], it] }, []).gtf.map { it[1] } - + ch_gtf = GFFREAD(ch_gff, []).gtf.map { it[1] } ch_versions = ch_versions.mix(GFFREAD.out.versions) } @@ -97,9 +87,8 @@ workflow PREPARE_GENOME { if (blacklist) { if (blacklist.endsWith('.gz')) { ch_blacklist = GUNZIP_BLACKLIST([[:], blacklist]).gunzip.map { it[1] } - ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions) - } - else { + ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions) + } else { ch_blacklist = Channel.value(file(blacklist)) } } @@ -113,8 +102,7 @@ workflow PREPARE_GENOME { def make_bed = false if (!gene_bed) { make_bed = true - } - else if (genome && gtf) { + } else if (genome && gtf) { if (genomes[genome].gtf != gtf) { make_bed = true } @@ -123,13 +111,11 @@ workflow PREPARE_GENOME { if (make_bed) { ch_gene_bed = GTF2BED(ch_gtf).bed ch_versions = ch_versions.mix(GTF2BED.out.versions) - } - else { + } else { if (gene_bed.endsWith('.gz')) { ch_gene_bed = GUNZIP_GENE_BED([[:], gene_bed]).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions) - } - else { + } else { ch_gene_bed = Channel.value(file(gene_bed)) } } @@ -139,8 +125,8 @@ workflow PREPARE_GENOME { // CUSTOM_GETCHROMSIZES(ch_fasta.map { [[:], it] }) ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] } - ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] } - ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] } + ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) // // Prepare genome intervals for filtering by removing regions in blacklist file @@ -152,7 +138,7 @@ workflow PREPARE_GENOME { ch_blacklist.ifEmpty([]) ) ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.bed - ch_versions = ch_versions.mix(GENOME_BLACKLIST_REGIONS.out.versions) + ch_versions = ch_versions.mix(GENOME_BLACKLIST_REGIONS.out.versions) // // Uncompress BWA index or generate from scratch if required @@ -162,15 +148,13 @@ workflow PREPARE_GENOME { if (bwa_index) { if (bwa_index.endsWith('.tar.gz')) { ch_bwa_index = UNTAR_BWA_INDEX([[:], bwa_index]).untar - ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions) - } - else { + ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions) + } else { ch_bwa_index = [[:], file(bwa_index)] } - } - else { + } else { ch_bwa_index = BWA_INDEX(ch_fasta.map { [[:], it] }).index - ch_versions = ch_versions.mix(BWA_INDEX.out.versions) + ch_versions = ch_versions.mix(BWA_INDEX.out.versions) } } @@ -182,15 +166,13 @@ workflow PREPARE_GENOME { if (bowtie2_index) { if (bowtie2_index.endsWith('.tar.gz')) { ch_bowtie2_index = UNTAR_BOWTIE2_INDEX([[:], bowtie2_index]).untar - ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions) - } - else { + ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions) + } else { ch_bowtie2_index = [[:], file(bowtie2_index)] } - } - else { + } else { ch_bowtie2_index = BOWTIE2_BUILD(ch_fasta.map { [[:], it] }).index - ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) + ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) } } @@ -202,15 +184,13 @@ workflow PREPARE_GENOME { if (chromap_index) { if (chromap_index.endsWith('.tar.gz')) { ch_chromap_index = UNTARFILES([[:], chromap_index]).files - ch_versions = ch_versions.mix(UNTARFILES.out.versions) - } - else { + ch_versions = ch_versions.mix(UNTARFILES.out.versions) + } else { ch_chromap_index = [[:], file(chromap_index)] } - } - else { + } else { ch_chromap_index = CHROMAP_INDEX(ch_fasta.map { [[:], it] }).index - ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions) + ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions) } } @@ -222,28 +202,26 @@ workflow PREPARE_GENOME { if (star_index) { if (star_index.endsWith('.tar.gz')) { ch_star_index = UNTAR_STAR_INDEX([[:], star_index]).untar.map { it[1] } - ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) - } - else { + ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) + } else { ch_star_index = Channel.value(file(star_index)) } - } - else { + } else { ch_star_index = STAR_GENOMEGENERATE(ch_fasta, ch_gtf).index - ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) } } emit: - fasta = ch_fasta // path: genome.fasta - fai = ch_fai // path: genome.fai - gtf = ch_gtf // path: genome.gtf - gene_bed = ch_gene_bed // path: gene.bed - chrom_sizes = ch_chrom_sizes // path: genome.sizes - filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed - bwa_index = ch_bwa_index // path: bwa/index/ - bowtie2_index = ch_bowtie2_index // path: bowtie2/index/ - chromap_index = ch_chromap_index // path: genome.index - star_index = ch_star_index // path: star/index/ + fasta = ch_fasta // path: genome.fasta + fai = ch_fai // path: genome.fai + gtf = ch_gtf // path: genome.gtf + gene_bed = ch_gene_bed // path: gene.bed + chrom_sizes = ch_chrom_sizes // path: genome.sizes + filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed + bwa_index = ch_bwa_index // path: bwa/index/ + bowtie2_index = ch_bowtie2_index // path: bowtie2/index/ + chromap_index = ch_chromap_index // path: genome.index + star_index = ch_star_index // path: star/index/ versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] }