From 1abc2fffed9c7394eec6d38243b20081960edc7b Mon Sep 17 00:00:00 2001
From: Francesco Tabaro <francesco.tabaro@embl.it>
Date: Mon, 25 Nov 2024 16:26:40 +0100
Subject: [PATCH] prepare_genome: implement review suggestions (formatting, simpler GFF handling)

---
 subworkflows/local/prepare_genome.nf | 108 +++++++++++----------------
 1 file changed, 43 insertions(+), 65 deletions(-)

diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf
index 526ca9f5..b053c219 100644
--- a/subworkflows/local/prepare_genome.nf
+++ b/subworkflows/local/prepare_genome.nf
@@ -31,7 +31,7 @@ workflow PREPARE_GENOME {
     take:
     genome             //  string: genome name
     genomes            //     map: genome attributes
-    prepare_tool_index // string : tool to prepare index for
+    prepare_tool_index //  string: tool to prepare index for
     fasta              //    path: path to genome fasta file
     gtf                //    file: /path/to/genome.gtf
     gff                //    file: /path/to/genome.gff
@@ -51,7 +51,7 @@ workflow PREPARE_GENOME {
     //
     ch_fasta = Channel.empty()
     if (fasta.endsWith('.gz')) {
-        ch_fasta = GUNZIP_FASTA([[:], fasta]).gunzip.map { it[1] }
+        ch_fasta    = GUNZIP_FASTA([[:], fasta]).gunzip.map { it[1] }
         ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions)
     }
     else {
@@ -63,30 +63,20 @@ workflow PREPARE_GENOME {
     //
     if (gtf) {
         if (gtf.endsWith('.gz')) {
-            ch_gtf = GUNZIP_GTF([[:], gtf]).gunzip.map { it[1] }
+            ch_gtf      = GUNZIP_GTF([[:], gtf]).gunzip.map { it[1] }
             ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
-        }
-        else {
+        } else {
             ch_gtf = Channel.value(file(gtf, checkIfExists: true))
         }
-    }
-    else if (gff) {
+    } else if (gff) {
         if (gff.endsWith('.gz')) {
-            ch_gff = GUNZIP_GFF([[:], file(gff, checkIfExists: true)]).gunzip.map { it[1] }
-            ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions)
-        }
-        else {
+            ch_gff      = GUNZIP_GFF([[:], file(gff, checkIfExists: true)]).gunzip.map { it[1] }
+            ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) // keep versions items bare, as in every other mix() here
+        } else {
             ch_gff = Channel.value(file(gff, checkIfExists: true))
         }
 
-        //
-        // Detect gff file name stripped of extension and .gz
-        //        
-        extension = (gff - '.gz').tokenize('.')[-1]
-        id = file(gff).baseName.toString() - '.gz' - ".${extension}"
-
-        ch_gtf = GFFREAD(ch_gff.map { [[id: id], it] }, []).gtf.map { it[1] }
-
+        ch_gtf      = GFFREAD(ch_gff.map { [[id: it.baseName], it] }, []).gtf.map { it[1] } // GFFREAD takes [ meta, gff ]; id = file name without extension
         ch_versions = ch_versions.mix(GFFREAD.out.versions)
     }
 
@@ -97,9 +87,8 @@ workflow PREPARE_GENOME {
     if (blacklist) {
         if (blacklist.endsWith('.gz')) {
             ch_blacklist = GUNZIP_BLACKLIST([[:], blacklist]).gunzip.map { it[1] }
-            ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions)
-        }
-        else {
+            ch_versions  = ch_versions.mix(GUNZIP_BLACKLIST.out.versions)
+        } else {
             ch_blacklist = Channel.value(file(blacklist))
         }
     }
@@ -113,8 +102,7 @@ workflow PREPARE_GENOME {
     def make_bed = false
     if (!gene_bed) {
         make_bed = true
-    }
-    else if (genome && gtf) {
+    } else if (genome && gtf) {
         if (genomes[genome].gtf != gtf) {
             make_bed = true
         }
@@ -123,13 +111,11 @@ workflow PREPARE_GENOME {
     if (make_bed) {
         ch_gene_bed = GTF2BED(ch_gtf).bed
         ch_versions = ch_versions.mix(GTF2BED.out.versions)
-    }
-    else {
+    } else {
         if (gene_bed.endsWith('.gz')) {
             ch_gene_bed = GUNZIP_GENE_BED([[:], gene_bed]).gunzip.map { it[1] }
             ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions)
-        }
-        else {
+        } else {
             ch_gene_bed = Channel.value(file(gene_bed))
         }
     }
@@ -139,8 +125,8 @@ workflow PREPARE_GENOME {
     //
     CUSTOM_GETCHROMSIZES(ch_fasta.map { [[:], it] })
     ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] }
-    ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] }
-    ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)
+    ch_fai         = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] }
+    ch_versions    = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)
 
     //
     // Prepare genome intervals for filtering by removing regions in blacklist file
@@ -152,7 +138,7 @@ workflow PREPARE_GENOME {
         ch_blacklist.ifEmpty([])
     )
     ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.bed
-    ch_versions = ch_versions.mix(GENOME_BLACKLIST_REGIONS.out.versions)
+    ch_versions            = ch_versions.mix(GENOME_BLACKLIST_REGIONS.out.versions)
 
     //
     // Uncompress BWA index or generate from scratch if required
@@ -162,15 +148,13 @@ workflow PREPARE_GENOME {
         if (bwa_index) {
             if (bwa_index.endsWith('.tar.gz')) {
                 ch_bwa_index = UNTAR_BWA_INDEX([[:], bwa_index]).untar
-                ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
-            }
-            else {
+                ch_versions  = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
+            } else {
                 ch_bwa_index = [[:], file(bwa_index)]
             }
-        }
-        else {
+        } else {
             ch_bwa_index = BWA_INDEX(ch_fasta.map { [[:], it] }).index
-            ch_versions = ch_versions.mix(BWA_INDEX.out.versions)
+            ch_versions  = ch_versions.mix(BWA_INDEX.out.versions)
         }
     }
 
@@ -182,15 +166,13 @@ workflow PREPARE_GENOME {
         if (bowtie2_index) {
             if (bowtie2_index.endsWith('.tar.gz')) {
                 ch_bowtie2_index = UNTAR_BOWTIE2_INDEX([[:], bowtie2_index]).untar
-                ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions)
-            }
-            else {
+                ch_versions      = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions)
+            } else {
                 ch_bowtie2_index = [[:], file(bowtie2_index)]
             }
-        }
-        else {
+        } else {
             ch_bowtie2_index = BOWTIE2_BUILD(ch_fasta.map { [[:], it] }).index
-            ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions)
+            ch_versions      = ch_versions.mix(BOWTIE2_BUILD.out.versions)
         }
     }
 
@@ -202,15 +184,13 @@ workflow PREPARE_GENOME {
         if (chromap_index) {
             if (chromap_index.endsWith('.tar.gz')) {
                 ch_chromap_index = UNTARFILES([[:], chromap_index]).files
-                ch_versions = ch_versions.mix(UNTARFILES.out.versions)
-            }
-            else {
+                ch_versions      = ch_versions.mix(UNTARFILES.out.versions)
+            } else {
                 ch_chromap_index = [[:], file(chromap_index)]
             }
-        }
-        else {
+        } else {
             ch_chromap_index = CHROMAP_INDEX(ch_fasta.map { [[:], it] }).index
-            ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions)
+            ch_versions      = ch_versions.mix(CHROMAP_INDEX.out.versions)
         }
     }
 
@@ -222,28 +202,26 @@ workflow PREPARE_GENOME {
         if (star_index) {
             if (star_index.endsWith('.tar.gz')) {
                 ch_star_index = UNTAR_STAR_INDEX([[:], star_index]).untar.map { it[1] }
-                ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
-            }
-            else {
+                ch_versions   = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
+            } else {
                 ch_star_index = Channel.value(file(star_index))
             }
-        }
-        else {
+        } else {
             ch_star_index = STAR_GENOMEGENERATE(ch_fasta, ch_gtf).index
-            ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
+            ch_versions   = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
         }
     }
 
     emit:
-    fasta         = ch_fasta //    path: genome.fasta
-    fai           = ch_fai //    path: genome.fai
-    gtf           = ch_gtf //    path: genome.gtf
-    gene_bed      = ch_gene_bed //    path: gene.bed
-    chrom_sizes   = ch_chrom_sizes //    path: genome.sizes
-    filtered_bed  = ch_genome_filtered_bed //    path: *.include_regions.bed
-    bwa_index     = ch_bwa_index //    path: bwa/index/
-    bowtie2_index = ch_bowtie2_index //    path: bowtie2/index/
-    chromap_index = ch_chromap_index //    path: genome.index
-    star_index    = ch_star_index //    path: star/index/
+    fasta         = ch_fasta                  //    path: genome.fasta
+    fai           = ch_fai                    //    path: genome.fai
+    gtf           = ch_gtf                    //    path: genome.gtf
+    gene_bed      = ch_gene_bed               //    path: gene.bed
+    chrom_sizes   = ch_chrom_sizes            //    path: genome.sizes
+    filtered_bed  = ch_genome_filtered_bed    //    path: *.include_regions.bed
+    bwa_index     = ch_bwa_index              //    path: bwa/index/
+    bowtie2_index = ch_bowtie2_index          //    path: bowtie2/index/
+    chromap_index = ch_chromap_index          //    path: genome.index
+    star_index    = ch_star_index             //    path: star/index/
     versions      = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
 }