Skip to content

Fix GFFREAD #435

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 26, 2024
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 108 additions & 82 deletions subworkflows/local/prepare_genome.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,35 @@
//

include {
GUNZIP as GUNZIP_FASTA
GUNZIP as GUNZIP_GTF
GUNZIP as GUNZIP_GFF
GUNZIP as GUNZIP_GENE_BED
GUNZIP as GUNZIP_BLACKLIST } from '../../modules/nf-core/gunzip/main'
GUNZIP as GUNZIP_FASTA ;
GUNZIP as GUNZIP_GTF ;
GUNZIP as GUNZIP_GFF ;
GUNZIP as GUNZIP_GENE_BED ;
GUNZIP as GUNZIP_BLACKLIST
} from '../../modules/nf-core/gunzip/main'

include {
UNTAR as UNTAR_BWA_INDEX
UNTAR as UNTAR_BOWTIE2_INDEX
UNTAR as UNTAR_STAR_INDEX } from '../../modules/nf-core/untar/main'

include { UNTARFILES } from '../../modules/nf-core/untarfiles/main'
include { GFFREAD } from '../../modules/nf-core/gffread/main'
include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main'
include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main'
include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main'
include { CHROMAP_INDEX } from '../../modules/nf-core/chromap/index/main'

include { GTF2BED } from '../../modules/local/gtf2bed'
include { GENOME_BLACKLIST_REGIONS } from '../../modules/local/genome_blacklist_regions'
include { STAR_GENOMEGENERATE } from '../../modules/local/star_genomegenerate'
UNTAR as UNTAR_BWA_INDEX ;
UNTAR as UNTAR_BOWTIE2_INDEX ;
UNTAR as UNTAR_STAR_INDEX
} from '../../modules/nf-core/untar/main'

include { UNTARFILES } from '../../modules/nf-core/untarfiles/main'
include { GFFREAD } from '../../modules/nf-core/gffread/main'
include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main'
include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main'
include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main'
include { CHROMAP_INDEX } from '../../modules/nf-core/chromap/index/main'

include { GTF2BED } from '../../modules/local/gtf2bed'
include { GENOME_BLACKLIST_REGIONS } from '../../modules/local/genome_blacklist_regions'
include { STAR_GENOMEGENERATE } from '../../modules/local/star_genomegenerate'

workflow PREPARE_GENOME {
take:
genome // string: genome name
genomes // map: genome attributes
prepare_tool_index // string : tool to prepare index for
prepare_tool_index // string : tool to prepare index for
fasta // path: path to genome fasta file
gtf // file: /path/to/genome.gtf
gff // file: /path/to/genome.gff
Expand All @@ -49,30 +51,42 @@ workflow PREPARE_GENOME {
//
ch_fasta = Channel.empty()
if (fasta.endsWith('.gz')) {
ch_fasta = GUNZIP_FASTA ( [ [:], fasta ] ).gunzip.map{ it[1] }
ch_fasta = GUNZIP_FASTA([[:], fasta]).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions)
} else {
ch_fasta = Channel.value(file(fasta))
}
else {
ch_fasta = Channel.value(file(fasta, checkIfExists: true))
}

//
// Uncompress GTF annotation file or create from GFF3 if required
//
if (gtf) {
if (gtf.endsWith('.gz')) {
ch_gtf = GUNZIP_GTF ( [ [:], gtf ] ).gunzip.map{ it[1] }
ch_gtf = GUNZIP_GTF([[:], gtf]).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
} else {
ch_gtf = Channel.value(file(gtf))
}
} else if (gff) {
else {
ch_gtf = Channel.value(file(gtf, checkIfExists: true))
}
}
else if (gff) {
if (gff.endsWith('.gz')) {
ch_gff = GUNZIP_GFF ( [ [:], gff ] ).gunzip.map{ it[1] }
ch_gff = GUNZIP_GFF([[:], file(gff, checkIfExists: true)]).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions)
} else {
ch_gff = Channel.value(file(gff))
}
ch_gtf = GFFREAD ( [ [:], ch_gff ] ).gtf
else {
ch_gff = Channel.value(file(gff, checkIfExists: true))
}

//
// Detect gff file name stripped of extension and .gz
//
extension = (gff - '.gz').tokenize('.')[-1]
id = file(gff).baseName.toString() - '.gz' - ".${extension}"

ch_gtf = GFFREAD(ch_gff.map { [[id: id], it] }, []).gtf.map { it[1] }

ch_versions = ch_versions.mix(GFFREAD.out.versions)
}

Expand All @@ -82,9 +96,10 @@ workflow PREPARE_GENOME {
ch_blacklist = Channel.empty()
if (blacklist) {
if (blacklist.endsWith('.gz')) {
ch_blacklist = GUNZIP_BLACKLIST ( [ [:], blacklist ] ).gunzip.map{ it[1] }
ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions)
} else {
ch_blacklist = GUNZIP_BLACKLIST([[:], blacklist]).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions)
}
else {
ch_blacklist = Channel.value(file(blacklist))
}
}
Expand All @@ -98,38 +113,41 @@ workflow PREPARE_GENOME {
def make_bed = false
if (!gene_bed) {
make_bed = true
} else if (genome && gtf) {
if (genomes[ genome ].gtf != gtf) {
}
else if (genome && gtf) {
if (genomes[genome].gtf != gtf) {
make_bed = true
}
}

if (make_bed) {
ch_gene_bed = GTF2BED ( ch_gtf ).bed
ch_gene_bed = GTF2BED(ch_gtf).bed
ch_versions = ch_versions.mix(GTF2BED.out.versions)
} else {
}
else {
if (gene_bed.endsWith('.gz')) {
ch_gene_bed = GUNZIP_GENE_BED ( [ [:], gene_bed ] ).gunzip.map{ it[1] }
ch_gene_bed = GUNZIP_GENE_BED([[:], gene_bed]).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions)
} else {
}
else {
ch_gene_bed = Channel.value(file(gene_bed))
}
}

//
// Create chromosome sizes file
//
CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } )
CUSTOM_GETCHROMSIZES(ch_fasta.map { [[:], it] })
ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] }
ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map{ it[1] }
ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)
ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] }
ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)

//
// Prepare genome intervals for filtering by removing regions in blacklist file
//
ch_genome_filtered_bed = Channel.empty()

GENOME_BLACKLIST_REGIONS (
GENOME_BLACKLIST_REGIONS(
ch_chrom_sizes,
ch_blacklist.ifEmpty([])
)
Expand All @@ -143,14 +161,16 @@ workflow PREPARE_GENOME {
if (prepare_tool_index == 'bwa') {
if (bwa_index) {
if (bwa_index.endsWith('.tar.gz')) {
ch_bwa_index = UNTAR_BWA_INDEX ( [ [:], bwa_index ] ).untar
ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
} else {
ch_bwa_index = [ [:], file(bwa_index) ]
ch_bwa_index = UNTAR_BWA_INDEX([[:], bwa_index]).untar
ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
}
else {
ch_bwa_index = [[:], file(bwa_index)]
}
} else {
ch_bwa_index = BWA_INDEX ( ch_fasta.map { [ [:], it ] } ).index
ch_versions = ch_versions.mix(BWA_INDEX.out.versions)
}
else {
ch_bwa_index = BWA_INDEX(ch_fasta.map { [[:], it] }).index
ch_versions = ch_versions.mix(BWA_INDEX.out.versions)
}
}

Expand All @@ -161,14 +181,16 @@ workflow PREPARE_GENOME {
if (prepare_tool_index == 'bowtie2') {
if (bowtie2_index) {
if (bowtie2_index.endsWith('.tar.gz')) {
ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( [ [:], bowtie2_index ] ).untar
ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions)
} else {
ch_bowtie2_index = [ [:], file(bowtie2_index) ]
ch_bowtie2_index = UNTAR_BOWTIE2_INDEX([[:], bowtie2_index]).untar
ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions)
}
else {
ch_bowtie2_index = [[:], file(bowtie2_index)]
}
} else {
ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta.map { [ [:], it ] } ).index
ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions)
}
else {
ch_bowtie2_index = BOWTIE2_BUILD(ch_fasta.map { [[:], it] }).index
ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions)
}
}

Expand All @@ -179,14 +201,16 @@ workflow PREPARE_GENOME {
if (prepare_tool_index == 'chromap') {
if (chromap_index) {
if (chromap_index.endsWith('.tar.gz')) {
ch_chromap_index = UNTARFILES ( [ [:], chromap_index ] ).files
ch_versions = ch_versions.mix(UNTARFILES.out.versions)
} else {
ch_chromap_index = [ [:], file(chromap_index) ]
ch_chromap_index = UNTARFILES([[:], chromap_index]).files
ch_versions = ch_versions.mix(UNTARFILES.out.versions)
}
else {
ch_chromap_index = [[:], file(chromap_index)]
}
} else {
ch_chromap_index = CHROMAP_INDEX ( ch_fasta.map { [ [:], it ] } ).index
ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions)
}
else {
ch_chromap_index = CHROMAP_INDEX(ch_fasta.map { [[:], it] }).index
ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions)
}
}

Expand All @@ -197,27 +221,29 @@ workflow PREPARE_GENOME {
if (prepare_tool_index == 'star') {
if (star_index) {
if (star_index.endsWith('.tar.gz')) {
ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map{ it[1] }
ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
} else {
ch_star_index = UNTAR_STAR_INDEX([[:], star_index]).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
}
else {
ch_star_index = Channel.value(file(star_index))
}
} else {
ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf ).index
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
}
else {
ch_star_index = STAR_GENOMEGENERATE(ch_fasta, ch_gtf).index
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
}
}

emit:
fasta = ch_fasta // path: genome.fasta
fai = ch_fai // path: genome.fai
gtf = ch_gtf // path: genome.gtf
gene_bed = ch_gene_bed // path: gene.bed
chrom_sizes = ch_chrom_sizes // path: genome.sizes
filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed
bwa_index = ch_bwa_index // path: bwa/index/
bowtie2_index = ch_bowtie2_index // path: bowtie2/index/
chromap_index = ch_chromap_index // path: genome.index
star_index = ch_star_index // path: star/index/
versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
fasta = ch_fasta // path: genome.fasta
fai = ch_fai // path: genome.fai
gtf = ch_gtf // path: genome.gtf
gene_bed = ch_gene_bed // path: gene.bed
chrom_sizes = ch_chrom_sizes // path: genome.sizes
filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed
bwa_index = ch_bwa_index // path: bwa/index/
bowtie2_index = ch_bowtie2_index // path: bowtie2/index/
chromap_index = ch_chromap_index // path: genome.index
star_index = ch_star_index // path: star/index/
versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
}