From fca14007588d27b6b9c0ae87630f2d79ea94ee84 Mon Sep 17 00:00:00 2001 From: Yuk Kei Wan <41866052+yuukiiwa@users.noreply.github.com> Date: Thu, 28 May 2020 17:20:04 +0800 Subject: [PATCH] Update readWrite.R Former-commit-id: 183d3a7ed187329f3319ef8998ea8b7ff91339e3 --- R/readWrite.R | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/R/readWrite.R b/R/readWrite.R index 76445b61..eb267be7 100644 --- a/R/readWrite.R +++ b/R/readWrite.R @@ -87,3 +87,38 @@ read.gtf <- function(file){ } return (grlist) } + +#' Outputs GRangesList object from reading a GTF file for running bambu +#' @title convert a GTF file into a GRangesList +#' @param file a GTF file +#' @return grlist a GRangesList object +#' @export +prepareAnnotationsFromGTF_draft <- function(file){ + if (missing(file)){ + stop('A GTF file is required.') + }else{ + data <- read.delim(file,header=FALSE,,comment.char='#') + colnames(data) <- c("seqname","source","type","start","end","score","strand","frame","attribute") + data <- data[data$type=='exon',] + data$strand[data$strand=='.'] <- '*' + data$GENEID = gsub('gene_id (.*?);.*','\\1',data$attribute) + data$TXNAME=gsub('.*transcript_id (.*?);.*', '\\1',data$attribute) + data$exon_rank=gsub('.*exon_number (.*?);.*', '\\1',data$attribute) + geneData=unique(data[,c('TXNAME', 'GENEID')]) + grlist <- makeGRangesListFromDataFrame( + data,split.field='TXNAME', names.field='TXNAME',keep.extra.columns = TRUE) + geneData=(unique(data[,c('TXNAME', 'GENEID')])) + unlistedExons <- unlist(grlist, use.names = FALSE) + partitioning <- PartitioningByEnd(cumsum(elementNROWS(grlist)), names=NULL) + txIdForReorder <- togroup(PartitioningByWidth(grlist)) + unlistedExons <- unlistedExons[order(txIdForReorder, unlistedExons$exon_rank)] #'exonsByTx' is always sorted by exon rank, not by strand, make sure that this is the case here + unlistedExons$exon_endRank <- unlist(sapply(elementNROWS(grlist),seq,to=1), use.names=FALSE) + unlistedExons <- unlistedExons[order(txIdForReorder, start(unlistedExons))] + mcols(unlistedExons) <- mcols(unlistedExons)[,c('exon_rank','exon_endRank')] + grlist <- relist(unlistedExons, partitioning) + minEqClasses <- getMinimumEqClassByTx(grlist) + mcols(grlist) <- DataFrame(geneData[(match(names(grlist), geneData$TXNAME)),]) + mcols(grlist)$eqClass <- minEqClasses$eqClass[match(names(grlist),minEqClasses$queryTxId)] + } + return (grlist) +}