From fca14007588d27b6b9c0ae87630f2d79ea94ee84 Mon Sep 17 00:00:00 2001
From: Yuk Kei Wan <41866052+yuukiiwa@users.noreply.github.com>
Date: Thu, 28 May 2020 17:20:04 +0800
Subject: [PATCH] Update readWrite.R

Former-commit-id: 183d3a7ed187329f3319ef8998ea8b7ff91339e3
---
 R/readWrite.R | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/R/readWrite.R b/R/readWrite.R
index 76445b61..eb267be7 100644
--- a/R/readWrite.R
+++ b/R/readWrite.R
@@ -87,3 +87,38 @@ read.gtf <- function(file){
   }
   return (grlist)
 }
+
+#' Outputs GRangesList object from reading a GTF file for running bambu
+#' @title convert a GTF file into a GRangesList
+#' @param file a GTF file
+#' @return grlist a GRangesList object
+#' @export
+prepareAnnotationsFromGTF_draft <- function(file){
+  if (missing(file)){
+    stop('A GTF file is required.')
+  }else{
+    data <- read.delim(file,header=FALSE,,comment.char='#')
+    colnames(data) <- c("seqname","source","type","start","end","score","strand","frame","attribute")
+    data <- data[data$type=='exon',]
+    data$strand[data$strand=='.'] <- '*'
+    data$GENEID = gsub('gene_id (.*?);.*','\\1',data$attribute)
+    data$TXNAME=gsub('.*transcript_id (.*?);.*', '\\1',data$attribute)
+    data$exon_rank=gsub('.*exon_number (.*?);.*', '\\1',data$attribute)
+    geneData=unique(data[,c('TXNAME', 'GENEID')])
+    grlist <- makeGRangesListFromDataFrame(
+      data,split.field='TXNAME', names.field='TXNAME',keep.extra.columns = TRUE)
+    geneData=(unique(data[,c('TXNAME', 'GENEID')]))
+    unlistedExons <- unlist(grlist, use.names = FALSE)
+    partitioning <- PartitioningByEnd(cumsum(elementNROWS(grlist)), names=NULL)
+    txIdForReorder <- togroup(PartitioningByWidth(grlist))
+    unlistedExons <- unlistedExons[order(txIdForReorder, unlistedExons$exon_rank)] #'exonsByTx' is always sorted by exon rank, not by strand, make sure that this is the case here
+    unlistedExons$exon_endRank <- unlist(sapply(elementNROWS(grlist),seq,to=1), use.names=FALSE)
+    unlistedExons <- unlistedExons[order(txIdForReorder, start(unlistedExons))]
+    mcols(unlistedExons) <- mcols(unlistedExons)[,c('exon_rank','exon_endRank')]
+    grlist <- relist(unlistedExons, partitioning)
+    minEqClasses <- getMinimumEqClassByTx(grlist)
+    mcols(grlist) <- DataFrame(geneData[(match(names(grlist), geneData$TXNAME)),])
+    mcols(grlist)$eqClass <- minEqClasses$eqClass[match(names(grlist),minEqClasses$queryTxId)]
+  }
+  return (grlist)
+}