Skip to content

Commit

Permalink
minor update to prepareAnnotationsFromGTF_draft
Browse files (browse the repository at this point in the history)
removed names from exons

Former-commit-id: cda44b3
  • Loading branch information
jonathangoeke authored May 28, 2020
1 parent fca1400 commit 7e28d96
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions R/readWrite.R
Original file line number | Diff line number | Diff line change
Expand Up @@ -97,24 +97,24 @@ prepareAnnotationsFromGTF_draft <- function(file){
if (missing(file)){
stop('A GTF file is required.')
}else{
data <- read.delim(file,header=FALSE,,comment.char='#')
data <- read.delim(file,header=FALSE,comment.char='#')
colnames(data) <- c("seqname","source","type","start","end","score","strand","frame","attribute")
data <- data[data$type=='exon',]
data$strand[data$strand=='.'] <- '*'
data$GENEID = gsub('gene_id (.*?);.*','\\1',data$attribute)
data$TXNAME=gsub('.*transcript_id (.*?);.*', '\\1',data$attribute)
data$exon_rank=gsub('.*exon_number (.*?);.*', '\\1',data$attribute)
data$exon_rank=as.integer(gsub('.*exon_number (.*?);.*', '\\1',data$attribute))
geneData=unique(data[,c('TXNAME', 'GENEID')])
grlist <- makeGRangesListFromDataFrame(
data,split.field='TXNAME', names.field='TXNAME',keep.extra.columns = TRUE)
geneData=(unique(data[,c('TXNAME', 'GENEID')]))
data[,c('seqname', 'start','end','strand','exon_rank','TXNAME')],split.field='TXNAME',keep.extra.columns = TRUE)

unlistedExons <- unlist(grlist, use.names = FALSE)
partitioning <- PartitioningByEnd(cumsum(elementNROWS(grlist)), names=NULL)
txIdForReorder <- togroup(PartitioningByWidth(grlist))
unlistedExons <- unlistedExons[order(txIdForReorder, unlistedExons$exon_rank)] #'exonsByTx' is always sorted by exon rank, not by strand, make sure that this is the case here
unlistedExons$exon_endRank <- unlist(sapply(elementNROWS(grlist),seq,to=1), use.names=FALSE)
unlistedExons <- unlistedExons[order(txIdForReorder, start(unlistedExons))]
mcols(unlistedExons) <- mcols(unlistedExons)[,c('exon_rank','exon_endRank')]
# mcols(unlistedExons) <- mcols(unlistedExons)[,c('exon_rank','exon_endRank')]
grlist <- relist(unlistedExons, partitioning)
minEqClasses <- getMinimumEqClassByTx(grlist)
mcols(grlist) <- DataFrame(geneData[(match(names(grlist), geneData$TXNAME)),])
Expand Down

0 comments on commit 7e28d96

Please sign in to comment.