Skip to content

Commit

Permalink
Merge pull request #106 from GoekeLab/bambu-devel
Browse files Browse the repository at this point in the history
Update documentations in master branch

Former-commit-id: 91eae31
  • Loading branch information
cying111 authored May 29, 2020
2 parents 8dbfafb + 36e1456 commit 5fa25de
Show file tree
Hide file tree
Showing 76 changed files with 6,543 additions and 329 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
^\.Rproj\.user$
^packrat/
^\.Rprofile$
^doc$
^Meta$
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,5 @@ vignettes/*.pdf
.Renviron
packrat/lib*/
.Rproj.user
doc
Meta
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Package: bambu
Type: Package
Title: Reference-guided isoform reconstruction and quantification for long read RNA-Seq data
Version: 0.9.0
Authors@R: c(person("Jonathan Goeke", "Developer", role = "aut",
email = "gokej@gis.a-star.edu.sg"),
person("Ying Chen", "Developer", role = "cre",email = "chen_ying@gis.a-star.edu.sg"))
Version: 0.1.0
Authors@R: c(person("Ying Chen", "Developer", role = "cre",email = "chen_ying@gis.a-star.edu.sg"),
person("Jonathan Goeke", "Developer", role = "aut",
email = "gokej@gis.a-star.edu.sg"))
Description: Multi-sample transcript discovery and quantification using long read RNA-Seq data.
License: GPL-3
Encoding: UTF-8
Expand Down
53 changes: 32 additions & 21 deletions R/abundance_quantification.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,37 @@
#' @title transcript_abundance_quantification
#' @param method A string variable indicates the whether a one-step or two-step approach will be used. See \code{Details}
#' for details on one-step and two-step approach.
#' @param read_classDT A \code{data.table} with columns
#' @param readClassDt A \code{data.table} with columns
#' @importFrom BiocParallel bplapply
#' @noRd
abundance_quantification <- function(read_classDT,ncore = 1,
bias_correction = TRUE,
abundance_quantification <- function(readClassDt,ncore = 1,
bias = TRUE,
maxiter = 20000,
conv.control = 10^(-8)){
gene_sidList <- unique(read_classDT$gene_sid)

bpParameters <- BiocParallel::bpparam()
bpParameters$workers <- ncore

conv = 10^(-8)){
gene_sidList <- unique(readClassDt$gene_sid)

if(ncore == 1){

emResultsList <- lapply(as.list(gene_sidList),
run_parallel,
conv = conv,
bias = bias,
maxiter = maxiter,
readClassDt = readClassDt)
}else{
bpParameters <- BiocParallel::bpparam()
bpParameters$workers <- ncore

emResultsList <- BiocParallel::bplapply(as.list(gene_sidList),
run_parallel,
conv = conv,
bias = bias,
maxiter = maxiter,
readClassDt = readClassDt,
BPPARAM=bpParameters)
}


emResultsList <- BiocParallel::bplapply(as.list(gene_sidList),
run_parallel,
conv.control = conv.control,
bias_correction = bias_correction,
maxiter = maxiter,
read_classDT = read_classDT,
BPPARAM=bpParameters)


estimates <- list(do.call('rbind',lapply(1:length(emResultsList), function(x) emResultsList[[x]][[1]])),
Expand All @@ -30,8 +41,8 @@ abundance_quantification <- function(read_classDT,ncore = 1,
return(estimates)
}

run_parallel <- function(g,conv.control,bias_correction,maxiter, read_classDT){
tmp <- read_classDT[gene_sid==g]
run_parallel <- function(g,conv,bias,maxiter, readClassDt){
tmp <- readClassDt[gene_sid==g]
if((nrow(tmp)==1)){
out <- list(data.table(tx_sid = tmp$tx_sid,
estimates = tmp$nobs,
Expand All @@ -58,12 +69,12 @@ run_parallel <- function(g,conv.control,bias_correction,maxiter, read_classDT){
est_output <- emWithL1(X = as.matrix(a_mat),
Y = n.obs,
lambda = lambda,
d = bias_correction,
d = bias,
maxiter = maxiter,
conv = conv.control)
conv = conv)
t_est <- as.numeric(t(est_output[["theta"]]))

if(bias_correction){
if(bias){
b_est <- as.numeric(t(est_output[["b"]]))
}else{
b_est <- rep(0,ncol(a_mat))
Expand Down
69 changes: 41 additions & 28 deletions R/annotationFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,39 +34,52 @@ prepareAnnotations <- function(txdb) {
}


#' Prepare annotations from gtf
#' @title prepare annotations from gtf file
#' @param gtf.file A string variable indicates the path to a gtf file.
#' @param organism as described in \code{\link{makeTxDbFromGFF}}.
#' @param dataSource as described in \code{\link{makeTxDbFromGFF}}.
#' @param taxonomyId as described in \code{\link{makeTxDbFromGFF}}.
#' @param chrominfo as described in \code{\link{makeTxDbFromGFF}}.
#' @param miRBaseBuild as described in \code{\link{makeTxDbFromGFF}}.
#' @param metadata as described in \code{\link{makeTxDbFromGFF}}.
#' @param dbxrefTag as described in \code{\link{makeTxDbFromGFF}}.
#' @param ... see \code{\link{makeTxDbFromGFF}}.
#' @return A \code{\link{GrangesList}} object
#' Prepare annotation granges object from GTF file
#' @title Prepare annotation granges object from GTF file into a GRangesList object
#' @param file a GTF file
#' @return grlist a \code{\link{GRangesList}} object, unlike \code\link{readFromGTF}},
#' this function finds out the equivalence classes between the transcripts,
#' with \code{\link{mcols}} data having three columns:
#' \itemize{
#' \item TXNAME specifying prefix for new gene Ids (genePrefix.number), defaults to empty
#' \item GENEID indicating whether filter to remove read classes which are a subset of known transcripts(), defaults to TRUE
#' \item eqClass specifying minimun read count to consider a read class valid in a sample, defaults to 2
#' }
#'
#' @export
prepareAnnotationsFromGTF <- function(gtf.file, dataSource=NA,
organism="Homo sapiens",
taxonomyId=NA,
chrominfo=NULL,
miRBaseBuild=NA,
metadata=NULL,
dbxrefTag,...){
return(prepareAnnotations(GenomicFeatures::makeTxDbFromGFF(gtf.file, format = "gtf",
organism = organism,
dataSource = dataSource,
taxonomyId = taxonomyId,
chrominfo = chrominfo,
miRBaseBuild = miRBaseBuild,
metadata = metadata,
dbxrefTag = dbxrefTag
)))
prepareAnnotationsFromGTF <- function(file){
if (missing(file)){
stop('A GTF file is required.')
}else{
data <- read.delim(file,header=FALSE,comment.char='#')
colnames(data) <- c("seqname","source","type","start","end","score","strand","frame","attribute")
data <- data[data$type=='exon',]
data$strand[data$strand=='.'] <- '*'
data$GENEID = gsub('gene_id (.*?);.*','\\1',data$attribute)
data$TXNAME=gsub('.*transcript_id (.*?);.*', '\\1',data$attribute)
data$exon_rank=as.integer(gsub('.*exon_number (.*?);.*', '\\1',data$attribute))
geneData=unique(data[,c('TXNAME', 'GENEID')])
grlist <- makeGRangesListFromDataFrame(
data[,c('seqname', 'start','end','strand','exon_rank','TXNAME')],split.field='TXNAME',keep.extra.columns = TRUE)

unlistedExons <- unlist(grlist, use.names = FALSE)
partitioning <- PartitioningByEnd(cumsum(elementNROWS(grlist)), names=NULL)
txIdForReorder <- togroup(PartitioningByWidth(grlist))
unlistedExons <- unlistedExons[order(txIdForReorder, unlistedExons$exon_rank)] #'exonsByTx' is always sorted by exon rank, not by strand, make sure that this is the case here
unlistedExons$exon_endRank <- unlist(sapply(elementNROWS(grlist),seq,to=1), use.names=FALSE)
unlistedExons <- unlistedExons[order(txIdForReorder, start(unlistedExons))]
# mcols(unlistedExons) <- mcols(unlistedExons)[,c('exon_rank','exon_endRank')]
grlist <- relist(unlistedExons, partitioning)
minEqClasses <- getMinimumEqClassByTx(grlist)
mcols(grlist) <- DataFrame(geneData[(match(names(grlist), geneData$TXNAME)),])
mcols(grlist)$eqClass <- minEqClasses$eqClass[match(names(grlist),minEqClasses$queryTxId)]
}
return (grlist)
}




#' Get minimum equivalent class by Transcript
#' @param exonsByTranscripts exonsByTranscripts
#' @noRd
Expand Down
Loading

0 comments on commit 5fa25de

Please sign in to comment.