Skip to content

Commit

Permalink
parallelization enabled
Browse files Browse the repository at this point in the history
  • Loading branch information
piyalkarum committed Jul 26, 2024
1 parent 978cadc commit c416d68
Show file tree
Hide file tree
Showing 20 changed files with 512 additions and 431 deletions.
8 changes: 7 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ export(vcf.stat)
export(vst)
export(vstPermutation)
importFrom(R.utils,gzip)
importFrom(colorspace,rainbow_hcl)
importFrom(colorspace,terrain_hcl)
importFrom(data.table,fread)
importFrom(grDevices,as.raster)
Expand All @@ -45,6 +44,13 @@ importFrom(graphics,par)
importFrom(graphics,polygon)
importFrom(graphics,rasterImage)
importFrom(graphics,text)
importFrom(parallel,clusterExport)
importFrom(parallel,detectCores)
importFrom(parallel,makeCluster)
importFrom(parallel,parApply)
importFrom(parallel,parLapply)
importFrom(parallel,parSapply)
importFrom(parallel,stopCluster)
importFrom(qgraph,qgraph)
importFrom(stats,as.dist)
importFrom(stats,chisq.test)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# rCNV 1.3.0 (third update)
* parallelization enabled
* dupValidate function revised
* per site Fis added to deviant detection
* vstPermutation function added
* maf modified to remove multi-allelic sites
* FIT correction added
Expand Down
16 changes: 8 additions & 8 deletions R/TMM_normalz.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ TMM <- function(obs, ref, logratioTrim=.3, sumTrim=0.05, Weighting=TRUE, Acutoff
}


TMMex <- function(obs,ref, logratioTrim=.3, sumTrim=0.05, Weighting=TRUE, Acutoff=-1e10)
TMMex <- function(obs,ref, logratioTrim=.3, sumTrim=0.05, Weighting=TRUE, Acutoff=-1e10,cl)
# Gordon Smyth
# adopted from edgeR package (Robinson et al. 2010)
{
Expand Down Expand Up @@ -132,18 +132,18 @@ TMMex <- function(obs,ref, logratioTrim=.3, sumTrim=0.05, Weighting=TRUE, Acutof

## quantile normalization according to Robinson MD, and Oshlack A (2010)
# qn1
quantile_normalisation <- function(df,het.table=NULL,verbose=verbose){
df_rank <- apply(df,2,rank,ties.method="min")
quantile_normalisation <- function(df,het.table=NULL,verbose=verbose,cl){
df_rank <- parApply(cl=cl,df,2,rank,ties.method="min")
df_sorted <- data.frame(apply(df, 2, sort,na.last=TRUE))
df_mean <- apply(df_sorted, 1, mean,na.rm=TRUE)
df_mean <- parApply(cl=cl,df_sorted, 1, mean,na.rm=TRUE)
if(!is.null(het.table)){
het.table<-het.table[,-c(1:4)]
}
if(verbose){
message("\ncalculating normalized depth")
df_final <- lapply_pb(1:ncol(df_rank), index_to_mean, my_mean=df_mean,indx=df_rank,al=het.table)
df_final <- parLapply(cl=cl,1:ncol(df_rank), index_to_mean, my_mean=df_mean,indx=df_rank,al=het.table)
} else {
df_final <- lapply(1:ncol(df_rank), index_to_mean, my_mean=df_mean,indx=df_rank,al=het.table)
df_final <- parLapply(cl=cl,1:ncol(df_rank), index_to_mean, my_mean=df_mean,indx=df_rank,al=het.table)
}
return(df_final)
}
Expand Down Expand Up @@ -309,7 +309,7 @@ cpm.normal <- function(het.table, method=c("MedR","QN","pca","TMM","TMMex"),
message("calculating normalization factor")
pb <- txtProgressBar(min = 0, max = pb_Total, width = 50, style = 3)
}
# tdep<-rCNV:::apply_pb(het.table[,-c(1:4)],2,function(tmp){
# tdep<-parApply(cl=cl,(het.table[,-c(1:4)],2,function(tmp){
y1 <- y2 <- matrix(NA_integer_, dm[1], pb_Total)
for(i in seq_len(pb_Total)){
if (verbose) setTxtProgressBar(pb, i)
Expand Down Expand Up @@ -353,7 +353,7 @@ cpm.normal <- function(het.table, method=c("MedR","QN","pca","TMM","TMMex"),
out <- paste0(y1, ",", y2)
attributes(out) <- attributes(tdep)
} else if(method=="QN"){
out <- do.call(cbind,quantile_normalisation(tdep,het.table,verbose=verbose))
out <- do.call(cbind,quantile_normalisation(tdep,het.table,verbose=verbose,cl=cl))
} else if(method=="pca"){
if(verbose){message("\ncalculating normalized depth")}
new.mat <- t(tdep) ### check the direction to confirm if this step need to be done
Expand Down
223 changes: 76 additions & 147 deletions R/bias.det.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ get.pvals<-function(x,df,p.cal){
#' \code{hetTgen} (non-normalized)
#' @param x.norm a data frame of normalized allele coverage, output of
#' \code{cpm.normal}. If not provided, calculated using \code{X}.
#' @param Fis numeric. Inbreeding coefficient calculated using \code{h.zygosity()} function
#' @param method character. method to be used for normalization
#' (see \code{cpm.normal} details). Default \code{TMM}
#' @param logratioTrim numeric. percentage value (0 - 1) of variation to be
Expand All @@ -73,9 +74,11 @@ get.pvals<-function(x,df,p.cal){
#' @param plot.allele.cov logical, plot comparative plots of allele depth
#' coverage in homozygotes and heterozygotes
#' @param verbose logical, whether to print progress
#' @param parallel logical. whether to parallelize the process
#' @param \dots further arguments to be passed to \code{plot}
#'
#' @importFrom stats pchisq pnorm na.omit
#' @importFrom parallel parApply detectCores parLapply stopCluster
#'
#' @details
#' Allele information generated here are individual SNP based and presents the
Expand Down Expand Up @@ -105,43 +108,19 @@ get.pvals<-function(x,df,p.cal){
#' AI<-allele.info(ADtable,x.norm=ADnorm)}
#'
#' @export
allele.info<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"),logratioTrim = 0.3,sumTrim = 0.05,Weighting = TRUE,Acutoff = -1e+10,plot.allele.cov=TRUE,verbose = TRUE,...){
allele.info<-function(X,x.norm=NULL,Fis,method=c("MedR","QN","pca","TMM","TMMex"),logratioTrim = 0.3,sumTrim = 0.05,Weighting = TRUE,Acutoff = -1e+10,plot.allele.cov=TRUE,verbose = TRUE,parallel=FALSE,...){
method=match.arg(method)
if(is.null(x.norm)){
x.norm<-cpm.normal(X,method=method,logratioTrim=logratioTrim,sumTrim = sumTrim,Weighting = Weighting,Acutoff = Acutoff,verbose = verbose)
}
if(!inherits(x.norm,"list")){x.norm<-list(AD=x.norm)}
if(inherits(x.norm,"list")){x.norm<-x.norm$AD}
if(verbose){
message("calculating probability values of alleles")
p.cal<-apply_pb(x.norm[,-c(1:4)],1,function(snp1){
if(is.character(unname(unlist(snp1[1])))){
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
y[,1]<-as.integer(y[,1])
y[,2]<-as.integer(y[,2])} else {y<-snp1}
rs<-rowSums(y)
rs[rs==0L]<-NA
cv<-sd(unlist(rs),na.rm = TRUE)/mean(unlist(rs),na.rm = TRUE)
rr1<-y[,2]/rs
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
homalt<-sum(rr1==1,na.rm=TRUE)
homref<-sum(rr1==0,na.rm=TRUE)
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
covalt<-sum(y[,2],na.rm = TRUE)
covref<-sum(y[,1],na.rm = TRUE)
NHet<-nrow(snp1het)
if(NHet>3){
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
p.sum<-sum(snp1het[,2])/sum(snp1het)
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
} else {
ll<-NA
}
return(ll)
})
} else {
p.cal<-apply(x.norm[,-c(1:4)],1,function(snp1){

if(parallel){
numCores<-detectCores()-1
cl<-makeCluster(cl)

p.cal<-parApply(cl=cl,x.norm[,-c(1:4)],1,function(snp1){
if(is.character(unname(unlist(snp1[1])))){
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
y[,1]<-as.integer(y[,1])
Expand All @@ -167,121 +146,66 @@ allele.info<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"),lo
}
return(ll)
})
}
if(is.list(p.cal)){
p.cal<-do.call(rbind,p.cal)
} else {
p.cal<-t(p.cal)
}
p.cal[p.cal=="NaN"]<-0
if(plot.allele.cov){
p.list<-list(...)
if(is.null(p.list$pch)) p.list$pch=19
if(is.null(p.list$cex)) p.list$cex=0.6
if(is.null(p.list$col)) p.list$col<-makeTransparent(colorspace::heat_hcl(12,h=c(0,-100),c=c(40,80),l=c(75,40),power=1)[11])
if(is.null(p.list$lcol)) p.list$lcol="tomato"
par(mfrow=c(2,2))
par(mar=c(4,5,2,2))
plot(p.cal$mean.a.homo,p.cal$mean.r.homo,pch=p.list$pch,cex=p.list$cex,
col=p.list$col,xlab="Mean coverage of \nalt. allele in homozygotes",
ylab="Mean coverage of \n ref. allele in homozygotes",cex.lab=0.8)
abline(0,1,col=p.list$lcol)
plot(p.cal$mean.a.het,p.cal$mean.r.het,pch=p.list$pch,cex=p.list$cex,
col=p.list$col,xlab="Mean coverage of \nalt. allele in heterozygotes",
ylab="Mean coverage of \n ref. allele in heterozygotes",cex.lab=0.8)
abline(0,1,col=p.list$lcol)
plot(p.cal$mean.a.het,p.cal$mean.a.homo,pch=p.list$pch,cex=p.list$cex,
col=p.list$col,xlab="Mean coverage of \nalt. allele in heterozygotes",
ylab="Mean coverage of \n alt. allele in homozygotes",cex.lab=0.8)
abline(0,1,col=p.list$lcol)
plot(p.cal$mean.r.het,p.cal$mean.r.homo,pch=p.list$pch,cex=p.list$cex,
col=p.list$col,xlab="Mean coverage of \nref. allele in heterozygotes",
ylab="Mean coverage of \n ref. allele in homozygotes",cex.lab=0.8)
abline(0,1,col=p.list$lcol)
par(mfrow=c(1,1))
}

if(verbose){
message("calculating chi-square significance")
pvals<-lapply_pb(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
} else {
pvals<-lapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
if(verbose){
message("calculating probability values of alleles")
p.cal<-apply_pb(x.norm[,-c(1:4)],1,function(snp1){
if(is.character(unname(unlist(snp1[1])))){
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
y[,1]<-as.integer(y[,1])
y[,2]<-as.integer(y[,2])} else {y<-snp1}
rs<-rowSums(y)
rs[rs==0L]<-NA
cv<-sd(unlist(rs),na.rm = TRUE)/mean(unlist(rs),na.rm = TRUE)
rr1<-y[,2]/rs
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
homalt<-sum(rr1==1,na.rm=TRUE)
homref<-sum(rr1==0,na.rm=TRUE)
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
covalt<-sum(y[,2],na.rm = TRUE)
covref<-sum(y[,1],na.rm = TRUE)
NHet<-nrow(snp1het)
if(NHet>3){
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
p.sum<-sum(snp1het[,2])/sum(snp1het)
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
} else {
ll<-NA
}
return(ll)
})
} else {
p.cal<-apply(x.norm[,-c(1:4)],1,function(snp1){
if(is.character(unname(unlist(snp1[1])))){
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
y[,1]<-as.integer(y[,1])
y[,2]<-as.integer(y[,2])} else {y<-snp1}
rs<-rowSums(y)
rs[rs==0]<-NA
cv<-sd(unlist(rs),na.rm = TRUE)/mean(unlist(rs),na.rm = TRUE)
rr1<-y[,2]/rs
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
homalt<-sum(rr1==1,na.rm=TRUE)
homref<-sum(rr1==0,na.rm=TRUE)
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
covalt<-sum(y[,2],na.rm = TRUE)
covref<-sum(y[,1],na.rm = TRUE)
NHet<-nrow(snp1het)
if(NHet>3){
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
p.sum<-sum(snp1het[,2])/sum(snp1het)
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
} else {
ll<-NA
}
return(ll)
})
}
}
pvals<-do.call(rbind,pvals)
pvals<-cbind(X[,1:3],pvals)
pvals<-na.omit(pvals)
ht<-sig.hets(pvals,plot = FALSE, verbose = verbose)
pvals<-data.frame(pvals,eH.pval=ht[,"eH.pval"],eH.delta=ht[,"eH.delta"],cv=na.omit(p.cal[,"cv"]))

return(pvals)
}



allele.info_old<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"),logratioTrim = 0.3,sumTrim = 0.05,Weighting = TRUE,Acutoff = -1e+10,plot.allele.cov=TRUE,verbose = TRUE,...){
method=match.arg(method)
if(is.null(x.norm)){
x.norm<-cpm.normal(X,method=method,logratioTrim=logratioTrim,sumTrim = sumTrim,Weighting = Weighting,Acutoff = Acutoff,verbose = verbose)
}
if(!inherits(x.norm,"list")){x.norm<-list(AD=x.norm)}
if(inherits(x.norm,"list")){x.norm<-x.norm$AD}

if(verbose){
message("calculating probability values of alleles")
p.cal<-apply_pb(x.norm[,-c(1:4)],1,function(snp1){
if(is.character(unname(unlist(snp1[1])))){
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
y[,1]<-as.integer(y[,1])
y[,2]<-as.integer(y[,2])} else {y<-snp1}
rs<-rowSums(y)
rs[rs==0L]<-NA
cv<-sd(unlist(y),na.rm = TRUE)/mean(unlist(y),na.rm = TRUE)
rr1<-y[,2]/rs
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
homalt<-sum(rr1==1,na.rm=TRUE)
homref<-sum(rr1==0,na.rm=TRUE)
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
covalt<-sum(y[,2],na.rm = TRUE)
covref<-sum(y[,1],na.rm = TRUE)
NHet<-nrow(snp1het)
if(NHet>3){
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
p.sum<-sum(snp1het[,2])/sum(snp1het)
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
} else {
ll<-NA
}
return(ll)
})
} else {
p.cal<-apply(x.norm[,-c(1:4)],1,function(snp1){
if(is.character(unname(unlist(snp1[1])))){
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
y[,1]<-as.integer(y[,1])
y[,2]<-as.integer(y[,2])} else {y<-snp1}
rs<-rowSums(y)
rs[rs==0]<-NA
cv<-sd(unlist(y),na.rm = TRUE)/mean(unlist(y),na.rm = TRUE)
rr1<-y[,2]/rs
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
homalt<-sum(rr1==1,na.rm=TRUE)
homref<-sum(rr1==0,na.rm=TRUE)
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
covalt<-sum(y[,2],na.rm = TRUE)
covref<-sum(y[,1],na.rm = TRUE)
NHet<-nrow(snp1het)
if(NHet>3){
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
p.sum<-sum(snp1het[,2])/sum(snp1het)
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
} else {
ll<-NA
}
return(ll)
})
}
if(is.list(p.cal)){
p.cal<-do.call(rbind,p.cal)
} else {
Expand Down Expand Up @@ -315,16 +239,21 @@ allele.info_old<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"
par(mfrow=c(1,1))
}

if(verbose){
message("calculating chi-square significance")
pvals<-lapply_pb(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
if(parallel){
pvals<-parLapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal,cl=cl)
stopCluster(cl)
} else {
pvals<-lapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
if(verbose){
message("calculating chi-square significance")
pvals<-lapply_pb(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
} else {
pvals<-lapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
}
}
pvals<-do.call(rbind,pvals)
pvals<-cbind(X[,1:3],pvals)
pvals<-na.omit(pvals)
ht<-sig.hets(pvals,plot = FALSE, verbose = verbose)
ht<-sig.hets(pvals,Fis,plot = FALSE, verbose = verbose)
pvals<-data.frame(pvals,eH.pval=ht[,"eH.pval"],eH.delta=ht[,"eH.delta"],cv=na.omit(p.cal[,"cv"]))

return(pvals)
Expand Down
Loading

0 comments on commit c416d68

Please sign in to comment.