piyalkarum
diff --git a/‎NAMESPACE
+7-1 b/‎NAMESPACE
+7-1
diff --git a/‎NEWS.md
+3 b/‎NEWS.md
+3
diff --git a/‎R/TMM_normalz.R
+8-8 b/‎R/TMM_normalz.R
+8-8
diff --git a/‎R/bias.det.R
+76-147 b/‎R/bias.det.R
+76-147
@@ -25,7 +25,6 @@ export(vcf.stat)
 export(vst)
 export(vstPermutation)
 importFrom(R.utils,gzip)
-importFrom(colorspace,rainbow_hcl)
 importFrom(colorspace,terrain_hcl)
 importFrom(data.table,fread)
 importFrom(grDevices,as.raster)
@@ -45,6 +44,13 @@ importFrom(graphics,par)
 importFrom(graphics,polygon)
 importFrom(graphics,rasterImage)
 importFrom(graphics,text)
+importFrom(parallel,clusterExport)
+importFrom(parallel,detectCores)
+importFrom(parallel,makeCluster)
+importFrom(parallel,parApply)
+importFrom(parallel,parLapply)
+importFrom(parallel,parSapply)
+importFrom(parallel,stopCluster)
 importFrom(qgraph,qgraph)
 importFrom(stats,as.dist)
 importFrom(stats,chisq.test)
 
@@ -1,4 +1,7 @@
 # rCNV 1.3.0 (third update)
+* parallelization enabled
+* dupValidate function revised
+* per site Fis added to deviant detection
 * vstPermutation function added
 * maf modified to remove multi-allelic sites
 * FIT correction added
 
@@ -54,7 +54,7 @@ TMM <- function(obs, ref, logratioTrim=.3, sumTrim=0.05, Weighting=TRUE, Acutoff
 }
 
 
-TMMex <- function(obs,ref, logratioTrim=.3, sumTrim=0.05, Weighting=TRUE, Acutoff=-1e10)
+TMMex <- function(obs,ref, logratioTrim=.3, sumTrim=0.05, Weighting=TRUE, Acutoff=-1e10,cl)
   #	Gordon Smyth
   #	adopted from edgeR package (Robinson et al. 2010)
 {
@@ -132,18 +132,18 @@ TMMex <- function(obs,ref, logratioTrim=.3, sumTrim=0.05, Weighting=TRUE, Acutof
 
 ## quantile normalization according to Robinson MD, and Oshlack A (2010)
 # qn1
-quantile_normalisation <- function(df,het.table=NULL,verbose=verbose){
-  df_rank <- apply(df,2,rank,ties.method="min")
+quantile_normalisation <- function(df,het.table=NULL,verbose=verbose,cl){
+  df_rank <- parApply(cl=cl,df,2,rank,ties.method="min")
   df_sorted <- data.frame(apply(df, 2, sort,na.last=TRUE))
-  df_mean <- apply(df_sorted, 1, mean,na.rm=TRUE)
+  df_mean <- parApply(cl=cl,df_sorted, 1, mean,na.rm=TRUE)
   if(!is.null(het.table)){
     het.table<-het.table[,-c(1:4)]
   }
   if(verbose){
     message("\ncalculating normalized depth")
-    df_final <- lapply_pb(1:ncol(df_rank), index_to_mean, my_mean=df_mean,indx=df_rank,al=het.table)
+    df_final <- parLapply(cl=cl,1:ncol(df_rank), index_to_mean, my_mean=df_mean,indx=df_rank,al=het.table)
   } else {
-    df_final <- lapply(1:ncol(df_rank), index_to_mean, my_mean=df_mean,indx=df_rank,al=het.table)
+    df_final <- parLapply(cl=cl,1:ncol(df_rank), index_to_mean, my_mean=df_mean,indx=df_rank,al=het.table)
   }
   return(df_final)
 }
@@ -309,7 +309,7 @@ cpm.normal <- function(het.table, method=c("MedR","QN","pca","TMM","TMMex"),
     message("calculating normalization factor")
     pb <- txtProgressBar(min = 0, max = pb_Total, width = 50, style = 3)
   }
-  #  tdep<-rCNV:::apply_pb(het.table[,-c(1:4)],2,function(tmp){
+  #  tdep<-parApply(cl=cl,(het.table[,-c(1:4)],2,function(tmp){
   y1 <- y2 <- matrix(NA_integer_, dm[1], pb_Total)
   for(i in seq_len(pb_Total)){
     if (verbose) setTxtProgressBar(pb, i)
@@ -353,7 +353,7 @@ cpm.normal <- function(het.table, method=c("MedR","QN","pca","TMM","TMMex"),
     out <- paste0(y1, ",", y2)
     attributes(out) <- attributes(tdep)
   } else if(method=="QN"){
-    out <- do.call(cbind,quantile_normalisation(tdep,het.table,verbose=verbose))
+    out <- do.call(cbind,quantile_normalisation(tdep,het.table,verbose=verbose,cl=cl))
   } else if(method=="pca"){
     if(verbose){message("\ncalculating normalized depth")}
     new.mat <- t(tdep) ### check the direction to confirm if this step need to be done
 
@@ -61,6 +61,7 @@ get.pvals<-function(x,df,p.cal){
 #' \code{hetTgen} (non-normalized)
 #' @param x.norm a data frame of normalized allele coverage, output of
 #' \code{cpm.normal}. If not provided, calculated using \code{X}.
+#' @param Fis numeric. Inbreeding coefficient calculated using \code{h.zygosity()} function
 #' @param method character. method to be used for normalization
 #' (see \code{cpm.normal} details). Default \code{TMM}
 #' @param logratioTrim numeric. percentage value (0 - 1) of variation to be
@@ -73,9 +74,11 @@ get.pvals<-function(x,df,p.cal){
 #' @param plot.allele.cov logical, plot comparative plots of allele depth
 #' coverage in homozygotes and heterozygotes
 #' @param verbose logical, whether to print progress
+#' @param parallel logical. whether to parallelize the process
 #' @param \dots further arguments to be passed to \code{plot}
 #'
 #' @importFrom stats pchisq pnorm na.omit
+#' @importFrom parallel parApply detectCores parLapply stopCluster
 #'
 #' @details
 #' Allele information generated here are individual SNP based and presents the
@@ -105,43 +108,19 @@ get.pvals<-function(x,df,p.cal){
 #' AI<-allele.info(ADtable,x.norm=ADnorm)}
 #'
 #' @export
-allele.info<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"),logratioTrim = 0.3,sumTrim = 0.05,Weighting = TRUE,Acutoff = -1e+10,plot.allele.cov=TRUE,verbose = TRUE,...){
+allele.info<-function(X,x.norm=NULL,Fis,method=c("MedR","QN","pca","TMM","TMMex"),logratioTrim = 0.3,sumTrim = 0.05,Weighting = TRUE,Acutoff = -1e+10,plot.allele.cov=TRUE,verbose = TRUE,parallel=FALSE,...){
   method=match.arg(method)
   if(is.null(x.norm)){
     x.norm<-cpm.normal(X,method=method,logratioTrim=logratioTrim,sumTrim = sumTrim,Weighting = Weighting,Acutoff = Acutoff,verbose = verbose)
   }
   if(!inherits(x.norm,"list")){x.norm<-list(AD=x.norm)}
   if(inherits(x.norm,"list")){x.norm<-x.norm$AD}
-  if(verbose){
-    message("calculating probability values of alleles")
-    p.cal<-apply_pb(x.norm[,-c(1:4)],1,function(snp1){
-      if(is.character(unname(unlist(snp1[1])))){
-        y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
-        y[,1]<-as.integer(y[,1])
-        y[,2]<-as.integer(y[,2])} else {y<-snp1}
-      rs<-rowSums(y)
-      rs[rs==0L]<-NA
-      cv<-sd(unlist(rs),na.rm = TRUE)/mean(unlist(rs),na.rm = TRUE)
-      rr1<-y[,2]/rs
-      snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
-      homalt<-sum(rr1==1,na.rm=TRUE)
-      homref<-sum(rr1==0,na.rm=TRUE)
-      covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
-      covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
-      covalt<-sum(y[,2],na.rm = TRUE)
-      covref<-sum(y[,1],na.rm = TRUE)
-      NHet<-nrow(snp1het)
-      if(NHet>3){
-        p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
-        p.sum<-sum(snp1het[,2])/sum(snp1het)
-        ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
-      } else {
-        ll<-NA
-      }
-      return(ll)
-    })
-  } else {
-    p.cal<-apply(x.norm[,-c(1:4)],1,function(snp1){
+
+  if(parallel){
+    numCores<-detectCores()-1
+    cl<-makeCluster(cl)
+
+    p.cal<-parApply(cl=cl,x.norm[,-c(1:4)],1,function(snp1){
       if(is.character(unname(unlist(snp1[1])))){
         y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
         y[,1]<-as.integer(y[,1])
@@ -167,121 +146,66 @@ allele.info<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"),lo
       }
       return(ll)
     })
-  }
-  if(is.list(p.cal)){
-    p.cal<-do.call(rbind,p.cal)
-  } else {
-    p.cal<-t(p.cal)
-  }
-  p.cal[p.cal=="NaN"]<-0
-  if(plot.allele.cov){
-    p.list<-list(...)
-    if(is.null(p.list$pch)) p.list$pch=19
-    if(is.null(p.list$cex)) p.list$cex=0.6
-    if(is.null(p.list$col)) p.list$col<-makeTransparent(colorspace::heat_hcl(12,h=c(0,-100),c=c(40,80),l=c(75,40),power=1)[11])
-    if(is.null(p.list$lcol)) p.list$lcol="tomato"
-    par(mfrow=c(2,2))
-    par(mar=c(4,5,2,2))
-    plot(p.cal$mean.a.homo,p.cal$mean.r.homo,pch=p.list$pch,cex=p.list$cex,
-         col=p.list$col,xlab="Mean coverage of \nalt. allele in homozygotes",
-         ylab="Mean coverage of \n ref. allele in homozygotes",cex.lab=0.8)
-    abline(0,1,col=p.list$lcol)
-    plot(p.cal$mean.a.het,p.cal$mean.r.het,pch=p.list$pch,cex=p.list$cex,
-         col=p.list$col,xlab="Mean coverage of \nalt. allele in heterozygotes",
-         ylab="Mean coverage of \n ref. allele in heterozygotes",cex.lab=0.8)
-    abline(0,1,col=p.list$lcol)
-    plot(p.cal$mean.a.het,p.cal$mean.a.homo,pch=p.list$pch,cex=p.list$cex,
-         col=p.list$col,xlab="Mean coverage of \nalt. allele in heterozygotes",
-         ylab="Mean coverage of \n alt. allele in homozygotes",cex.lab=0.8)
-    abline(0,1,col=p.list$lcol)
-    plot(p.cal$mean.r.het,p.cal$mean.r.homo,pch=p.list$pch,cex=p.list$cex,
-         col=p.list$col,xlab="Mean coverage of \nref. allele in heterozygotes",
-         ylab="Mean coverage of \n ref. allele in homozygotes",cex.lab=0.8)
-    abline(0,1,col=p.list$lcol)
-    par(mfrow=c(1,1))
-  }
 
-  if(verbose){
-    message("calculating chi-square significance")
-    pvals<-lapply_pb(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
   } else {
-    pvals<-lapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
+    if(verbose){
+      message("calculating probability values of alleles")
+      p.cal<-apply_pb(x.norm[,-c(1:4)],1,function(snp1){
+        if(is.character(unname(unlist(snp1[1])))){
+          y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
+          y[,1]<-as.integer(y[,1])
+          y[,2]<-as.integer(y[,2])} else {y<-snp1}
+        rs<-rowSums(y)
+        rs[rs==0L]<-NA
+        cv<-sd(unlist(rs),na.rm = TRUE)/mean(unlist(rs),na.rm = TRUE)
+        rr1<-y[,2]/rs
+        snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
+        homalt<-sum(rr1==1,na.rm=TRUE)
+        homref<-sum(rr1==0,na.rm=TRUE)
+        covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
+        covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
+        covalt<-sum(y[,2],na.rm = TRUE)
+        covref<-sum(y[,1],na.rm = TRUE)
+        NHet<-nrow(snp1het)
+        if(NHet>3){
+          p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
+          p.sum<-sum(snp1het[,2])/sum(snp1het)
+          ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
+        } else {
+          ll<-NA
+        }
+        return(ll)
+      })
+    } else {
+      p.cal<-apply(x.norm[,-c(1:4)],1,function(snp1){
+        if(is.character(unname(unlist(snp1[1])))){
+          y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
+          y[,1]<-as.integer(y[,1])
+          y[,2]<-as.integer(y[,2])} else {y<-snp1}
+        rs<-rowSums(y)
+        rs[rs==0]<-NA
+        cv<-sd(unlist(rs),na.rm = TRUE)/mean(unlist(rs),na.rm = TRUE)
+        rr1<-y[,2]/rs
+        snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
+        homalt<-sum(rr1==1,na.rm=TRUE)
+        homref<-sum(rr1==0,na.rm=TRUE)
+        covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
+        covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
+        covalt<-sum(y[,2],na.rm = TRUE)
+        covref<-sum(y[,1],na.rm = TRUE)
+        NHet<-nrow(snp1het)
+        if(NHet>3){
+          p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
+          p.sum<-sum(snp1het[,2])/sum(snp1het)
+          ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
+        } else {
+          ll<-NA
+        }
+        return(ll)
+      })
+    }
   }
-  pvals<-do.call(rbind,pvals)
-  pvals<-cbind(X[,1:3],pvals)
-  pvals<-na.omit(pvals)
-  ht<-sig.hets(pvals,plot = FALSE, verbose = verbose)
-  pvals<-data.frame(pvals,eH.pval=ht[,"eH.pval"],eH.delta=ht[,"eH.delta"],cv=na.omit(p.cal[,"cv"]))
-
-  return(pvals)
-}
-
-
-
-allele.info_old<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"),logratioTrim = 0.3,sumTrim = 0.05,Weighting = TRUE,Acutoff = -1e+10,plot.allele.cov=TRUE,verbose = TRUE,...){
-  method=match.arg(method)
-  if(is.null(x.norm)){
-    x.norm<-cpm.normal(X,method=method,logratioTrim=logratioTrim,sumTrim = sumTrim,Weighting = Weighting,Acutoff = Acutoff,verbose = verbose)
-  }
-  if(!inherits(x.norm,"list")){x.norm<-list(AD=x.norm)}
-  if(inherits(x.norm,"list")){x.norm<-x.norm$AD}
 
-  if(verbose){
-    message("calculating probability values of alleles")
-    p.cal<-apply_pb(x.norm[,-c(1:4)],1,function(snp1){
-      if(is.character(unname(unlist(snp1[1])))){
-        y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
-        y[,1]<-as.integer(y[,1])
-        y[,2]<-as.integer(y[,2])} else {y<-snp1}
-      rs<-rowSums(y)
-      rs[rs==0L]<-NA
-      cv<-sd(unlist(y),na.rm = TRUE)/mean(unlist(y),na.rm = TRUE)
-      rr1<-y[,2]/rs
-      snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
-      homalt<-sum(rr1==1,na.rm=TRUE)
-      homref<-sum(rr1==0,na.rm=TRUE)
-      covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
-      covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
-      covalt<-sum(y[,2],na.rm = TRUE)
-      covref<-sum(y[,1],na.rm = TRUE)
-      NHet<-nrow(snp1het)
-      if(NHet>3){
-        p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
-        p.sum<-sum(snp1het[,2])/sum(snp1het)
-        ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
-      } else {
-        ll<-NA
-      }
-      return(ll)
-    })
-  } else {
-    p.cal<-apply(x.norm[,-c(1:4)],1,function(snp1){
-      if(is.character(unname(unlist(snp1[1])))){
-        y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
-        y[,1]<-as.integer(y[,1])
-        y[,2]<-as.integer(y[,2])} else {y<-snp1}
-      rs<-rowSums(y)
-      rs[rs==0]<-NA
-      cv<-sd(unlist(y),na.rm = TRUE)/mean(unlist(y),na.rm = TRUE)
-      rr1<-y[,2]/rs
-      snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
-      homalt<-sum(rr1==1,na.rm=TRUE)
-      homref<-sum(rr1==0,na.rm=TRUE)
-      covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
-      covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
-      covalt<-sum(y[,2],na.rm = TRUE)
-      covref<-sum(y[,1],na.rm = TRUE)
-      NHet<-nrow(snp1het)
-      if(NHet>3){
-        p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
-        p.sum<-sum(snp1het[,2])/sum(snp1het)
-        ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
-      } else {
-        ll<-NA
-      }
-      return(ll)
-    })
-  }
   if(is.list(p.cal)){
     p.cal<-do.call(rbind,p.cal)
   } else {
@@ -315,16 +239,21 @@ allele.info_old<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"
     par(mfrow=c(1,1))
   }
 
-  if(verbose){
-    message("calculating chi-square significance")
-    pvals<-lapply_pb(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
+  if(parallel){
+    pvals<-parLapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal,cl=cl)
+    stopCluster(cl)
   } else {
-    pvals<-lapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
+    if(verbose){
+      message("calculating chi-square significance")
+      pvals<-lapply_pb(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
+    } else {
+      pvals<-lapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
+    }
   }
   pvals<-do.call(rbind,pvals)
   pvals<-cbind(X[,1:3],pvals)
   pvals<-na.omit(pvals)
-  ht<-sig.hets(pvals,plot = FALSE, verbose = verbose)
+  ht<-sig.hets(pvals,Fis,plot = FALSE, verbose = verbose)
   pvals<-data.frame(pvals,eH.pval=ht[,"eH.pval"],eH.delta=ht[,"eH.delta"],cv=na.omit(p.cal[,"cv"]))
 
   return(pvals)