Skip to content

Commit c416d68

Browse files
committed
parallelization enabled
1 parent 978cadc commit c416d68

20 files changed

+512
-431
lines changed

NAMESPACE

+7-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ export(vcf.stat)
2525
export(vst)
2626
export(vstPermutation)
2727
importFrom(R.utils,gzip)
28-
importFrom(colorspace,rainbow_hcl)
2928
importFrom(colorspace,terrain_hcl)
3029
importFrom(data.table,fread)
3130
importFrom(grDevices,as.raster)
@@ -45,6 +44,13 @@ importFrom(graphics,par)
4544
importFrom(graphics,polygon)
4645
importFrom(graphics,rasterImage)
4746
importFrom(graphics,text)
47+
importFrom(parallel,clusterExport)
48+
importFrom(parallel,detectCores)
49+
importFrom(parallel,makeCluster)
50+
importFrom(parallel,parApply)
51+
importFrom(parallel,parLapply)
52+
importFrom(parallel,parSapply)
53+
importFrom(parallel,stopCluster)
4854
importFrom(qgraph,qgraph)
4955
importFrom(stats,as.dist)
5056
importFrom(stats,chisq.test)

NEWS.md

+3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
# rCNV 1.3.0 (third update)
2+
* parallelization enabled
3+
* dupValidate function revised
4+
* per site Fis added to deviant detection
25
* vstPermutation function added
36
* maf modified to remove multi-allelic sites
47
* FIT correction added

R/TMM_normalz.R

+8-8
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ TMM <- function(obs, ref, logratioTrim=.3, sumTrim=0.05, Weighting=TRUE, Acutoff
5454
}
5555

5656

57-
TMMex <- function(obs,ref, logratioTrim=.3, sumTrim=0.05, Weighting=TRUE, Acutoff=-1e10)
57+
TMMex <- function(obs,ref, logratioTrim=.3, sumTrim=0.05, Weighting=TRUE, Acutoff=-1e10,cl)
5858
# Gordon Smyth
5959
# adopted from edgeR package (Robinson et al. 2010)
6060
{
@@ -132,18 +132,18 @@ TMMex <- function(obs,ref, logratioTrim=.3, sumTrim=0.05, Weighting=TRUE, Acutof
132132

133133
## quantile normalization according to Robinson MD, and Oshlack A (2010)
134134
# qn1
135-
quantile_normalisation <- function(df,het.table=NULL,verbose=verbose){
136-
df_rank <- apply(df,2,rank,ties.method="min")
135+
quantile_normalisation <- function(df,het.table=NULL,verbose=verbose,cl){
136+
df_rank <- parApply(cl=cl,df,2,rank,ties.method="min")
137137
df_sorted <- data.frame(apply(df, 2, sort,na.last=TRUE))
138-
df_mean <- apply(df_sorted, 1, mean,na.rm=TRUE)
138+
df_mean <- parApply(cl=cl,df_sorted, 1, mean,na.rm=TRUE)
139139
if(!is.null(het.table)){
140140
het.table<-het.table[,-c(1:4)]
141141
}
142142
if(verbose){
143143
message("\ncalculating normalized depth")
144-
df_final <- lapply_pb(1:ncol(df_rank), index_to_mean, my_mean=df_mean,indx=df_rank,al=het.table)
144+
df_final <- parLapply(cl=cl,1:ncol(df_rank), index_to_mean, my_mean=df_mean,indx=df_rank,al=het.table)
145145
} else {
146-
df_final <- lapply(1:ncol(df_rank), index_to_mean, my_mean=df_mean,indx=df_rank,al=het.table)
146+
df_final <- parLapply(cl=cl,1:ncol(df_rank), index_to_mean, my_mean=df_mean,indx=df_rank,al=het.table)
147147
}
148148
return(df_final)
149149
}
@@ -309,7 +309,7 @@ cpm.normal <- function(het.table, method=c("MedR","QN","pca","TMM","TMMex"),
309309
message("calculating normalization factor")
310310
pb <- txtProgressBar(min = 0, max = pb_Total, width = 50, style = 3)
311311
}
312-
# tdep<-rCNV:::apply_pb(het.table[,-c(1:4)],2,function(tmp){
312+
# tdep<-parApply(cl=cl,(het.table[,-c(1:4)],2,function(tmp){
313313
y1 <- y2 <- matrix(NA_integer_, dm[1], pb_Total)
314314
for(i in seq_len(pb_Total)){
315315
if (verbose) setTxtProgressBar(pb, i)
@@ -353,7 +353,7 @@ cpm.normal <- function(het.table, method=c("MedR","QN","pca","TMM","TMMex"),
353353
out <- paste0(y1, ",", y2)
354354
attributes(out) <- attributes(tdep)
355355
} else if(method=="QN"){
356-
out <- do.call(cbind,quantile_normalisation(tdep,het.table,verbose=verbose))
356+
out <- do.call(cbind,quantile_normalisation(tdep,het.table,verbose=verbose,cl=cl))
357357
} else if(method=="pca"){
358358
if(verbose){message("\ncalculating normalized depth")}
359359
new.mat <- t(tdep) ### check the direction to confirm if this step need to be done

R/bias.det.R

+76-147
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ get.pvals<-function(x,df,p.cal){
6161
#' \code{hetTgen} (non-normalized)
6262
#' @param x.norm a data frame of normalized allele coverage, output of
6363
#' \code{cpm.normal}. If not provided, calculated using \code{X}.
64+
#' @param Fis numeric. Inbreeding coefficient calculated using \code{h.zygosity()} function
6465
#' @param method character. method to be used for normalization
6566
#' (see \code{cpm.normal} details). Default \code{TMM}
6667
#' @param logratioTrim numeric. percentage value (0 - 1) of variation to be
@@ -73,9 +74,11 @@ get.pvals<-function(x,df,p.cal){
7374
#' @param plot.allele.cov logical, plot comparative plots of allele depth
7475
#' coverage in homozygotes and heterozygotes
7576
#' @param verbose logical, whether to print progress
77+
#' @param parallel logical. whether to parallelize the process
7678
#' @param \dots further arguments to be passed to \code{plot}
7779
#'
7880
#' @importFrom stats pchisq pnorm na.omit
81+
#' @importFrom parallel parApply detectCores parLapply stopCluster
7982
#'
8083
#' @details
8184
#' Allele information generated here are individual SNP based and presents the
@@ -105,43 +108,19 @@ get.pvals<-function(x,df,p.cal){
105108
#' AI<-allele.info(ADtable,x.norm=ADnorm)}
106109
#'
107110
#' @export
108-
allele.info<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"),logratioTrim = 0.3,sumTrim = 0.05,Weighting = TRUE,Acutoff = -1e+10,plot.allele.cov=TRUE,verbose = TRUE,...){
111+
allele.info<-function(X,x.norm=NULL,Fis,method=c("MedR","QN","pca","TMM","TMMex"),logratioTrim = 0.3,sumTrim = 0.05,Weighting = TRUE,Acutoff = -1e+10,plot.allele.cov=TRUE,verbose = TRUE,parallel=FALSE,...){
109112
method=match.arg(method)
110113
if(is.null(x.norm)){
111114
x.norm<-cpm.normal(X,method=method,logratioTrim=logratioTrim,sumTrim = sumTrim,Weighting = Weighting,Acutoff = Acutoff,verbose = verbose)
112115
}
113116
if(!inherits(x.norm,"list")){x.norm<-list(AD=x.norm)}
114117
if(inherits(x.norm,"list")){x.norm<-x.norm$AD}
115-
if(verbose){
116-
message("calculating probability values of alleles")
117-
p.cal<-apply_pb(x.norm[,-c(1:4)],1,function(snp1){
118-
if(is.character(unname(unlist(snp1[1])))){
119-
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
120-
y[,1]<-as.integer(y[,1])
121-
y[,2]<-as.integer(y[,2])} else {y<-snp1}
122-
rs<-rowSums(y)
123-
rs[rs==0L]<-NA
124-
cv<-sd(unlist(rs),na.rm = TRUE)/mean(unlist(rs),na.rm = TRUE)
125-
rr1<-y[,2]/rs
126-
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
127-
homalt<-sum(rr1==1,na.rm=TRUE)
128-
homref<-sum(rr1==0,na.rm=TRUE)
129-
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
130-
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
131-
covalt<-sum(y[,2],na.rm = TRUE)
132-
covref<-sum(y[,1],na.rm = TRUE)
133-
NHet<-nrow(snp1het)
134-
if(NHet>3){
135-
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
136-
p.sum<-sum(snp1het[,2])/sum(snp1het)
137-
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
138-
} else {
139-
ll<-NA
140-
}
141-
return(ll)
142-
})
143-
} else {
144-
p.cal<-apply(x.norm[,-c(1:4)],1,function(snp1){
118+
119+
if(parallel){
120+
numCores<-detectCores()-1
121+
cl<-makeCluster(cl)
122+
123+
p.cal<-parApply(cl=cl,x.norm[,-c(1:4)],1,function(snp1){
145124
if(is.character(unname(unlist(snp1[1])))){
146125
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
147126
y[,1]<-as.integer(y[,1])
@@ -167,121 +146,66 @@ allele.info<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"),lo
167146
}
168147
return(ll)
169148
})
170-
}
171-
if(is.list(p.cal)){
172-
p.cal<-do.call(rbind,p.cal)
173-
} else {
174-
p.cal<-t(p.cal)
175-
}
176-
p.cal[p.cal=="NaN"]<-0
177-
if(plot.allele.cov){
178-
p.list<-list(...)
179-
if(is.null(p.list$pch)) p.list$pch=19
180-
if(is.null(p.list$cex)) p.list$cex=0.6
181-
if(is.null(p.list$col)) p.list$col<-makeTransparent(colorspace::heat_hcl(12,h=c(0,-100),c=c(40,80),l=c(75,40),power=1)[11])
182-
if(is.null(p.list$lcol)) p.list$lcol="tomato"
183-
par(mfrow=c(2,2))
184-
par(mar=c(4,5,2,2))
185-
plot(p.cal$mean.a.homo,p.cal$mean.r.homo,pch=p.list$pch,cex=p.list$cex,
186-
col=p.list$col,xlab="Mean coverage of \nalt. allele in homozygotes",
187-
ylab="Mean coverage of \n ref. allele in homozygotes",cex.lab=0.8)
188-
abline(0,1,col=p.list$lcol)
189-
plot(p.cal$mean.a.het,p.cal$mean.r.het,pch=p.list$pch,cex=p.list$cex,
190-
col=p.list$col,xlab="Mean coverage of \nalt. allele in heterozygotes",
191-
ylab="Mean coverage of \n ref. allele in heterozygotes",cex.lab=0.8)
192-
abline(0,1,col=p.list$lcol)
193-
plot(p.cal$mean.a.het,p.cal$mean.a.homo,pch=p.list$pch,cex=p.list$cex,
194-
col=p.list$col,xlab="Mean coverage of \nalt. allele in heterozygotes",
195-
ylab="Mean coverage of \n alt. allele in homozygotes",cex.lab=0.8)
196-
abline(0,1,col=p.list$lcol)
197-
plot(p.cal$mean.r.het,p.cal$mean.r.homo,pch=p.list$pch,cex=p.list$cex,
198-
col=p.list$col,xlab="Mean coverage of \nref. allele in heterozygotes",
199-
ylab="Mean coverage of \n ref. allele in homozygotes",cex.lab=0.8)
200-
abline(0,1,col=p.list$lcol)
201-
par(mfrow=c(1,1))
202-
}
203149

204-
if(verbose){
205-
message("calculating chi-square significance")
206-
pvals<-lapply_pb(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
207150
} else {
208-
pvals<-lapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
151+
if(verbose){
152+
message("calculating probability values of alleles")
153+
p.cal<-apply_pb(x.norm[,-c(1:4)],1,function(snp1){
154+
if(is.character(unname(unlist(snp1[1])))){
155+
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
156+
y[,1]<-as.integer(y[,1])
157+
y[,2]<-as.integer(y[,2])} else {y<-snp1}
158+
rs<-rowSums(y)
159+
rs[rs==0L]<-NA
160+
cv<-sd(unlist(rs),na.rm = TRUE)/mean(unlist(rs),na.rm = TRUE)
161+
rr1<-y[,2]/rs
162+
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
163+
homalt<-sum(rr1==1,na.rm=TRUE)
164+
homref<-sum(rr1==0,na.rm=TRUE)
165+
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
166+
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
167+
covalt<-sum(y[,2],na.rm = TRUE)
168+
covref<-sum(y[,1],na.rm = TRUE)
169+
NHet<-nrow(snp1het)
170+
if(NHet>3){
171+
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
172+
p.sum<-sum(snp1het[,2])/sum(snp1het)
173+
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
174+
} else {
175+
ll<-NA
176+
}
177+
return(ll)
178+
})
179+
} else {
180+
p.cal<-apply(x.norm[,-c(1:4)],1,function(snp1){
181+
if(is.character(unname(unlist(snp1[1])))){
182+
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
183+
y[,1]<-as.integer(y[,1])
184+
y[,2]<-as.integer(y[,2])} else {y<-snp1}
185+
rs<-rowSums(y)
186+
rs[rs==0]<-NA
187+
cv<-sd(unlist(rs),na.rm = TRUE)/mean(unlist(rs),na.rm = TRUE)
188+
rr1<-y[,2]/rs
189+
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
190+
homalt<-sum(rr1==1,na.rm=TRUE)
191+
homref<-sum(rr1==0,na.rm=TRUE)
192+
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
193+
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
194+
covalt<-sum(y[,2],na.rm = TRUE)
195+
covref<-sum(y[,1],na.rm = TRUE)
196+
NHet<-nrow(snp1het)
197+
if(NHet>3){
198+
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
199+
p.sum<-sum(snp1het[,2])/sum(snp1het)
200+
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
201+
} else {
202+
ll<-NA
203+
}
204+
return(ll)
205+
})
206+
}
209207
}
210-
pvals<-do.call(rbind,pvals)
211-
pvals<-cbind(X[,1:3],pvals)
212-
pvals<-na.omit(pvals)
213-
ht<-sig.hets(pvals,plot = FALSE, verbose = verbose)
214-
pvals<-data.frame(pvals,eH.pval=ht[,"eH.pval"],eH.delta=ht[,"eH.delta"],cv=na.omit(p.cal[,"cv"]))
215-
216-
return(pvals)
217-
}
218-
219-
220-
221-
allele.info_old<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"),logratioTrim = 0.3,sumTrim = 0.05,Weighting = TRUE,Acutoff = -1e+10,plot.allele.cov=TRUE,verbose = TRUE,...){
222-
method=match.arg(method)
223-
if(is.null(x.norm)){
224-
x.norm<-cpm.normal(X,method=method,logratioTrim=logratioTrim,sumTrim = sumTrim,Weighting = Weighting,Acutoff = Acutoff,verbose = verbose)
225-
}
226-
if(!inherits(x.norm,"list")){x.norm<-list(AD=x.norm)}
227-
if(inherits(x.norm,"list")){x.norm<-x.norm$AD}
228208

229-
if(verbose){
230-
message("calculating probability values of alleles")
231-
p.cal<-apply_pb(x.norm[,-c(1:4)],1,function(snp1){
232-
if(is.character(unname(unlist(snp1[1])))){
233-
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
234-
y[,1]<-as.integer(y[,1])
235-
y[,2]<-as.integer(y[,2])} else {y<-snp1}
236-
rs<-rowSums(y)
237-
rs[rs==0L]<-NA
238-
cv<-sd(unlist(y),na.rm = TRUE)/mean(unlist(y),na.rm = TRUE)
239-
rr1<-y[,2]/rs
240-
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
241-
homalt<-sum(rr1==1,na.rm=TRUE)
242-
homref<-sum(rr1==0,na.rm=TRUE)
243-
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
244-
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
245-
covalt<-sum(y[,2],na.rm = TRUE)
246-
covref<-sum(y[,1],na.rm = TRUE)
247-
NHet<-nrow(snp1het)
248-
if(NHet>3){
249-
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
250-
p.sum<-sum(snp1het[,2])/sum(snp1het)
251-
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
252-
} else {
253-
ll<-NA
254-
}
255-
return(ll)
256-
})
257-
} else {
258-
p.cal<-apply(x.norm[,-c(1:4)],1,function(snp1){
259-
if(is.character(unname(unlist(snp1[1])))){
260-
y<-data.frame(stringr::str_split_fixed(snp1,",",n=2L))
261-
y[,1]<-as.integer(y[,1])
262-
y[,2]<-as.integer(y[,2])} else {y<-snp1}
263-
rs<-rowSums(y)
264-
rs[rs==0]<-NA
265-
cv<-sd(unlist(y),na.rm = TRUE)/mean(unlist(y),na.rm = TRUE)
266-
rr1<-y[,2]/rs
267-
snp1het<-y[-which(rr1 == 0 | rr1 == 1 | is.na(rr1)==TRUE),]
268-
homalt<-sum(rr1==1,na.rm=TRUE)
269-
homref<-sum(rr1==0,na.rm=TRUE)
270-
covrefhomo<-sum(y[c(rr1 == 0,na.rm = TRUE),],na.rm = TRUE)
271-
covalthomo<-sum(y[c(rr1 == 1,na.rm = TRUE),],na.rm = TRUE)
272-
covalt<-sum(y[,2],na.rm = TRUE)
273-
covref<-sum(y[,1],na.rm = TRUE)
274-
NHet<-nrow(snp1het)
275-
if(NHet>3){
276-
p.all<-(covalt/(NHet+(2*homalt)))/((covalt/(NHet+(2*homalt)))+(covref/(NHet+(2*homref))))
277-
p.sum<-sum(snp1het[,2])/sum(snp1het)
278-
ll <-data.frame(p.all,p.sum,mean.a.homo=covalthomo/(2*homalt),mean.r.homo=covrefhomo/(2*homref),mean.a.het=sum(snp1het[,2])/NHet,mean.r.het=sum(snp1het[,1])/NHet,cv=cv)
279-
} else {
280-
ll<-NA
281-
}
282-
return(ll)
283-
})
284-
}
285209
if(is.list(p.cal)){
286210
p.cal<-do.call(rbind,p.cal)
287211
} else {
@@ -315,16 +239,21 @@ allele.info_old<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"
315239
par(mfrow=c(1,1))
316240
}
317241

318-
if(verbose){
319-
message("calculating chi-square significance")
320-
pvals<-lapply_pb(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
242+
if(parallel){
243+
pvals<-parLapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal,cl=cl)
244+
stopCluster(cl)
321245
} else {
322-
pvals<-lapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
246+
if(verbose){
247+
message("calculating chi-square significance")
248+
pvals<-lapply_pb(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
249+
} else {
250+
pvals<-lapply(1:nrow(X),get.pvals,df=X,p.cal=p.cal)
251+
}
323252
}
324253
pvals<-do.call(rbind,pvals)
325254
pvals<-cbind(X[,1:3],pvals)
326255
pvals<-na.omit(pvals)
327-
ht<-sig.hets(pvals,plot = FALSE, verbose = verbose)
256+
ht<-sig.hets(pvals,Fis,plot = FALSE, verbose = verbose)
328257
pvals<-data.frame(pvals,eH.pval=ht[,"eH.pval"],eH.delta=ht[,"eH.delta"],cv=na.omit(p.cal[,"cv"]))
329258

330259
return(pvals)

0 commit comments

Comments
 (0)