@@ -61,6 +61,7 @@ get.pvals<-function(x,df,p.cal){
61
61
# ' \code{hetTgen} (non-normalized)
62
62
# ' @param x.norm a data frame of normalized allele coverage, output of
63
63
# ' \code{cpm.normal}. If not provided, calculated using \code{X}.
64
+ # ' @param Fis numeric. Inbreeding coefficient calculated using \code{h.zygosity()} function
64
65
# ' @param method character. method to be used for normalization
65
66
# ' (see \code{cpm.normal} details). Default \code{TMM}
66
67
# ' @param logratioTrim numeric. percentage value (0 - 1) of variation to be
@@ -73,9 +74,11 @@ get.pvals<-function(x,df,p.cal){
73
74
# ' @param plot.allele.cov logical, plot comparative plots of allele depth
74
75
# ' coverage in homozygotes and heterozygotes
75
76
# ' @param verbose logical, whether to print progress
77
+ # ' @param parallel logical. whether to parallelize the process
76
78
# ' @param \dots further arguments to be passed to \code{plot}
77
79
# '
78
80
# ' @importFrom stats pchisq pnorm na.omit
81
+ # ' @importFrom parallel parApply detectCores parLapply stopCluster
79
82
# '
80
83
# ' @details
81
84
# ' Allele information generated here are individual SNP based and presents the
@@ -105,43 +108,19 @@ get.pvals<-function(x,df,p.cal){
105
108
# ' AI<-allele.info(ADtable,x.norm=ADnorm)}
106
109
# '
107
110
# ' @export
108
- allele.info <- function (X ,x.norm = NULL ,method = c(" MedR" ," QN" ," pca" ," TMM" ," TMMex" ),logratioTrim = 0.3 ,sumTrim = 0.05 ,Weighting = TRUE ,Acutoff = - 1e+10 ,plot.allele.cov = TRUE ,verbose = TRUE ,... ){
111
+ allele.info <- function (X ,x.norm = NULL ,Fis , method = c(" MedR" ," QN" ," pca" ," TMM" ," TMMex" ),logratioTrim = 0.3 ,sumTrim = 0.05 ,Weighting = TRUE ,Acutoff = - 1e+10 ,plot.allele.cov = TRUE ,verbose = TRUE , parallel = FALSE ,... ){
109
112
method = match.arg(method )
110
113
if (is.null(x.norm )){
111
114
x.norm <- cpm.normal(X ,method = method ,logratioTrim = logratioTrim ,sumTrim = sumTrim ,Weighting = Weighting ,Acutoff = Acutoff ,verbose = verbose )
112
115
}
113
116
if (! inherits(x.norm ," list" )){x.norm <- list (AD = x.norm )}
114
117
if (inherits(x.norm ," list" )){x.norm <- x.norm $ AD }
115
- if (verbose ){
116
- message(" calculating probability values of alleles" )
117
- p.cal <- apply_pb(x.norm [,- c(1 : 4 )],1 ,function (snp1 ){
118
- if (is.character(unname(unlist(snp1 [1 ])))){
119
- y <- data.frame (stringr :: str_split_fixed(snp1 ," ," ,n = 2L ))
120
- y [,1 ]<- as.integer(y [,1 ])
121
- y [,2 ]<- as.integer(y [,2 ])} else {y <- snp1 }
122
- rs <- rowSums(y )
123
- rs [rs == 0L ]<- NA
124
- cv <- sd(unlist(rs ),na.rm = TRUE )/ mean(unlist(rs ),na.rm = TRUE )
125
- rr1 <- y [,2 ]/ rs
126
- snp1het <- y [- which(rr1 == 0 | rr1 == 1 | is.na(rr1 )== TRUE ),]
127
- homalt <- sum(rr1 == 1 ,na.rm = TRUE )
128
- homref <- sum(rr1 == 0 ,na.rm = TRUE )
129
- covrefhomo <- sum(y [c(rr1 == 0 ,na.rm = TRUE ),],na.rm = TRUE )
130
- covalthomo <- sum(y [c(rr1 == 1 ,na.rm = TRUE ),],na.rm = TRUE )
131
- covalt <- sum(y [,2 ],na.rm = TRUE )
132
- covref <- sum(y [,1 ],na.rm = TRUE )
133
- NHet <- nrow(snp1het )
134
- if (NHet > 3 ){
135
- p.all <- (covalt / (NHet + (2 * homalt )))/ ((covalt / (NHet + (2 * homalt )))+ (covref / (NHet + (2 * homref ))))
136
- p.sum <- sum(snp1het [,2 ])/ sum(snp1het )
137
- ll <- data.frame (p.all ,p.sum ,mean.a.homo = covalthomo / (2 * homalt ),mean.r.homo = covrefhomo / (2 * homref ),mean.a.het = sum(snp1het [,2 ])/ NHet ,mean.r.het = sum(snp1het [,1 ])/ NHet ,cv = cv )
138
- } else {
139
- ll <- NA
140
- }
141
- return (ll )
142
- })
143
- } else {
144
- p.cal <- apply(x.norm [,- c(1 : 4 )],1 ,function (snp1 ){
118
+
119
+ if (parallel ){
120
+ numCores <- detectCores()- 1
121
+ cl <- makeCluster(cl )
122
+
123
+ p.cal <- parApply(cl = cl ,x.norm [,- c(1 : 4 )],1 ,function (snp1 ){
145
124
if (is.character(unname(unlist(snp1 [1 ])))){
146
125
y <- data.frame (stringr :: str_split_fixed(snp1 ," ," ,n = 2L ))
147
126
y [,1 ]<- as.integer(y [,1 ])
@@ -167,121 +146,66 @@ allele.info<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"),lo
167
146
}
168
147
return (ll )
169
148
})
170
- }
171
- if (is.list(p.cal )){
172
- p.cal <- do.call(rbind ,p.cal )
173
- } else {
174
- p.cal <- t(p.cal )
175
- }
176
- p.cal [p.cal == " NaN" ]<- 0
177
- if (plot.allele.cov ){
178
- p.list <- list (... )
179
- if (is.null(p.list $ pch )) p.list $ pch = 19
180
- if (is.null(p.list $ cex )) p.list $ cex = 0.6
181
- if (is.null(p.list $ col )) p.list $ col <- makeTransparent(colorspace :: heat_hcl(12 ,h = c(0 ,- 100 ),c = c(40 ,80 ),l = c(75 ,40 ),power = 1 )[11 ])
182
- if (is.null(p.list $ lcol )) p.list $ lcol = " tomato"
183
- par(mfrow = c(2 ,2 ))
184
- par(mar = c(4 ,5 ,2 ,2 ))
185
- plot(p.cal $ mean.a.homo ,p.cal $ mean.r.homo ,pch = p.list $ pch ,cex = p.list $ cex ,
186
- col = p.list $ col ,xlab = " Mean coverage of \n alt. allele in homozygotes" ,
187
- ylab = " Mean coverage of \n ref. allele in homozygotes" ,cex.lab = 0.8 )
188
- abline(0 ,1 ,col = p.list $ lcol )
189
- plot(p.cal $ mean.a.het ,p.cal $ mean.r.het ,pch = p.list $ pch ,cex = p.list $ cex ,
190
- col = p.list $ col ,xlab = " Mean coverage of \n alt. allele in heterozygotes" ,
191
- ylab = " Mean coverage of \n ref. allele in heterozygotes" ,cex.lab = 0.8 )
192
- abline(0 ,1 ,col = p.list $ lcol )
193
- plot(p.cal $ mean.a.het ,p.cal $ mean.a.homo ,pch = p.list $ pch ,cex = p.list $ cex ,
194
- col = p.list $ col ,xlab = " Mean coverage of \n alt. allele in heterozygotes" ,
195
- ylab = " Mean coverage of \n alt. allele in homozygotes" ,cex.lab = 0.8 )
196
- abline(0 ,1 ,col = p.list $ lcol )
197
- plot(p.cal $ mean.r.het ,p.cal $ mean.r.homo ,pch = p.list $ pch ,cex = p.list $ cex ,
198
- col = p.list $ col ,xlab = " Mean coverage of \n ref. allele in heterozygotes" ,
199
- ylab = " Mean coverage of \n ref. allele in homozygotes" ,cex.lab = 0.8 )
200
- abline(0 ,1 ,col = p.list $ lcol )
201
- par(mfrow = c(1 ,1 ))
202
- }
203
149
204
- if (verbose ){
205
- message(" calculating chi-square significance" )
206
- pvals <- lapply_pb(1 : nrow(X ),get.pvals ,df = X ,p.cal = p.cal )
207
150
} else {
208
- pvals <- lapply(1 : nrow(X ),get.pvals ,df = X ,p.cal = p.cal )
151
+ if (verbose ){
152
+ message(" calculating probability values of alleles" )
153
+ p.cal <- apply_pb(x.norm [,- c(1 : 4 )],1 ,function (snp1 ){
154
+ if (is.character(unname(unlist(snp1 [1 ])))){
155
+ y <- data.frame (stringr :: str_split_fixed(snp1 ," ," ,n = 2L ))
156
+ y [,1 ]<- as.integer(y [,1 ])
157
+ y [,2 ]<- as.integer(y [,2 ])} else {y <- snp1 }
158
+ rs <- rowSums(y )
159
+ rs [rs == 0L ]<- NA
160
+ cv <- sd(unlist(rs ),na.rm = TRUE )/ mean(unlist(rs ),na.rm = TRUE )
161
+ rr1 <- y [,2 ]/ rs
162
+ snp1het <- y [- which(rr1 == 0 | rr1 == 1 | is.na(rr1 )== TRUE ),]
163
+ homalt <- sum(rr1 == 1 ,na.rm = TRUE )
164
+ homref <- sum(rr1 == 0 ,na.rm = TRUE )
165
+ covrefhomo <- sum(y [c(rr1 == 0 ,na.rm = TRUE ),],na.rm = TRUE )
166
+ covalthomo <- sum(y [c(rr1 == 1 ,na.rm = TRUE ),],na.rm = TRUE )
167
+ covalt <- sum(y [,2 ],na.rm = TRUE )
168
+ covref <- sum(y [,1 ],na.rm = TRUE )
169
+ NHet <- nrow(snp1het )
170
+ if (NHet > 3 ){
171
+ p.all <- (covalt / (NHet + (2 * homalt )))/ ((covalt / (NHet + (2 * homalt )))+ (covref / (NHet + (2 * homref ))))
172
+ p.sum <- sum(snp1het [,2 ])/ sum(snp1het )
173
+ ll <- data.frame (p.all ,p.sum ,mean.a.homo = covalthomo / (2 * homalt ),mean.r.homo = covrefhomo / (2 * homref ),mean.a.het = sum(snp1het [,2 ])/ NHet ,mean.r.het = sum(snp1het [,1 ])/ NHet ,cv = cv )
174
+ } else {
175
+ ll <- NA
176
+ }
177
+ return (ll )
178
+ })
179
+ } else {
180
+ p.cal <- apply(x.norm [,- c(1 : 4 )],1 ,function (snp1 ){
181
+ if (is.character(unname(unlist(snp1 [1 ])))){
182
+ y <- data.frame (stringr :: str_split_fixed(snp1 ," ," ,n = 2L ))
183
+ y [,1 ]<- as.integer(y [,1 ])
184
+ y [,2 ]<- as.integer(y [,2 ])} else {y <- snp1 }
185
+ rs <- rowSums(y )
186
+ rs [rs == 0 ]<- NA
187
+ cv <- sd(unlist(rs ),na.rm = TRUE )/ mean(unlist(rs ),na.rm = TRUE )
188
+ rr1 <- y [,2 ]/ rs
189
+ snp1het <- y [- which(rr1 == 0 | rr1 == 1 | is.na(rr1 )== TRUE ),]
190
+ homalt <- sum(rr1 == 1 ,na.rm = TRUE )
191
+ homref <- sum(rr1 == 0 ,na.rm = TRUE )
192
+ covrefhomo <- sum(y [c(rr1 == 0 ,na.rm = TRUE ),],na.rm = TRUE )
193
+ covalthomo <- sum(y [c(rr1 == 1 ,na.rm = TRUE ),],na.rm = TRUE )
194
+ covalt <- sum(y [,2 ],na.rm = TRUE )
195
+ covref <- sum(y [,1 ],na.rm = TRUE )
196
+ NHet <- nrow(snp1het )
197
+ if (NHet > 3 ){
198
+ p.all <- (covalt / (NHet + (2 * homalt )))/ ((covalt / (NHet + (2 * homalt )))+ (covref / (NHet + (2 * homref ))))
199
+ p.sum <- sum(snp1het [,2 ])/ sum(snp1het )
200
+ ll <- data.frame (p.all ,p.sum ,mean.a.homo = covalthomo / (2 * homalt ),mean.r.homo = covrefhomo / (2 * homref ),mean.a.het = sum(snp1het [,2 ])/ NHet ,mean.r.het = sum(snp1het [,1 ])/ NHet ,cv = cv )
201
+ } else {
202
+ ll <- NA
203
+ }
204
+ return (ll )
205
+ })
206
+ }
209
207
}
210
- pvals <- do.call(rbind ,pvals )
211
- pvals <- cbind(X [,1 : 3 ],pvals )
212
- pvals <- na.omit(pvals )
213
- ht <- sig.hets(pvals ,plot = FALSE , verbose = verbose )
214
- pvals <- data.frame (pvals ,eH.pval = ht [," eH.pval" ],eH.delta = ht [," eH.delta" ],cv = na.omit(p.cal [," cv" ]))
215
-
216
- return (pvals )
217
- }
218
-
219
-
220
-
221
- allele.info_old <- function (X ,x.norm = NULL ,method = c(" MedR" ," QN" ," pca" ," TMM" ," TMMex" ),logratioTrim = 0.3 ,sumTrim = 0.05 ,Weighting = TRUE ,Acutoff = - 1e+10 ,plot.allele.cov = TRUE ,verbose = TRUE ,... ){
222
- method = match.arg(method )
223
- if (is.null(x.norm )){
224
- x.norm <- cpm.normal(X ,method = method ,logratioTrim = logratioTrim ,sumTrim = sumTrim ,Weighting = Weighting ,Acutoff = Acutoff ,verbose = verbose )
225
- }
226
- if (! inherits(x.norm ," list" )){x.norm <- list (AD = x.norm )}
227
- if (inherits(x.norm ," list" )){x.norm <- x.norm $ AD }
228
208
229
- if (verbose ){
230
- message(" calculating probability values of alleles" )
231
- p.cal <- apply_pb(x.norm [,- c(1 : 4 )],1 ,function (snp1 ){
232
- if (is.character(unname(unlist(snp1 [1 ])))){
233
- y <- data.frame (stringr :: str_split_fixed(snp1 ," ," ,n = 2L ))
234
- y [,1 ]<- as.integer(y [,1 ])
235
- y [,2 ]<- as.integer(y [,2 ])} else {y <- snp1 }
236
- rs <- rowSums(y )
237
- rs [rs == 0L ]<- NA
238
- cv <- sd(unlist(y ),na.rm = TRUE )/ mean(unlist(y ),na.rm = TRUE )
239
- rr1 <- y [,2 ]/ rs
240
- snp1het <- y [- which(rr1 == 0 | rr1 == 1 | is.na(rr1 )== TRUE ),]
241
- homalt <- sum(rr1 == 1 ,na.rm = TRUE )
242
- homref <- sum(rr1 == 0 ,na.rm = TRUE )
243
- covrefhomo <- sum(y [c(rr1 == 0 ,na.rm = TRUE ),],na.rm = TRUE )
244
- covalthomo <- sum(y [c(rr1 == 1 ,na.rm = TRUE ),],na.rm = TRUE )
245
- covalt <- sum(y [,2 ],na.rm = TRUE )
246
- covref <- sum(y [,1 ],na.rm = TRUE )
247
- NHet <- nrow(snp1het )
248
- if (NHet > 3 ){
249
- p.all <- (covalt / (NHet + (2 * homalt )))/ ((covalt / (NHet + (2 * homalt )))+ (covref / (NHet + (2 * homref ))))
250
- p.sum <- sum(snp1het [,2 ])/ sum(snp1het )
251
- ll <- data.frame (p.all ,p.sum ,mean.a.homo = covalthomo / (2 * homalt ),mean.r.homo = covrefhomo / (2 * homref ),mean.a.het = sum(snp1het [,2 ])/ NHet ,mean.r.het = sum(snp1het [,1 ])/ NHet ,cv = cv )
252
- } else {
253
- ll <- NA
254
- }
255
- return (ll )
256
- })
257
- } else {
258
- p.cal <- apply(x.norm [,- c(1 : 4 )],1 ,function (snp1 ){
259
- if (is.character(unname(unlist(snp1 [1 ])))){
260
- y <- data.frame (stringr :: str_split_fixed(snp1 ," ," ,n = 2L ))
261
- y [,1 ]<- as.integer(y [,1 ])
262
- y [,2 ]<- as.integer(y [,2 ])} else {y <- snp1 }
263
- rs <- rowSums(y )
264
- rs [rs == 0 ]<- NA
265
- cv <- sd(unlist(y ),na.rm = TRUE )/ mean(unlist(y ),na.rm = TRUE )
266
- rr1 <- y [,2 ]/ rs
267
- snp1het <- y [- which(rr1 == 0 | rr1 == 1 | is.na(rr1 )== TRUE ),]
268
- homalt <- sum(rr1 == 1 ,na.rm = TRUE )
269
- homref <- sum(rr1 == 0 ,na.rm = TRUE )
270
- covrefhomo <- sum(y [c(rr1 == 0 ,na.rm = TRUE ),],na.rm = TRUE )
271
- covalthomo <- sum(y [c(rr1 == 1 ,na.rm = TRUE ),],na.rm = TRUE )
272
- covalt <- sum(y [,2 ],na.rm = TRUE )
273
- covref <- sum(y [,1 ],na.rm = TRUE )
274
- NHet <- nrow(snp1het )
275
- if (NHet > 3 ){
276
- p.all <- (covalt / (NHet + (2 * homalt )))/ ((covalt / (NHet + (2 * homalt )))+ (covref / (NHet + (2 * homref ))))
277
- p.sum <- sum(snp1het [,2 ])/ sum(snp1het )
278
- ll <- data.frame (p.all ,p.sum ,mean.a.homo = covalthomo / (2 * homalt ),mean.r.homo = covrefhomo / (2 * homref ),mean.a.het = sum(snp1het [,2 ])/ NHet ,mean.r.het = sum(snp1het [,1 ])/ NHet ,cv = cv )
279
- } else {
280
- ll <- NA
281
- }
282
- return (ll )
283
- })
284
- }
285
209
if (is.list(p.cal )){
286
210
p.cal <- do.call(rbind ,p.cal )
287
211
} else {
@@ -315,16 +239,21 @@ allele.info_old<-function(X,x.norm=NULL,method=c("MedR","QN","pca","TMM","TMMex"
315
239
par(mfrow = c(1 ,1 ))
316
240
}
317
241
318
- if (verbose ){
319
- message( " calculating chi-square significance " )
320
- pvals <- lapply_pb( 1 : nrow( X ), get.pvals , df = X , p.cal = p.cal )
242
+ if (parallel ){
243
+ pvals <- parLapply( 1 : nrow( X ), get.pvals , df = X , p.cal = p.cal , cl = cl )
244
+ stopCluster( cl )
321
245
} else {
322
- pvals <- lapply(1 : nrow(X ),get.pvals ,df = X ,p.cal = p.cal )
246
+ if (verbose ){
247
+ message(" calculating chi-square significance" )
248
+ pvals <- lapply_pb(1 : nrow(X ),get.pvals ,df = X ,p.cal = p.cal )
249
+ } else {
250
+ pvals <- lapply(1 : nrow(X ),get.pvals ,df = X ,p.cal = p.cal )
251
+ }
323
252
}
324
253
pvals <- do.call(rbind ,pvals )
325
254
pvals <- cbind(X [,1 : 3 ],pvals )
326
255
pvals <- na.omit(pvals )
327
- ht <- sig.hets(pvals ,plot = FALSE , verbose = verbose )
256
+ ht <- sig.hets(pvals ,Fis , plot = FALSE , verbose = verbose )
328
257
pvals <- data.frame (pvals ,eH.pval = ht [," eH.pval" ],eH.delta = ht [," eH.delta" ],cv = na.omit(p.cal [," cv" ]))
329
258
330
259
return (pvals )
0 commit comments