Skip to content

Commit

Permalink
tutorial updated
Browse files Browse the repository at this point in the history
  • Loading branch information
piyalkarum committed Sep 16, 2024
1 parent 1515224 commit 8483bd7
Show file tree
Hide file tree
Showing 106 changed files with 11,727 additions and 1,730 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@ Authors@R: c(person(given="Piyal",family="Karunarathne", email="piyalkarumail@ya
Maintainer: Piyal Karunarathne <piyalkarumail@yahoo.com>
Description: Functions in this package will import filtered variant call format (VCF) files of SNPs data and generate data sets to detect copy number variants, visualize them and do downstream analyses with copy number variants(e.g. Environmental association analyses).
License: AGPL (>= 3)
Imports: data.table, graphics, colorspace, R.utils, qgraph, stringr
Imports: data.table, graphics, colorspace, R.utils, qgraph, stringr, Rcpp
LinkingTo: Rcpp
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Depends:
R (>= 3.6.0)
Suggests:
rmarkdown,
knitr,
testthat (>= 3.0.0),
covr
VignetteBuilder: knitr
##VignetteBuilder: knitr
Config/testthat/edition: 3
Roxygen: list(markdown = TRUE)
URL: https://piyalkarum.github.io/rCNV/, https://cran.r-project.org/package=rCNV
Expand Down
11 changes: 11 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

process_snp <- function(X, AD) {
.Call('_rCNV_process_snp', PACKAGE = 'rCNV', X, AD)
}

process_column1 <- function(column, info_type, max_adn, ind_adn) {
.Call('_rCNV_process_column1', PACKAGE = 'rCNV', column, info_type, max_adn, ind_adn)
}

8 changes: 6 additions & 2 deletions R/bias.det.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,12 @@ get.pvals<-function(x,df,p.cal){
#' }
#'
#' @examples
#' \dontrun{data(ADtable)
#' AI<-allele.info(ADtable,x.norm=ADnorm)}
#' \dontrun{
#' hz<-h.zygosity(vcf,verbose=FALSE)
#' Fis<-mean(hz$Fis,na.rm = TRUE)
#' data(ADtable)
#' AI<-allele.info(ADtable,x.norm=ADnorm,Fis=0.11)
#' }
#'
#' @export
allele.info<-function(X,x.norm=NULL,Fis,method=c("MedR","QN","pca","TMM","TMMex"),logratioTrim = 0.3,sumTrim = 0.05,Weighting = TRUE,Acutoff = -1e+10,plot.allele.cov=TRUE,verbose = TRUE,parallel=FALSE,...){
Expand Down
13 changes: 13 additions & 0 deletions R/cpp_calls.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# isquare<-function(x){
# .Call('_rCNV_square_cpp',x)
# }
#
#
# mply<-function(x,multiplier){
# .Call('_rCNV_multiply_cpp',x,multiplier)
# }

# process_maf<-function(X,AD=TRUE){
# .Call('process_snp',X,AD)
# }

95 changes: 90 additions & 5 deletions R/raw_process.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,27 @@ gg<-function(x){
#' \dontrun{mf<-maf(ADtable)}
#'
#' @export
maf<-function(h.table,AD=TRUE,verbose=TRUE,parallel=FALSE){
maf <- function(h.table, AD = TRUE, verbose = TRUE, parallel = FALSE) {
htab <- h.table[, -c(1:4)]
if (parallel) {
numCores <- parallel::detectCores() - 1
cl <- parallel::makeCluster(numCores)
glt <- parallel::parApply(cl = cl, htab, 1, function(X) { process_snp(X, AD) })
parallel::stopCluster(cl)
} else {
if (verbose) {
glt <- apply_pb(htab, 1, function(X) { process_snp(X, AD) })
} else {
glt <- apply(htab, 1, function(X) { process_snp(X, AD) })
}
}
glt <- data.frame(h.table[, 1:4], t(glt))
colnames(glt) <- colnames(h.table)
return(glt)
}


maf_0<-function(h.table,AD=TRUE,verbose=TRUE,parallel=FALSE){
htab<-h.table[,-c(1:4)]
if(parallel){
numCores<-detectCores()-1
Expand All @@ -168,7 +188,7 @@ maf<-function(h.table,AD=TRUE,verbose=TRUE,parallel=FALSE){
if(AD){return(paste0(gg[,1],",",gg[,2]))}else{return(gg)}
})
}else{
glt<-apply_pb(htab,1,function(X){
glt<-apply(htab,1,function(X){
gg<-do.call(rbind,lapply(X,function(x){as.numeric(strsplit(x,",")[[1]])}))
if(ncol(gg)>2)gg<-gg[,-which.min(colMeans(proportions(gg,1),na.rm=T))]
if(AD){return(paste0(gg[,1],",",gg[,2]))}else{return(gg)}
Expand Down Expand Up @@ -253,7 +273,7 @@ hetTgen <- function(vcf, info.type = c("AD", "AD-tot", "GT", "GT-012", "GT-AB",

h.table <- matrix(NA, nrow(xx), ncol(xx))

process_column <- function(i) {
process_column_0 <- function(i) {
if (info.type == "AD-tot") {
tmp <- stringr::str_split_fixed(xx[,i], ":", max_adn)[ind]
tmp <- stringr::str_split_fixed(tmp, ",", 2L)
Expand All @@ -276,15 +296,15 @@ hetTgen <- function(vcf, info.type = c("AD", "AD-tot", "GT", "GT-012", "GT-AB",
clusterExport(cl, varlist = c("xx", "max_adn", "ind", "info.type", "process_column"), envir = environment())

results <- parLapply(cl, seq_len(ncol(xx)), function(i) {
res <- process_column(i)
res <- process_column_0(i)
res
})
stopCluster(cl)

h.table <- do.call(cbind, results)
} else {
for (i in seq_len(ncol(xx))) {
h.table[, i] <- process_column(i)
h.table[, i] <- process_column_0(i)
if (verbose) {
setTxtProgressBar(pb, i)
}
Expand Down Expand Up @@ -320,6 +340,71 @@ hetTgen <- function(vcf, info.type = c("AD", "AD-tot", "GT", "GT-012", "GT-AB",
}



hetTgen_cpp <- function(vcf, info.type = c("AD", "AD-tot", "GT", "GT-012", "GT-AB", "DP"), verbose = TRUE, parallel = FALSE) {
if (inherits(vcf, "list")) { vcf <- vcf$vcf }
if (inherits(vcf, "data.frame")) { vcf <- data.table::data.table(vcf) }
if (any(nchar(vcf$ALT) > 1)) {
warning("vcf file contains multi-allelic variants: \nonly bi-allelic SNPs allowed\nUse maf() to remove non-bi-allilic snps or drop minimum frequency alleles")
}

info.type <- match.arg(info.type)
itype <- substr(info.type, 1, 2)

adn <- sapply(strsplit(unlist(vcf[,"FORMAT"], use.names = FALSE), ":"), function(x) match(itype, x))
max_adn <- max(adn) + 1L
ind <- data.frame(cbind(seq_along(adn), adn))
xx <- data.frame(vcf[,-c(1:9)])

if (verbose & parallel==FALSE) {
message("generating table")
}

if (parallel) {
numCores <- detectCores() - 1
cl <- makeCluster(numCores)
clusterExport(cl, varlist = c("xx", "max_adn", "ind", "info.type", "process_column"), envir = environment())

h.table <- parAapply(cl, xx,2,process_column1,info_type = info.type,max_adn = max_adn,ind_adn = ind$adn)
stopCluster(cl)
} else {
if(verbose){
h.table <- rCNV:::apply_pb(xx,2,process_column1,info_type = info.type,max_adn = max_adn,ind_adn = ind$adn)
} else {
h.table <- apply(xx,2,process_column1,info_type = info.type,max_adn = max_adn,ind_adn = ind$adn)
}
}

if (info.type == "GT-012") {
h.table[h.table == "0/0"] <- 0
h.table[h.table == "1/1"] <- 1
h.table[h.table == "1/0" | h.table == "0/1"] <- 2
h.table[h.table == "./." | h.table == "."] <- NA
}
if (info.type == "GT-AB") {
h.table[h.table == "0/0"] <- "AA"
h.table[h.table == "1/1"] <- "BB"
h.table[h.table == "1/0" | h.table == "0/1"] <- "AB"
h.table[h.table == "./." | h.table == "."] <- -9
}
if (info.type == "AD") {
h.table[h.table == "./." | h.table == "." | is.na(h.table)] <- "0,0"
}
if (info.type == "DP") {
h.table[is.character(h.table)] <- 0
h.table[is.na(h.table)] <- 0
}

het.table <- as.data.frame(cbind(vcf[, c(1:3, 5)], h.table))
colnames(het.table) <- c("CHROM", colnames(vcf)[c(2, 3, 5, 10:ncol(vcf))])
return(het.table)
}






#' Get missingness of individuals in raw vcf
#'
#' A function to get the percentage of missing data of snps per SNP and per
Expand Down
62 changes: 22 additions & 40 deletions docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 8483bd7

Please sign in to comment.