From 3cc9356a58b6a02324b50dd4520d115a357cb260 Mon Sep 17 00:00:00 2001 From: "Oleksii.Nikolaienko" Date: Mon, 16 Dec 2024 20:19:56 +0100 Subject: [PATCH 01/12] +beinf --- DESCRIPTION | 75 ++++++++++++++++++++++++++------------------------- NAMESPACE | 3 +++ R/getAMR.R | 39 +++++++++++++++++++++++---- man/getAMR.Rd | 2 +- 4 files changed, 77 insertions(+), 42 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b0d5336..f4cb581 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,46 +1,49 @@ Package: ramr Title: Detection of Rare Aberrantly Methylated Regions in Array and NGS Data -Version: 1.15.0 +Version: 1.15.1 Authors@R: - person(given = "Oleksii", - family = "Nikolaienko", - role = c("aut", "cre"), - email = "oleksii.nikolaienko@gmail.com", - comment = c(ORCID = "0000-0002-5910-4934")) + person(given = "Oleksii", + family = "Nikolaienko", + role = c("aut", "cre"), + email = "oleksii.nikolaienko@gmail.com", + comment = c(ORCID = "0000-0002-5910-4934")) Comment: Maintainer: Oleksii Nikolaienko -Description: ramr is an R package for detection of low-frequency aberrant - methylation events in large data sets obtained by methylation profiling - using array or high-throughput bisulfite sequencing. In addition, package - provides functions to visualize found aberrantly methylated regions (AMRs), - to generate sets of all possible regions to be used as reference sets for - enrichment analysis, and to generate biologically relevant test data sets - for performance evaluation of AMR/DMR search algorithms. +Description: ramr is an R package for detection of epimutations (i.e., + infrequent aberrant DNA methylation events) + in large data sets obtained by methylation profiling using array or + high-throughput methylation sequencing. In addition, package + provides functions to visualize found aberrantly methylated regions (AMRs), + to generate sets of all possible regions to be used as reference sets for + enrichment analysis, and to generate biologically relevant test data sets + for performance evaluation of AMR/DMR search algorithms. Depends: - R (>= 4.1), - GenomicRanges, - parallel, - doParallel, - foreach, - doRNG, - methods + R (>= 4.1), + GenomicRanges, + parallel, + doParallel, + foreach, + doRNG, + methods Imports: - IRanges, - BiocGenerics, - ggplot2, - reshape2, - EnvStats, - ExtDist, - matrixStats, - S4Vectors + IRanges, + BiocGenerics, + ggplot2, + reshape2, + EnvStats, + ExtDist, + matrixStats, + S4Vectors, + gamlss, + gamlss.dist Suggests: - RUnit, - knitr, - rmarkdown, - gridExtra, - annotatr, - LOLA, - org.Hs.eg.db, - TxDb.Hsapiens.UCSC.hg19.knownGene + RUnit, + knitr, + rmarkdown, + gridExtra, + annotatr, + LOLA, + org.Hs.eg.db, + TxDb.Hsapiens.UCSC.hg19.knownGene License: Artistic-2.0 URL: https://github.com/BBCG/ramr BugReports: https://github.com/BBCG/ramr/issues diff --git a/NAMESPACE b/NAMESPACE index 7ed7a51..10ac23a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,9 @@ importFrom(S4Vectors,queryHits) importFrom(doParallel,registerDoParallel) importFrom(doRNG,"%dorng%") importFrom(foreach,foreach) +importFrom(gamlss,gamlss) +importFrom(gamlss,gamlss.control) +importFrom(gamlss.dist,pBEINF) importFrom(matrixStats,rowIQRs) importFrom(matrixStats,rowMedians) importFrom(methods,as) diff --git a/R/getAMR.R b/R/getAMR.R index d1cb0c4..14284b7 100644 --- a/R/getAMR.R +++ b/R/getAMR.R @@ -92,10 +92,12 @@ #' @importFrom doRNG %dorng% #' @importFrom methods as is #' @importFrom stats median pbeta +#' @importFrom gamlss gamlss gamlss.control +#' @importFrom gamlss.dist pBEINF #' @export getAMR <- function (data.ranges, data.samples=NULL, - ramr.method="IQR", + ramr.method=c("IQR", "beta", "wbeta", "beinf"), iqr.cutoff=5, pval.cutoff=5e-2, qval.cutoff=NULL, @@ -115,6 +117,7 @@ getAMR <- function (data.ranges, stop("'data.ranges' metadata must include 'data.samples'") if (length(data.samples)<3) stop("at least three 'data.samples' must be provided") + ramr.method <- match.arg(ramr.method) ##################################################################################### @@ -134,7 +137,7 @@ getAMR <- function (data.ranges, x.median <- stats::median(x, na.rm=TRUE) x[is.na(x)] <- x.median # weight directly correlates with bin contents (number of values per bin) - # and inversly - with the distance from the median value, thus narrowing + # and inversely - with the distance from the median value, thus narrowing # the estimated distribution and emphasizing outliers c <- cut(x, c(0:100)/100) b <- table(c) @@ -146,6 +149,30 @@ getAMR <- function (data.ranges, }) return(t(chunk.filt)) } + inv.logit <- function (x) exp(x)/(1+exp(x)) + getPValues.beinf <- function (data.chunk, ...) { + chunk.filt <- apply(data.chunk, 1, function (x) { + x.median <- stats::median(x, na.rm=TRUE) + x[is.na(x)] <- x.median + beinf.fit <- gamlss::gamlss( + as.numeric(x)~1, sigma.formula=~1, nu.formula=~1, tau.formula=~1, + family=gamlss.dist::BEINF(mu.link="logit", sigma.link="logit", + nu.link="log", tau.link="log"), + control=gamlss::gamlss.control(trace=FALSE), ... + ) + pvals <- gamlss.dist::pBEINF( + q=x, mu=inv.logit(beinf.fit$mu.coefficients), + sigma=inv.logit(beinf.fit$sigma.coefficients), + nu=exp(beinf.fit$nu.coefficients), + tau=exp(beinf.fit$tau.coefficients), + lower.tail=TRUE, log.p=FALSE + ) + pvals[x>x.median] <- 1 - pvals[x>x.median] + return(pvals) + }) + return(t(chunk.filt)) + } + ##################################################################################### @@ -178,6 +205,10 @@ getAMR <- function (data.ranges, betas.filtered <- foreach (chunk=chunks) %dorng% getPValues.wbeta(betas[chunk,], ...) betas.filtered <- do.call(rbind, betas.filtered) betas.filtered[betas.filtered>=qval.cutoff] <- NA + } else if (ramr.method=="beinf") { + betas.filtered <- foreach (chunk=chunks) %dorng% getPValues.beinf(betas[chunk,], ...) + betas.filtered <- do.call(rbind, betas.filtered) + betas.filtered[betas.filtered>=qval.cutoff] <- NA } else { stop("unknown 'ramr.method'") } @@ -201,9 +232,7 @@ getAMR <- function (data.ranges, ranges$xiqr <- vapply(ranges$revmap, function (revmap) { mean(betas.filtered[not.na[revmap],column,drop=FALSE], na.rm=TRUE) }, numeric(1)) - } - - if (ramr.method=="beta" | ramr.method=="wbeta") { + } else { ranges$pval <- vapply(ranges$revmap, function (revmap) { return( 10**mean(log10(betas.filtered[not.na[revmap],column] + .Machine$double.xmin), na.rm=TRUE) ) }, numeric(1)) diff --git a/man/getAMR.Rd b/man/getAMR.Rd index c5b7f3c..884e269 100644 --- a/man/getAMR.Rd +++ b/man/getAMR.Rd @@ -7,7 +7,7 @@ getAMR( data.ranges, data.samples = NULL, - ramr.method = "IQR", + ramr.method = c("IQR", "beta", "wbeta", "beinf"), iqr.cutoff = 5, pval.cutoff = 0.05, qval.cutoff = NULL, From df98954c93f3f2443d449995377ec33a8893cd55 Mon Sep 17 00:00:00 2001 From: "Oleksii.Nikolaienko" Date: Tue, 17 Dec 2024 09:48:58 +0100 Subject: [PATCH 02/12] + tests --- inst/unitTests/test_getAMR.R | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/inst/unitTests/test_getAMR.R b/inst/unitTests/test_getAMR.R index 055cb09..e4104b1 100644 --- a/inst/unitTests/test_getAMR.R +++ b/inst/unitTests/test_getAMR.R @@ -1,5 +1,6 @@ test_getAMR <- function () { data(ramr) + RUnit::checkException( getAMR(c()) ) @@ -12,20 +13,39 @@ test_getAMR <- function () { RUnit::checkException( getAMR(ramr.data, ramr.samples, ramr.method="zzz") ) + + amr.iqr.1 <- getAMR(ramr.data, ramr.method="IQR", min.cpgs=5, merge.window=10000, iqr.cutoff=5, cores=1) + amr.iqr.2 <- getAMR(ramr.data, ramr.method="IQR", min.cpgs=5, merge.window=10000, iqr.cutoff=5, cores=2) + RUnit::checkIdentical( + amr.iqr.1, + amr.iqr.2 + ) + RUnit::checkEquals( + c(sum(GenomicRanges::countOverlaps(amr.iqr.2, ramr.tp.unique)), sum(GenomicRanges::countOverlaps(amr.iqr.2, ramr.tp.nonunique))), + c(6, 45) + ) + + amr.beta <- getAMR(ramr.data, ramr.samples, ramr.method="beta", min.cpgs=5, merge.window=10000, qval.cutoff=1e-3, cores=2) RUnit::checkEquals( - length( getAMR(ramr.data, ramr.method="IQR", min.cpgs=5, merge.window=10000, iqr.cutoff=5, cores=1) ), - length( c(ramr.tp.unique,ramr.tp.nonunique) ) + c(sum(GenomicRanges::countOverlaps(amr.beta, ramr.tp.unique)), sum(GenomicRanges::countOverlaps(amr.beta, ramr.tp.nonunique))), + c(6, 45) ) + + amr.wbeta <- getAMR(ramr.data, ramr.samples, ramr.method="wbeta", min.cpgs=5, merge.window=10000, qval.cutoff=1e-4, cores=2) RUnit::checkEquals( - length( getAMR(ramr.data, ramr.samples, ramr.method="beta", min.cpgs=5, merge.window=10000, qval.cutoff=1e-3, cores=2) ), - length( c(ramr.tp.unique,ramr.tp.nonunique) ) + c(sum(GenomicRanges::countOverlaps(amr.wbeta, ramr.tp.unique)), sum(GenomicRanges::countOverlaps(amr.wbeta, ramr.tp.nonunique))), + c(6, 45) ) + + amr.iqr.3 <- getAMR(ramr.data, ramr.samples, ramr.method="IQR", min.cpgs=5, merge.window=10000, cores=2, exclude.range=c(0.1,0.9)) RUnit::checkEquals( - length( getAMR(ramr.data, ramr.samples, ramr.method="wbeta", min.cpgs=5, merge.window=10000, qval.cutoff=1e-4, cores=2) ), - length( c(ramr.tp.unique,ramr.tp.nonunique) ) + c(sum(GenomicRanges::countOverlaps(amr.iqr.3, ramr.tp.unique)), sum(GenomicRanges::countOverlaps(amr.iqr.3, ramr.tp.nonunique))), + c(2, 18) ) + + amr.beinf <- getAMR(ramr.data, ramr.samples, ramr.method="beinf", min.cpgs=5, merge.window=10000, qval.cutoff=1e-3, cores=2) RUnit::checkEquals( - length( getAMR(ramr.data, ramr.samples, ramr.method="IQR", min.cpgs=5, merge.window=10000, qval.cutoff=1e-3, cores=2, exclude.range=c(0.1,0.9)) ), - 8 + c(sum(GenomicRanges::countOverlaps(amr.beinf, ramr.tp.unique)), sum(GenomicRanges::countOverlaps(amr.beinf, ramr.tp.nonunique))), + c(6, 45) ) } From 6b863c5a17b7c59189061604319df99d6c30f32c Mon Sep 17 00:00:00 2001 From: "Oleksii.Nikolaienko" Date: Tue, 17 Dec 2024 11:52:28 +0100 Subject: [PATCH 03/12] better docs --- man/getAMR.Rd | 17 +++++++++++------ man/plotAMR.Rd | 4 ++++ vignettes/ramr.Rmd | 4 +++- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/man/getAMR.Rd b/man/getAMR.Rd index 884e269..f12f5f6 100644 --- a/man/getAMR.Rd +++ b/man/getAMR.Rd @@ -30,8 +30,9 @@ columns) are included in the analysis.} \item{ramr.method}{A character scalar: when ramr.method is "IQR" (the default), the filtering based on interquantile range is used (`iqr.cutoff` -value is then used as a threshold). When "beta" or "wbeta" - filtering based -on fitting non-weighted (`EnvStats::ebeta`) or weighted (`ExtDist::eBeta`) +value is then used as a threshold). When "beta", "wbeta" or "beinf" - +filtering based on fitting non-weighted (`EnvStats::ebeta`), weighted +(`ExtDist::eBeta`) or zero-and-one inflated (`gamlss.dist::BEINF`) beta distributions, respectively, is used, and `pval.cutoff` or `qval.cutoff` (if not `NULL`) is used as a threshold. For "wbeta", weights directly correlate with bin contents (number of values per bin) and inversly - with @@ -98,12 +99,16 @@ regions (AMRs) for all samples in a data set. \details{ In the provided data set, `getAMR` compares methylation beta values of each sample with other samples to identify rare long-range methylation -aberrations. For `ramr.method=="IQR"`: for every genomic location (CpG) in +aberrations (epimutations). +For `ramr.method=="IQR"`: for every genomic location (CpG) in `data.ranges` the IQR-normalized deviation from the median value is calculated, and all CpGs with such normalized deviation not smaller than the -`iqr.cutoff` are retained. For `ramr.method=="*beta"`: parameters of beta -distribution are estimated by means of `EnvStats::ebeta` or `ExtDist::eBeta` -functions, and then used to calculate the probability values, followed by the +`iqr.cutoff` are retained. For +`ramr.method %in% c("beta", "wbeta", "beinf")`: parameters of beta +distribution are estimated by means of `EnvStats::ebeta` (beta distribution), +`ExtDist::eBeta` (weighted beta destribution), or `gamlss.dist::BEINF` (zero +and one inflated beta distribution) functions, respectively. These +parameters are then used to calculate the probability values, followed by the filtering when all CpGs with p-values not greater than `qval.cutoff` are retained. Another filtering is then performed to exclude all CpGs within `exclude.range`. Next, the retained (significant) CpGs are merged within diff --git a/man/plotAMR.Rd b/man/plotAMR.Rd index b982dfb..428e551 100644 --- a/man/plotAMR.Rd +++ b/man/plotAMR.Rd @@ -10,6 +10,7 @@ plotAMR( amr.ranges, highlight = NULL, title = NULL, + labs = c("genomic position", "beta value"), window = 300 ) } @@ -31,6 +32,9 @@ default), will contain sample IDs from the `sample` metadata column of \item{title}{An optional title for the plot. If NULL (the default), plot title is set to a genomic location of particular AMR.} +\item{labs}{Optional axis labels for the plot. Default: c("genomic position", +"beta value").} + \item{window}{An optional integer constant to expand genomic ranges of the `amr.ranges` object (the default: 300).} } diff --git a/vignettes/ramr.Rmd b/vignettes/ramr.Rmd index 8cb4331..b4a3eae 100644 --- a/vignettes/ramr.Rmd +++ b/vignettes/ramr.Rmd @@ -24,7 +24,7 @@ options(width=100) # Introduction -*`ramr`* is an R package for detection of low-frequency aberrant methylation events in large data sets +*`ramr`* is an R package for detection of low-frequency aberrant methylation events (epimutations) in large data sets obtained by methylation profiling using array or high-throughput bisulfite sequencing. In addition, package provides functions to visualize found aberrantly methylated regions (AMRs), to generate sets of all possible regions to be used as reference sets for enrichment analysis, and to generate biologically relevant test data sets for @@ -62,6 +62,8 @@ GRanges object with 383788 ranges and 845 metadata columns: *`ramr`* package is supplied with a sample data, which was simulated using GSE51032 data set as described in the *`ramr`* reference paper. Sample data set *`ramr.data`* contains beta values for 10000 CpGs and 100 samples (*`ramr.samples`*), and carries 6 unique (*`ramr.tp.unique`*) and 15 non-unique (*`ramr.tp.nonunique`*) true positive AMRs containing at least 10 CpGs with their beta values increased/decreased by 0.5 ```{r, fig.width=10, fig.height=4, out.width="100%", out.height="100%"} +library(GenomicRanges) +library(ggplot2) library(ramr) data(ramr) From 6eb042bebb61f8cc3e151da737673431ee2497de Mon Sep 17 00:00:00 2001 From: "Oleksii.Nikolaienko" Date: Tue, 17 Dec 2024 11:52:49 +0100 Subject: [PATCH 04/12] more globals --- R/globals.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/globals.R b/R/globals.R index d1130f7..36371b4 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1 +1,4 @@ -utils::globalVariables(c("chunk", "column", "ncpg", "width")) +utils::globalVariables(c( + "chunk", "column", "ncpg", "width", "..data.samples", ":=", "alpha", "color", + "size", "start" +)) From fd2d0497ebbc3417c84fad58308789f4a7c42b11 Mon Sep 17 00:00:00 2001 From: "Oleksii.Nikolaienko" Date: Tue, 17 Dec 2024 11:53:11 +0100 Subject: [PATCH 05/12] better docs --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c00c93e..30b9165 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,12 @@ This readme contains condensed info on *`ramr`* usage. For more, please check fu ## Current Features * Identification of aberrantly methylated regions (AMRs) + - filtering by interquartile range (IQR) + - filtering by fitting non-weighted, weighted, or one-and-zero inflated beta distributions * AMR visualization * Generation of reference sets for third-party analyses (e.g. enrichment) * Generation of test data sets for performance evaluation of algorithms for search of differentially (DMR) or aberrantly (AMR) methylated regions - ------- ## Installation From 67c311bbee69d8f2e7b4cebe79e5463e3d9cc145 Mon Sep 17 00:00:00 2001 From: "Oleksii.Nikolaienko" Date: Tue, 17 Dec 2024 11:53:45 +0100 Subject: [PATCH 06/12] dependencies in tests --- inst/unitTests/test_getUniverse.R | 2 +- inst/unitTests/test_plotAMR.R | 4 ++-- inst/unitTests/test_simulateAMR.R | 10 ++++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/inst/unitTests/test_getUniverse.R b/inst/unitTests/test_getUniverse.R index b8eab00..7cc3218 100644 --- a/inst/unitTests/test_getUniverse.R +++ b/inst/unitTests/test_getUniverse.R @@ -8,6 +8,6 @@ test_getUniverse <- function () { length(ramr.data) ) RUnit::checkTrue( - all( width( getUniverse(ramr.data, min.cpgs=10, merge.window=1000) ) > 1000 ) + all( GenomicRanges::width( getUniverse(ramr.data, min.cpgs=10, merge.window=1000) ) > 1000 ) ) } diff --git a/inst/unitTests/test_plotAMR.R b/inst/unitTests/test_plotAMR.R index 4f410f9..0feebb7 100644 --- a/inst/unitTests/test_plotAMR.R +++ b/inst/unitTests/test_plotAMR.R @@ -2,10 +2,10 @@ test_plotAMR <- function () { data(ramr) RUnit::checkEquals( length(plotAMR(ramr.data, ramr.samples, ramr.tp.nonunique)), - length(reduce(ramr.tp.nonunique)) + length(GenomicRanges::reduce(ramr.tp.nonunique)) ) RUnit::checkEquals( length(plotAMR(ramr.data, NULL, ramr.tp.unique)), - length(reduce(ramr.tp.unique)) + length(GenomicRanges::reduce(ramr.tp.unique)) ) } diff --git a/inst/unitTests/test_simulateAMR.R b/inst/unitTests/test_simulateAMR.R index 2212e1f..92d21ea 100644 --- a/inst/unitTests/test_simulateAMR.R +++ b/inst/unitTests/test_simulateAMR.R @@ -18,10 +18,12 @@ test_simulateAMR <- function () { unique <- simulateAMR(ramr.data, nsamples=29, regions.per.sample=2, samples.per.region=1) nonunique <- simulateAMR(ramr.data, nsamples=20, exclude.ranges=unique, merge.window=1000, regions.per.sample=5, samples.per.region=3) noise <- simulateAMR(ramr.data, nsamples=100, exclude.ranges=c(unique,nonunique), merge.window=1, min.cpgs=1, max.cpgs=1, regions.per.sample=1000, samples.per.region=1) - RUnit::checkTrue( - all(unique %outside% nonunique) + RUnit::checkEquals( + sum(GenomicRanges::countOverlaps(unique, nonunique)), + 0 ) - RUnit::checkTrue( - all(noise %outside% c(unique,nonunique)) + RUnit::checkEquals( + sum(GenomicRanges::countOverlaps(noise, c(unique,nonunique))), + 0 ) } From 9e279bec64508e21c14100ec8bc833f1304ce9d6 Mon Sep 17 00:00:00 2001 From: "Oleksii.Nikolaienko" Date: Tue, 17 Dec 2024 11:54:22 +0100 Subject: [PATCH 07/12] cleaner dependencies --- R/plotAMR.R | 62 +++++++++++++++++++++++++++++-------------------- R/simulateAMR.R | 4 ++-- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/R/plotAMR.R b/R/plotAMR.R index c20a96c..b32630c 100644 --- a/R/plotAMR.R +++ b/R/plotAMR.R @@ -23,6 +23,8 @@ #' `amr.ranges` object. #' @param title An optional title for the plot. If NULL (the default), plot #' title is set to a genomic location of particular AMR. +#' @param labs Optional axis labels for the plot. Default: c("genomic position", +#' "beta value"). #' @param window An optional integer constant to expand genomic ranges of the #' `amr.ranges` object (the default: 300). #' @return The output is a list of `ggplot` objects. @@ -39,9 +41,7 @@ #' c(plotAMR(ramr.data, ramr.samples, ramr.tp.nonunique), ncol=2)) #' @importFrom BiocGenerics relist #' @importFrom GenomicRanges reduce findOverlaps mcols -#' @import ggplot2 -#' @importFrom matrixStats rowMedians -#' @importFrom reshape2 melt +#' @importFrom data.table as.data.table melt.data.table #' @importFrom S4Vectors queryHits #' @importFrom methods as is #' @export @@ -50,8 +50,11 @@ plotAMR <- function (data.ranges, amr.ranges, highlight=NULL, title=NULL, + labs=c("genomic position", "beta value"), window=300) { + if (!requireNamespace("ggplot2", quietly=TRUE)) stop("ggplot2 is required for plotting. Please install") + if (is.null(data.samples)) data.samples <- colnames(GenomicRanges::mcols(data.ranges)) amr.ranges.reduced <- GenomicRanges::reduce(amr.ranges, min.gapwidth=window, with.revmap=TRUE) @@ -60,33 +63,42 @@ plotAMR <- function (data.ranges, for (i in seq_along(amr.ranges.relisted)) { plot.ranges <- unlist(amr.ranges.relisted[i]) - revmap.rows <- unique(unlist(plot.ranges$revmap)) + # revmap.rows <- unique(unlist(plot.ranges$revmap)) data.hits <- unique(S4Vectors::queryHits(GenomicRanges::findOverlaps(data.ranges, plot.ranges, maxgap=window, ignore.strand=TRUE))) if (length(data.hits)>0) { - plot.data <- data.frame(data.ranges[data.hits, data.samples], check.names=FALSE, stringsAsFactors=FALSE) - colnames(plot.data) <- c("seqnames", "start", "end", "width", "strand", data.samples) - plot.data$median <- matrixStats::rowMedians(as.matrix(plot.data[,data.samples]), na.rm=TRUE) + plot.data <- data.table::as.data.table(data.ranges[data.hits, data.samples]) + plot.data$median <- apply(plot.data[, ..data.samples], 1, median, na.rm=TRUE) colorify <- c("median", if (is.null(highlight)) unique(plot.ranges$sample), highlight) - plot.data.melt <- reshape2::melt(plot.data, id.vars=c("seqnames","start","end","width","strand"), + plot.data.melt <- data.table::melt.data.table(plot.data, id.vars=c("seqnames","start","end","width","strand"), variable.name="sample", value.name="beta") - plot.data.melt <- cbind(plot.data.melt, list(alpha=0.5,color=factor("lightgrey",levels=c("lightgrey",colorify)))) - - plot.data.melt[plot.data.melt$sample %in% colorify, "alpha"] <- 0.9 - for (sample.name in colorify) - plot.data.melt[plot.data.melt$sample==sample.name,"color"] <- sample.name - - gene.plot <- ggplot(plot.data.melt, aes_string(x="start", y="beta", group="sample", color="color", alpha="alpha")) + - geom_line(size=0.5) + - geom_point(size=1) + - scale_x_continuous(name="position") + - scale_y_continuous(name="beta value", limits=c(0, 1), breaks=c(0, 0.25, 0.5, 0.75, 1)) + - scale_color_discrete(name="samples", limits=colorify) + - scale_alpha_continuous(guide="none") + - theme(legend.text=element_text(size=8), - axis.text.x=element_text(size=8, angle=0), - axis.text.y=element_text(size=8)) + - ggtitle(if (is.null(title)) as.character(reduce(plot.ranges)), title) + plot.data.melt[, `:=` ( + size=0, + alpha=0.5, + color=factor("lightgrey",levels=c("lightgrey", colorify)) + )] + + plot.data.melt[sample %in% colorify, `:=` (alpha=0.9, color=sample)] + for (j in seq_along(plot.ranges)) { + plot.data.melt[sample==plot.ranges$sample[j] & start %in% GenomicRanges::start( data.ranges[unlist(plot.ranges[j]$revmap)] ), + size:=1] + } + + gene.plot <- ggplot2::ggplot(plot.data.melt, ggplot2::aes(x=start, y=beta, group=sample, color=color, alpha=alpha)) + + ggplot2::geom_line(linewidth=0.5) + + ggplot2::geom_point(mapping=ggplot2::aes(size=size)) + + ggplot2::scale_x_continuous(name=labs[1]) + + ggplot2::scale_y_continuous(name=labs[2], limits=c(0, 1), breaks=c(0, 0.25, 0.5, 0.75, 1)) + + ggplot2::scale_color_discrete(name="samples", limits=colorify) + + ggplot2::scale_alpha_continuous(guide="none") + + ggplot2::scale_size_identity(guide="none") + + ggplot2::theme_light() + + ggplot2::theme(legend.text=ggplot2::element_text(size=8), + axis.text.x=ggplot2::element_text(size=8, angle=0), + axis.text.y=ggplot2::element_text(size=8)) + + ggplot2::ggtitle( + if (is.null(title)) as.character(GenomicRanges::reduce(plot.ranges)) else title + ) plot.list[length(plot.list)+1] <- list(gene.plot) } diff --git a/R/simulateAMR.R b/R/simulateAMR.R index aa9dfbf..e3a249c 100644 --- a/R/simulateAMR.R +++ b/R/simulateAMR.R @@ -76,7 +76,7 @@ #' samples.per.region=2, min.cpgs=5, merge.window=1000) #' @importFrom methods is #' @importFrom utils head tail -#' @importFrom IRanges `%outside%` +#' @importFrom IRanges subsetByOverlaps #' @export simulateAMR <- function (template.ranges, nsamples, @@ -111,7 +111,7 @@ simulateAMR <- function (template.ranges, universe.ranges <- getUniverse(template.ranges, merge.window=merge.window, min.cpgs=min.cpgs, min.width=min.width) universe.ranges <- subset(universe.ranges, `ncpg`<=max.cpgs) if (methods::is(exclude.ranges,"GRanges")) - universe.ranges <- subset(universe.ranges, universe.ranges %outside% exclude.ranges) + universe.ranges <- IRanges::subsetByOverlaps(universe.ranges, exclude.ranges, invert=TRUE) if (is.null(sample.names)) sample.names <- paste0("sample", seq_len(nsamples)) From e83e9d46d3c5abd2ba4cc0de8aa972b62033d31a Mon Sep 17 00:00:00 2001 From: "Oleksii.Nikolaienko" Date: Tue, 17 Dec 2024 11:54:55 +0100 Subject: [PATCH 08/12] beinf docs --- R/getAMR.R | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/R/getAMR.R b/R/getAMR.R index 14284b7..6ad168d 100644 --- a/R/getAMR.R +++ b/R/getAMR.R @@ -7,12 +7,16 @@ #' @details #' In the provided data set, `getAMR` compares methylation beta values of each #' sample with other samples to identify rare long-range methylation -#' aberrations. For `ramr.method=="IQR"`: for every genomic location (CpG) in +#' aberrations (epimutations). +#' For `ramr.method=="IQR"`: for every genomic location (CpG) in #' `data.ranges` the IQR-normalized deviation from the median value is #' calculated, and all CpGs with such normalized deviation not smaller than the -#' `iqr.cutoff` are retained. For `ramr.method=="*beta"`: parameters of beta -#' distribution are estimated by means of `EnvStats::ebeta` or `ExtDist::eBeta` -#' functions, and then used to calculate the probability values, followed by the +#' `iqr.cutoff` are retained. For +#' `ramr.method %in% c("beta", "wbeta", "beinf")`: parameters of beta +#' distribution are estimated by means of `EnvStats::ebeta` (beta distribution), +#' `ExtDist::eBeta` (weighted beta destribution), or `gamlss.dist::BEINF` (zero +#' and one inflated beta distribution) functions, respectively. These +#' parameters are then used to calculate the probability values, followed by the #' filtering when all CpGs with p-values not greater than `qval.cutoff` are #' retained. Another filtering is then performed to exclude all CpGs within #' `exclude.range`. Next, the retained (significant) CpGs are merged within @@ -26,8 +30,9 @@ #' columns) are included in the analysis. #' @param ramr.method A character scalar: when ramr.method is "IQR" (the #' default), the filtering based on interquantile range is used (`iqr.cutoff` -#' value is then used as a threshold). When "beta" or "wbeta" - filtering based -#' on fitting non-weighted (`EnvStats::ebeta`) or weighted (`ExtDist::eBeta`) +#' value is then used as a threshold). When "beta", "wbeta" or "beinf" - +#' filtering based on fitting non-weighted (`EnvStats::ebeta`), weighted +#' (`ExtDist::eBeta`) or zero-and-one inflated (`gamlss.dist::BEINF`) #' beta distributions, respectively, is used, and `pval.cutoff` or `qval.cutoff` #' (if not `NULL`) is used as a threshold. For "wbeta", weights directly #' correlate with bin contents (number of values per bin) and inversly - with @@ -185,28 +190,28 @@ getAMR <- function (data.ranges, universe <- getUniverse(data.ranges, merge.window=merge.window, min.cpgs=min.cpgs, min.width=min.width) universe.cpgs <- unlist(universe$revmap) - betas <- as.matrix(mcols(data.ranges)[universe.cpgs,data.samples,drop=FALSE]) + betas <- as.matrix(mcols(data.ranges)[universe.cpgs, data.samples, drop=FALSE]) if (is.null(qval.cutoff)) qval.cutoff <- pval.cutoff/ncol(betas) - chunks <- split(seq_len(nrow(betas)), if (cores>1) cut(seq_len(nrow(betas)),cores) else 1) - medians <- foreach (chunk=chunks, .combine=c) %dorng% matrixStats::rowMedians(betas[chunk,], na.rm=TRUE) + chunks <- split(seq_len(nrow(betas)), if (cores>1) cut(seq_len(nrow(betas)), cores) else 1) + medians <- foreach (chunk=chunks, .combine=c) %dorng% matrixStats::rowMedians(betas[chunk, ], na.rm=TRUE) if (ramr.method=="IQR") { - iqrs <- foreach (chunk=chunks, .combine=c) %dorng% matrixStats::rowIQRs(betas[chunk,], na.rm=TRUE) + iqrs <- foreach (chunk=chunks, .combine=c) %dorng% matrixStats::rowIQRs(betas[chunk, ], na.rm=TRUE) betas.filtered <- (betas-medians)/iqrs betas.filtered[abs(betas.filtered)mle>>>fitdistrplus::fitdist) - betas.filtered <- foreach (chunk=chunks) %dorng% getPValues.beta(betas[chunk,], ...) + betas.filtered <- foreach (chunk=chunks) %dorng% getPValues.beta(betas[chunk, ], ...) betas.filtered <- do.call(rbind, betas.filtered) betas.filtered[betas.filtered>=qval.cutoff] <- NA } else if (ramr.method=="wbeta") { - betas.filtered <- foreach (chunk=chunks) %dorng% getPValues.wbeta(betas[chunk,], ...) + betas.filtered <- foreach (chunk=chunks) %dorng% getPValues.wbeta(betas[chunk, ], ...) betas.filtered <- do.call(rbind, betas.filtered) betas.filtered[betas.filtered>=qval.cutoff] <- NA } else if (ramr.method=="beinf") { - betas.filtered <- foreach (chunk=chunks) %dorng% getPValues.beinf(betas[chunk,], ...) + betas.filtered <- foreach (chunk=chunks) %dorng% getPValues.beinf(betas[chunk, ], ...) betas.filtered <- do.call(rbind, betas.filtered) betas.filtered[betas.filtered>=qval.cutoff] <- NA } else { From 615a3ac882fada26da0961a1ffa06c33b1fdffc9 Mon Sep 17 00:00:00 2001 From: "Oleksii.Nikolaienko" Date: Tue, 17 Dec 2024 11:55:33 +0100 Subject: [PATCH 09/12] + zero-and-one inflated beta --- DESCRIPTION | 14 +++++++------- NAMESPACE | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f4cb581..85bd736 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,21 +17,20 @@ Description: ramr is an R package for detection of epimutations (i.e., enrichment analysis, and to generate biologically relevant test data sets for performance evaluation of AMR/DMR search algorithms. Depends: - R (>= 4.1), - GenomicRanges, + R (>= 4.1) +Imports: parallel, doParallel, foreach, doRNG, - methods -Imports: + methods, + data.table, + matrixStats, + GenomicRanges, IRanges, BiocGenerics, - ggplot2, - reshape2, EnvStats, ExtDist, - matrixStats, S4Vectors, gamlss, gamlss.dist @@ -39,6 +38,7 @@ Suggests: RUnit, knitr, rmarkdown, + ggplot2, gridExtra, annotatr, LOLA, diff --git a/NAMESPACE b/NAMESPACE index 10ac23a..03e3881 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,7 +5,6 @@ export(getUniverse) export(plotAMR) export(simulateAMR) export(simulateData) -import(ggplot2) importFrom(BiocGenerics,relist) importFrom(EnvStats,ebeta) importFrom(ExtDist,eBeta) @@ -15,8 +14,10 @@ importFrom(GenomicRanges,findOverlaps) importFrom(GenomicRanges,granges) importFrom(GenomicRanges,mcols) importFrom(GenomicRanges,reduce) -importFrom(IRanges,`%outside%`) +importFrom(IRanges,subsetByOverlaps) importFrom(S4Vectors,queryHits) +importFrom(data.table,as.data.table) +importFrom(data.table,melt.data.table) importFrom(doParallel,registerDoParallel) importFrom(doRNG,"%dorng%") importFrom(foreach,foreach) @@ -30,7 +31,6 @@ importFrom(methods,is) importFrom(parallel,detectCores) importFrom(parallel,makeCluster) importFrom(parallel,stopCluster) -importFrom(reshape2,melt) importFrom(stats,median) importFrom(stats,na.omit) importFrom(stats,pbeta) From 4bd56a6d94d7f0e3eee80053f0efcf37667a34d3 Mon Sep 17 00:00:00 2001 From: "Oleksii.Nikolaienko" Date: Tue, 17 Dec 2024 12:15:40 +0100 Subject: [PATCH 10/12] cleaner dependencies --- R/getAMR.R | 2 -- R/plotAMR.R | 1 - 2 files changed, 3 deletions(-) diff --git a/R/getAMR.R b/R/getAMR.R index 6ad168d..5899642 100644 --- a/R/getAMR.R +++ b/R/getAMR.R @@ -214,8 +214,6 @@ getAMR <- function (data.ranges, betas.filtered <- foreach (chunk=chunks) %dorng% getPValues.beinf(betas[chunk, ], ...) betas.filtered <- do.call(rbind, betas.filtered) betas.filtered[betas.filtered>=qval.cutoff] <- NA - } else { - stop("unknown 'ramr.method'") } if (!is.null(exclude.range)) diff --git a/R/plotAMR.R b/R/plotAMR.R index b32630c..dbea126 100644 --- a/R/plotAMR.R +++ b/R/plotAMR.R @@ -43,7 +43,6 @@ #' @importFrom GenomicRanges reduce findOverlaps mcols #' @importFrom data.table as.data.table melt.data.table #' @importFrom S4Vectors queryHits -#' @importFrom methods as is #' @export plotAMR <- function (data.ranges, data.samples=NULL, From eda7a2e46b7b64500b2e736537fe4225fe80d4d5 Mon Sep 17 00:00:00 2001 From: "Oleksii.Nikolaienko" Date: Tue, 17 Dec 2024 12:27:26 +0100 Subject: [PATCH 11/12] cleaner dependencies --- R/getAMR.R | 12 ------------ R/getUniverse.R | 2 -- R/globals.R | 4 ---- R/plotAMR.R | 4 ---- R/simulateAMR.R | 3 --- R/simulateData.R | 8 -------- 6 files changed, 33 deletions(-) delete mode 100644 R/globals.R diff --git a/R/getAMR.R b/R/getAMR.R index 5899642..9bc23a1 100644 --- a/R/getAMR.R +++ b/R/getAMR.R @@ -87,18 +87,6 @@ #' data(ramr) #' getAMR(ramr.data, ramr.samples, ramr.method="beta", #' min.cpgs=5, merge.window=1000, qval.cutoff=1e-3, cores=2) -#' @importFrom parallel detectCores makeCluster stopCluster -#' @importFrom doParallel registerDoParallel -#' @importFrom GenomicRanges mcols reduce -#' @importFrom EnvStats ebeta -#' @importFrom ExtDist eBeta pBeta -#' @importFrom matrixStats rowMedians rowIQRs -#' @importFrom foreach foreach -#' @importFrom doRNG %dorng% -#' @importFrom methods as is -#' @importFrom stats median pbeta -#' @importFrom gamlss gamlss gamlss.control -#' @importFrom gamlss.dist pBEINF #' @export getAMR <- function (data.ranges, data.samples=NULL, diff --git a/R/getUniverse.R b/R/getUniverse.R index 2b34f40..e15ef22 100644 --- a/R/getUniverse.R +++ b/R/getUniverse.R @@ -40,8 +40,6 @@ #' runLOLA(amrs, universe, hg19.extdb, cores=1, redefineUserSets=TRUE) #' } #' } -#' @importFrom GenomicRanges reduce -#' @importFrom methods is #' @export getUniverse <- function (data.ranges, merge.window=300, diff --git a/R/globals.R b/R/globals.R deleted file mode 100644 index 36371b4..0000000 --- a/R/globals.R +++ /dev/null @@ -1,4 +0,0 @@ -utils::globalVariables(c( - "chunk", "column", "ncpg", "width", "..data.samples", ":=", "alpha", "color", - "size", "start" -)) diff --git a/R/plotAMR.R b/R/plotAMR.R index dbea126..e3a72ad 100644 --- a/R/plotAMR.R +++ b/R/plotAMR.R @@ -39,10 +39,6 @@ #' library(gridExtra) #' do.call("grid.arrange", #' c(plotAMR(ramr.data, ramr.samples, ramr.tp.nonunique), ncol=2)) -#' @importFrom BiocGenerics relist -#' @importFrom GenomicRanges reduce findOverlaps mcols -#' @importFrom data.table as.data.table melt.data.table -#' @importFrom S4Vectors queryHits #' @export plotAMR <- function (data.ranges, data.samples=NULL, diff --git a/R/simulateAMR.R b/R/simulateAMR.R index e3a249c..039eb00 100644 --- a/R/simulateAMR.R +++ b/R/simulateAMR.R @@ -74,9 +74,6 @@ #' amrs.nonunique <- #' simulateAMR(ramr.data, nsamples=3, exclude.ranges=amrs.unique, #' samples.per.region=2, min.cpgs=5, merge.window=1000) -#' @importFrom methods is -#' @importFrom utils head tail -#' @importFrom IRanges subsetByOverlaps #' @export simulateAMR <- function (template.ranges, nsamples, diff --git a/R/simulateData.R b/R/simulateData.R index 37d3f72..8c5e8fa 100644 --- a/R/simulateData.R +++ b/R/simulateData.R @@ -82,14 +82,6 @@ #' noisy.data <- #' simulateData(ramr.data, nsamples=10, amr.ranges=c(amrs,noise), cores=2) #' plotAMR(noisy.data, amr.ranges=amrs[1]) -#' @importFrom parallel detectCores makeCluster stopCluster -#' @importFrom doParallel registerDoParallel -#' @importFrom GenomicRanges mcols `mcols<-` granges -#' @importFrom EnvStats ebeta -#' @importFrom foreach foreach -#' @importFrom doRNG %dorng% -#' @importFrom methods is -#' @importFrom stats median na.omit rbeta #' @export simulateData <- function (template.ranges, nsamples, From e457826cb0b1cbc722c86234754de40f7d996018 Mon Sep 17 00:00:00 2001 From: "Oleksii.Nikolaienko" Date: Tue, 17 Dec 2024 13:26:21 +0100 Subject: [PATCH 12/12] cleaner dependencies --- R/internal.R | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 R/internal.R diff --git a/R/internal.R b/R/internal.R new file mode 100644 index 0000000..5335701 --- /dev/null +++ b/R/internal.R @@ -0,0 +1,49 @@ +#' @importFrom BiocGenerics relist +#' @importFrom data.table as.data.table melt.data.table +#' @importFrom doParallel registerDoParallel +#' @importFrom doRNG %dorng% +#' @importFrom EnvStats ebeta +#' @importFrom ExtDist eBeta pBeta +#' @importFrom foreach foreach +#' @importFrom gamlss gamlss gamlss.control +#' @importFrom gamlss.dist pBEINF +#' @importFrom GenomicRanges mcols `mcols<-` granges reduce findOverlaps +#' @importFrom IRanges subsetByOverlaps +#' @importFrom matrixStats rowMedians rowIQRs +#' @importFrom methods as is +#' @importFrom parallel detectCores makeCluster stopCluster +#' @importFrom S4Vectors queryHits +#' @importFrom stats median na.omit rbeta pbeta +#' @importFrom utils head tail +## #' @importFrom Rcpp sourceCpp +## #' @useDynLib ramr, .registration=TRUE + + +# internal globals, constants and helper functions +# + +################################################################################ +# Globals, unload +################################################################################ + +utils::globalVariables(c( + "chunk", "column", "ncpg", "width", "..data.samples", ":=", "alpha", "color", + "size", "start" +)) + +# .onUnload <- function (libpath) {library.dynam.unload("ramr", libpath)} + +################################################################################ +# Constants +################################################################################ + +# descr: ... + +# + +################################################################################ +# Functions: ... +################################################################################ + +# descr: ... +# value: ...