From 8ffc90d725a5f478d64733821c5bb4f962df3a70 Mon Sep 17 00:00:00 2001 From: Paul Villafuerte Date: Wed, 6 Jul 2022 12:46:33 -0400 Subject: [PATCH 1/5] Initial pass --- vignettes/independent_filtering_plots.Rmd | 207 ++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 vignettes/independent_filtering_plots.Rmd diff --git a/vignettes/independent_filtering_plots.Rmd b/vignettes/independent_filtering_plots.Rmd new file mode 100644 index 0000000..cb4d4fc --- /dev/null +++ b/vignettes/independent_filtering_plots.Rmd @@ -0,0 +1,207 @@ +--- +title: "Additional plots for: Independent filtering increases power for + detecting differentially expressed genes, Bourgon et al., PNAS (2010)" +author: "Richard Bourgon and Wolfgang Huber" +date: "`r format(Sys.time(), '%B %d, %Y')`" +vignette: > + %\VignetteEngine{knitr::rmarkdown} + %\VignetteIndexEntry{03 - Additional plots for: Independent filtering increases power for detecting differentially expressed genes, Bourgon et al., PNAS (2010)} + %\VignettePackage{genefilter} + %\VignetteEncoding{UTF-8} +output: + BiocStyle::html_document: + number_sections: yes + toc: yes + toc_depth: 4 +--- + +```{r setup, echo=FALSE} +options(width=80 ) +``` + + +# Introduction + +This vignette illustrates use of some functions in the *genefilter* +package that provide useful diagnostics for independent +filtering [@BourgonIndependentFiltering]: + +- `kappa_p` and `kappa_t` + +- `filtered_p` and `filtered_R` + +- `filter_volcano` + +- `rejection_plot` + +# Data preparation + +Load the ALL data set and the *genefilter* package: + +```{r libraries, message=FALSE, warning=FALSE} +library("genefilter") +library("ALL") +data("ALL") +``` + + +Reduce to just two conditions, then take a small subset of arrays from +these, with 3 arrays per condition: + +```{r sample_data, cache=TRUE} +bcell <- grep("^B", as.character(ALL$BT)) +moltyp <- which(as.character(ALL$mol.biol) %in% +c("NEG", "BCR/ABL")) +ALL_bcrneg <- ALL[, intersect(bcell, moltyp)] +ALL_bcrneg$mol.biol <- factor(ALL_bcrneg$mol.biol) +n1 <- n2 <- 3 +set.seed(1969) +use <- unlist(tapply(1:ncol(ALL_bcrneg), +ALL_bcrneg$mol.biol, sample, n1)) +subsample <- ALL_bcrneg[,use] +``` + +We now use functions from *genefilter* to compute overall standard +devation filter statistics as well as standard two-sample $t$ and +releated statistics. + +```{r stats, cache=TRUE} +S <- rowSds( exprs( subsample ) ) +temp <- rowttests( subsample, subsample$mol.biol ) +d <- temp$dm +p <- temp$p.value +t <- temp$statistic +``` + + +# Filtering volcano plot + +Filtering on overall standard deviation and then using a standard +$t$-statistic induces a lower bound of fold change, albeit one which +varies somewhat with the significance of the $t$-statistic. The +`filter_volcano` function allows you to visualize this effect. + +```{r filter_volcano, echo=FALSE} +S_cutoff <- quantile(S, .50) +filter_volcano(d, p, S, n1, n2, alpha=.01, S_cutoff) +``` + +The output is shown in the left panel of Fig. [\[fig:volcano\]][1]. + [1]: #fig:volcano {reference-type="ref" reference="fig:volcano"} + + + + + + + + + + + +The `kappa_p` and `kappa_t` functions, used to make the volcano plot, +compute the fold change bound multiplier as a function of either a +$t$-test $p$-value or the $t$-statistic itself. The actual induced bound +on the fold change is $\kappa$ times the filter's cutoff on the overall +standard deviation. Note that fold change bounds for values of $|T|$ +which are close to 0 are not of practical interest because we will not +reject the null hypothesis with test statistics in this range. + +```{r kappa, echo=FALSE} +t <- seq(0, 5, length=100) +plot(t, kappa_t(t, n1, n2) * S_cutoff, + xlab="|T|", ylab="Fold change bound", type="l") +``` + +The plot is shown in the right panel of +Fig. [2](#fig:volcano)reference-type="ref" reference="fig:volcano". + +# Rejection count plots + +## Across $p$-value cutoffs + +The `filtered_p` function permits easy simultaneous calculation of +unadjusted or adjusted $p$-values over a range of filtering thresholds +($\theta$). Here, we return to the full "BCR/ABL" versus "NEG" data set, +and compute adjusted $p$-values using the method of Benjamini and +Hochberg, for a range of different filter stringencies. + + + + + + + + + +```{r table, include=FALSE} +table(ALL_bcrneg$mol.biol) +``` +```{r filtered_p, message=FALSE, warning=FALSE, include=FALSE} +S2 <- rowVars(exprs(ALL_bcrneg)) +p2 <- rowttests(ALL_bcrneg, "mol.biol")$p.value +theta <- seq(0, .5, .1) +p_bh <- filtered_p(S2, p2, theta, method="BH") +``` + +```{r p_bh, echo=TRUE} +head(p_bh) +``` + +The `rejection_plot` function takes sets of $p$-values corresponding to +different filtering choices --- in the columns of a matrix or in a list +--- and shows how rejection count ($R$) relates to the choice of cutoff +for the $p$-values. For these data, over a reasonable range of FDR +cutoffs, increased filtering corresponds to increased rejections. + +```{r rejection_plot} +rejection_plot(p_bh, at="sample", + xlim=c(0,.3), ylim=c(0,1000), + main="Benjamini & Hochberg adjustment") +``` + +The plot is shown in the left panel of Fig. [4](#fig:rej){reference-type="ref" +reference="fig:rej"}. + +## Across filtering fractions + +If we select a fixed cutoff for the adjusted $p$-values, we can also +look more closely at the relationship between the fraction of null +hypotheses filtered and the total number of discoveries. The +`filtered_R` function wraps `filtered_p` and just returns rejection +counts. It requires a $p$-value cutoff. + +```{r filtered_R} +theta <- seq(0, .80, .01) +R_BH <- filtered_R(alpha=.10, S2, p2, theta, method="BH") +``` + +```{r R_BH} +head(R_BH) +``` + +Because overfiltering (or use of a filter which is inappropriate for the +application domain) discards both false and true null hypotheses, very +large values of $\theta$ reduce power in this example: + +```{r filtered_R_plot} +plot(theta, R_BH, type="l", + xlab=expression(theta), ylab="Rejections", + main="BH cutoff = 0.1") +``` + + The plot is shown in the right panel of +Fig. [4](#fig:rej){reference-type="ref" reference="fig:rej"}. + +# Session information {#session-information .unnumbered} + +```{r sessionInfo, echo=FALSE, results='asis'} +#sessionInfo() |> toLatex() +sessionInfo() +``` + + + + + + From 89a5d7ca94b0b695f3077cb8c0daa2e509374282 Mon Sep 17 00:00:00 2001 From: Paul Villafuerte Date: Wed, 3 Aug 2022 12:19:32 -0400 Subject: [PATCH 2/5] initial conversion --- vignettes/independent_filtering_plots.Rmd | 64 ++++++++++++++--------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/vignettes/independent_filtering_plots.Rmd b/vignettes/independent_filtering_plots.Rmd index cb4d4fc..f64b92b 100644 --- a/vignettes/independent_filtering_plots.Rmd +++ b/vignettes/independent_filtering_plots.Rmd @@ -1,6 +1,5 @@ --- -title: "Additional plots for: Independent filtering increases power for - detecting differentially expressed genes, Bourgon et al., PNAS (2010)" +title: "Additional plots for: Independent filtering increases power for detecting differentially expressed genes, Bourgon et al., PNAS (2010)" author: "Richard Bourgon and Wolfgang Huber" date: "`r format(Sys.time(), '%B %d, %Y')`" vignette: > @@ -15,11 +14,25 @@ output: toc_depth: 4 --- +```{r functions, message=TRUE, warning=TRUE, include=FALSE} +## Color Format +colFmt <- function(x,color) { + outputFormat <- knitr::opts_knit$get("rmarkdown.pandoc.to") + if(outputFormat == 'latex') { + ret <- paste("\\textcolor{",color,"}{",x,"}",sep="") + } else if(outputFormat == 'html') { + ret <- paste("",x,"",sep="") + } else { + ret <- x + } + return(ret) +} +``` + ```{r setup, echo=FALSE} options(width=80 ) ``` - # Introduction This vignette illustrates use of some functions in the *genefilter* @@ -81,23 +94,7 @@ $t$-statistic induces a lower bound of fold change, albeit one which varies somewhat with the significance of the $t$-statistic. The `filter_volcano` function allows you to visualize this effect. -```{r filter_volcano, echo=FALSE} -S_cutoff <- quantile(S, .50) -filter_volcano(d, p, S, n1, n2, alpha=.01, S_cutoff) -``` - -The output is shown in the left panel of Fig. [\[fig:volcano\]][1]. - [1]: #fig:volcano {reference-type="ref" reference="fig:volcano"} - - - - - - - - - - +The output is shown in the left panel of Fig.^[1](#fig:volcano){reference-type="ref" reference="fig:volcano"}. The `kappa_p` and `kappa_t` functions, used to make the volcano plot, compute the fold change bound multiplier as a function of either a @@ -107,14 +104,18 @@ standard deviation. Note that fold change bounds for values of $|T|$ which are close to 0 are not of practical interest because we will not reject the null hypothesis with test statistics in this range. -```{r kappa, echo=FALSE} +Table:\#tab:label +```{r filter_volcano, fig.cap = "Wide figure. A plot produced by a code chunk with option `fig.wide = TRUE`.", echo=FALSE} +par(mfrow=c(1, 2)) + +S_cutoff <- quantile(S, .50) +ss <- filter_volcano(d, p, S, n1, n2, alpha=.01, S_cutoff) + t <- seq(0, 5, length=100) -plot(t, kappa_t(t, n1, n2) * S_cutoff, +plot(t, kappa_t(t, n1, n2) * S_cutoff, xlab="|T|", ylab="Fold change bound", type="l") ``` - -The plot is shown in the right panel of -Fig. [2](#fig:volcano)reference-type="ref" reference="fig:volcano". +__Figure 1:__ __`r colFmt("Left panel: plot produced by the ", 'blue')`__ `filter_volcano` __`r colFmt("function ", 'blue')`__.\ # Rejection count plots @@ -148,6 +149,7 @@ p_bh <- filtered_p(S2, p2, theta, method="BH") head(p_bh) ``` + The `rejection_plot` function takes sets of $p$-values corresponding to different filtering choices --- in the columns of a matrix or in a list --- and shows how rejection count ($R$) relates to the choice of cutoff @@ -188,8 +190,13 @@ large values of $\theta$ reduce power in this example: plot(theta, R_BH, type="l", xlab=expression(theta), ylab="Rejections", main="BH cutoff = 0.1") + + ``` +your amazing header {#slug} +\@ref(slug)) + The plot is shown in the right panel of Fig. [4](#fig:rej){reference-type="ref" reference="fig:rej"}. @@ -198,10 +205,15 @@ Fig. [4](#fig:rej){reference-type="ref" reference="fig:rej"}. ```{r sessionInfo, echo=FALSE, results='asis'} #sessionInfo() |> toLatex() sessionInfo() -``` +``` +\@ref(fig:ss) + + + +\@ref(tab:label) From 1e2c9edf637724e0af8ac7ccffafbf3dc58ba11d Mon Sep 17 00:00:00 2001 From: Paul Villafuerte Date: Wed, 5 Oct 2022 13:39:45 -0400 Subject: [PATCH 3/5] independent_filtering_plots conversion #2: graphs --- vignettes/independent_filtering_plots.Rmd | 63 +++++++---------------- 1 file changed, 20 insertions(+), 43 deletions(-) diff --git a/vignettes/independent_filtering_plots.Rmd b/vignettes/independent_filtering_plots.Rmd index f64b92b..c9f3e1c 100644 --- a/vignettes/independent_filtering_plots.Rmd +++ b/vignettes/independent_filtering_plots.Rmd @@ -33,11 +33,14 @@ colFmt <- function(x,color) { options(width=80 ) ``` + + # Introduction This vignette illustrates use of some functions in the *genefilter* package that provide useful diagnostics for independent -filtering [@BourgonIndependentFiltering]: +filtering. ^[Richard Bourgon, Robert Gentleman and Wolfgang Huber. Independent filtering +increases power for detecting differentially expressed genes.] - `kappa_p` and `kappa_t` @@ -57,7 +60,6 @@ library("ALL") data("ALL") ``` - Reduce to just two conditions, then take a small subset of arrays from these, with 3 arrays per condition: @@ -75,8 +77,8 @@ subsample <- ALL_bcrneg[,use] ``` We now use functions from *genefilter* to compute overall standard -devation filter statistics as well as standard two-sample $t$ and -releated statistics. +deviation filter statistics as well as standard two-sample $t$ and +related statistics. ```{r stats, cache=TRUE} S <- rowSds( exprs( subsample ) ) @@ -94,7 +96,7 @@ $t$-statistic induces a lower bound of fold change, albeit one which varies somewhat with the significance of the $t$-statistic. The `filter_volcano` function allows you to visualize this effect. -The output is shown in the left panel of Fig.^[1](#fig:volcano){reference-type="ref" reference="fig:volcano"}. +The output is shown in the left panel of Fig. \@ref(fig:filter-volcano). The `kappa_p` and `kappa_t` functions, used to make the volcano plot, compute the fold change bound multiplier as a function of either a @@ -104,8 +106,10 @@ standard deviation. Note that fold change bounds for values of $|T|$ which are close to 0 are not of practical interest because we will not reject the null hypothesis with test statistics in this range. -Table:\#tab:label -```{r filter_volcano, fig.cap = "Wide figure. A plot produced by a code chunk with option `fig.wide = TRUE`.", echo=FALSE} +The output is shown in the right panel of Fig. \@ref(fig:filter-volcano). + + +```{r filter-volcano, fig.cap = "Left panel: plot produced by the filter_volcano function. Right panel: graph of the `kappa_t` function."} par(mfrow=c(1, 2)) S_cutoff <- quantile(S, .50) @@ -115,7 +119,6 @@ t <- seq(0, 5, length=100) plot(t, kappa_t(t, n1, n2) * S_cutoff, xlab="|T|", ylab="Fold change bound", type="l") ``` -__Figure 1:__ __`r colFmt("Left panel: plot produced by the ", 'blue')`__ `filter_volcano` __`r colFmt("function ", 'blue')`__.\ # Rejection count plots @@ -127,18 +130,11 @@ unadjusted or adjusted $p$-values over a range of filtering thresholds and compute adjusted $p$-values using the method of Benjamini and Hochberg, for a range of different filter stringencies. - - - - - - - - -```{r table, include=FALSE} +```{r table} table(ALL_bcrneg$mol.biol) ``` -```{r filtered_p, message=FALSE, warning=FALSE, include=FALSE} + +```{r filtered_p, message=FALSE, warning=FALSE, include=TRUE} S2 <- rowVars(exprs(ALL_bcrneg)) p2 <- rowttests(ALL_bcrneg, "mol.biol")$p.value theta <- seq(0, .5, .1) @@ -149,21 +145,19 @@ p_bh <- filtered_p(S2, p2, theta, method="BH") head(p_bh) ``` - The `rejection_plot` function takes sets of $p$-values corresponding to different filtering choices --- in the columns of a matrix or in a list --- and shows how rejection count ($R$) relates to the choice of cutoff for the $p$-values. For these data, over a reasonable range of FDR cutoffs, increased filtering corresponds to increased rejections. -```{r rejection_plot} +```{r rejection-plot, fig.cap = "Plot produced by the `rejection_plot function`"} rejection_plot(p_bh, at="sample", xlim=c(0,.3), ylim=c(0,1000), main="Benjamini & Hochberg adjustment") ``` -The plot is shown in the left panel of Fig. [4](#fig:rej){reference-type="ref" -reference="fig:rej"}. +The plot is shown in Fig. \@ref(fig:rejection-plot). ## Across filtering fractions @@ -186,34 +180,17 @@ Because overfiltering (or use of a filter which is inappropriate for the application domain) discards both false and true null hypotheses, very large values of $\theta$ reduce power in this example: -```{r filtered_R_plot} +```{r filtered-R-plot, fig.cap="graph of theta."} plot(theta, R_BH, type="l", xlab=expression(theta), ylab="Rejections", main="BH cutoff = 0.1") - - ``` -your amazing header {#slug} -\@ref(slug)) +The plot is shown in Fig. \@ref(fig:filtered-R-plot). - The plot is shown in the right panel of -Fig. [4](#fig:rej){reference-type="ref" reference="fig:rej"}. +# Session information {#session-information .unnumbered .unlisted} -# Session information {#session-information .unnumbered} - -```{r sessionInfo, echo=FALSE, results='asis'} -#sessionInfo() |> toLatex() +```{r sessionInfo, echo=FALSE} sessionInfo() - - ``` -\@ref(fig:ss) - - - - - - -\@ref(tab:label) From dc53ca8edf5bbb1d2b71023c5e089eb98594fcc6 Mon Sep 17 00:00:00 2001 From: Paul Villafuerte Date: Tue, 22 Nov 2022 10:06:05 -0500 Subject: [PATCH 4/5] Convert independent_filtering_plot.Rnw to Rmd --- vignettes/independent_filtering_plots.Rmd | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vignettes/independent_filtering_plots.Rmd b/vignettes/independent_filtering_plots.Rmd index c9f3e1c..668bd55 100644 --- a/vignettes/independent_filtering_plots.Rmd +++ b/vignettes/independent_filtering_plots.Rmd @@ -1,6 +1,10 @@ --- title: "Additional plots for: Independent filtering increases power for detecting differentially expressed genes, Bourgon et al., PNAS (2010)" -author: "Richard Bourgon and Wolfgang Huber" +author: +- name: "Richard Bourgon" +- name: "Wolfgang Huber" +- name: "Paul Villafuerte" + affiliation: "Vignette translation from Sweave to Rmarkdown / HTML" date: "`r format(Sys.time(), '%B %d, %Y')`" vignette: > %\VignetteEngine{knitr::rmarkdown} From 17de33319fc77a6732f099355e75f2c830e7dd36 Mon Sep 17 00:00:00 2001 From: Paul Villafuerte Date: Tue, 22 Nov 2022 10:21:40 -0500 Subject: [PATCH 5/5] Independent Filtering Plots Final --- vignettes/independent_filtering_plots.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/independent_filtering_plots.Rmd b/vignettes/independent_filtering_plots.Rmd index 668bd55..efd7f4f 100644 --- a/vignettes/independent_filtering_plots.Rmd +++ b/vignettes/independent_filtering_plots.Rmd @@ -15,8 +15,8 @@ output: BiocStyle::html_document: number_sections: yes toc: yes - toc_depth: 4 ---- + toc_depth: 4 +--- ```{r functions, message=TRUE, warning=TRUE, include=FALSE} ## Color Format