From 8ffc90d725a5f478d64733821c5bb4f962df3a70 Mon Sep 17 00:00:00 2001
From: Paul Villafuerte <pvillafuerte@gmail.com>
Date: Wed, 6 Jul 2022 12:46:33 -0400
Subject: [PATCH 1/5] Initial pass

---
 vignettes/independent_filtering_plots.Rmd | 207 ++++++++++++++++++++++
 1 file changed, 207 insertions(+)
 create mode 100644 vignettes/independent_filtering_plots.Rmd

diff --git a/vignettes/independent_filtering_plots.Rmd b/vignettes/independent_filtering_plots.Rmd
new file mode 100644
index 0000000..cb4d4fc
--- /dev/null
+++ b/vignettes/independent_filtering_plots.Rmd
@@ -0,0 +1,207 @@
+---
+title: "Additional plots for: Independent filtering increases power for
+  detecting differentially expressed genes, Bourgon et al., PNAS (2010)"
+author: "Richard Bourgon and Wolfgang Huber"
+date: "`r format(Sys.time(), '%B %d, %Y')`"
+vignette: >
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteIndexEntry{03 - Additional plots for: Independent filtering increases power for detecting differentially expressed genes, Bourgon et al., PNAS (2010)}
+  %\VignettePackage{genefilter}
+  %\VignetteEncoding{UTF-8}
+output:
+  BiocStyle::html_document:
+    number_sections: yes
+    toc: yes
+    toc_depth: 4
+---
+
+```{r setup, echo=FALSE}
+options(width=80 )
+```
+
+
+# Introduction
+
+This vignette illustrates use of some functions in the *genefilter*
+package that provide useful diagnostics for independent
+filtering [@BourgonIndependentFiltering]:
+
+-   `kappa_p` and `kappa_t`
+
+-   `filtered_p` and `filtered_R`
+
+-   `filter_volcano`
+
+-   `rejection_plot`
+
+# Data preparation
+
+Load the ALL data set and the *genefilter* package:
+
+```{r libraries, message=FALSE, warning=FALSE}
+library("genefilter")
+library("ALL")
+data("ALL")
+```
+
+
+Reduce to just two conditions, then take a small subset of arrays from
+these, with 3 arrays per condition:
+
+```{r sample_data, cache=TRUE}
+bcell <- grep("^B", as.character(ALL$BT))
+moltyp <- which(as.character(ALL$mol.biol) %in%
+c("NEG", "BCR/ABL"))
+ALL_bcrneg <- ALL[, intersect(bcell, moltyp)]
+ALL_bcrneg$mol.biol <- factor(ALL_bcrneg$mol.biol)
+n1 <- n2 <- 3
+set.seed(1969)
+use <- unlist(tapply(1:ncol(ALL_bcrneg),
+ALL_bcrneg$mol.biol, sample, n1))
+subsample <- ALL_bcrneg[,use]
+```
+
+We now use functions from *genefilter* to compute overall standard
+devation filter statistics as well as standard two-sample $t$ and
+releated statistics.
+
+```{r stats, cache=TRUE}
+S <- rowSds( exprs( subsample ) )
+temp <- rowttests( subsample, subsample$mol.biol )
+d <- temp$dm
+p <- temp$p.value
+t <- temp$statistic
+```
+
+
+# Filtering volcano plot
+
+Filtering on overall standard deviation and then using a standard
+$t$-statistic induces a lower bound of fold change, albeit one which
+varies somewhat with the significance of the $t$-statistic. The
+`filter_volcano` function allows you to visualize this effect.
+
+```{r filter_volcano, echo=FALSE}
+S_cutoff <- quantile(S, .50)
+filter_volcano(d, p, S, n1, n2, alpha=.01, S_cutoff)
+```
+
+The output is shown in the left panel of Fig. [\[fig:volcano\]][1].
+  [1]: #fig:volcano {reference-type="ref" reference="fig:volcano"}
+<!--   [1]: #fig:volcano {reference-type="ref" reference="fig:volcano"} -->
+
+<!-- ::: center -->
+<!-- ![Left panel: plot produced by the `filter_volcano` function. Right -->
+<!-- panel: graph of the `kappa_t` -->
+<!-- function.](figure/filter_volcano-1 "fig:"){#fig:volcano width="49%"} -->
+<!-- ![Left panel: plot produced by the `filter_volcano` function. Right -->
+<!-- panel: graph of the `kappa_t` -->
+<!-- function.](figure/kappa-1 "fig:"){#fig:volcano width="49%"} -->
+<!-- ::: -->
+
+The `kappa_p` and `kappa_t` functions, used to make the volcano plot,
+compute the fold change bound multiplier as a function of either a
+$t$-test $p$-value or the $t$-statistic itself. The actual induced bound
+on the fold change is $\kappa$ times the filter's cutoff on the overall
+standard deviation. Note that fold change bounds for values of $|T|$
+which are close to 0 are not of practical interest because we will not
+reject the null hypothesis with test statistics in this range.
+
+```{r kappa, echo=FALSE}
+t <- seq(0, 5, length=100)
+plot(t, kappa_t(t, n1, n2) * S_cutoff, 
+     xlab="|T|", ylab="Fold change bound", type="l")
+```
+
+The plot is shown in the right panel of
+Fig. [2](#fig:volcano)reference-type="ref" reference="fig:volcano".
+
+# Rejection count plots
+
+## Across $p$-value cutoffs
+
+The `filtered_p` function permits easy simultaneous calculation of
+unadjusted or adjusted $p$-values over a range of filtering thresholds
+($\theta$). Here, we return to the full "BCR/ABL" versus "NEG" data set,
+and compute adjusted $p$-values using the method of Benjamini and
+Hochberg, for a range of different filter stringencies.
+
+<!-- ::: center -->
+<!-- ![Left panel: plot produced by the `rejection_plot` function. Right -->
+<!-- panel: graph of `theta`.](figure/rejection_plot-1 "fig:"){#fig:rej -->
+<!-- width="49%"} ![Left panel: plot produced by the `rejection_plot` -->
+<!-- function. Right panel: graph of -->
+<!-- `theta`.](figure/filtered_R_plot-1 "fig:"){#fig:rej width="49%"} -->
+<!-- ::: -->
+
+```{r table, include=FALSE}
+table(ALL_bcrneg$mol.biol)
+```
+```{r filtered_p, message=FALSE, warning=FALSE, include=FALSE}
+S2 <- rowVars(exprs(ALL_bcrneg))
+p2 <- rowttests(ALL_bcrneg, "mol.biol")$p.value
+theta <- seq(0, .5, .1)
+p_bh <- filtered_p(S2, p2, theta, method="BH")
+```
+
+```{r p_bh, echo=TRUE}
+head(p_bh)
+```
+
+The `rejection_plot` function takes sets of $p$-values corresponding to
+different filtering choices --- in the columns of a matrix or in a list
+--- and shows how rejection count ($R$) relates to the choice of cutoff
+for the $p$-values. For these data, over a reasonable range of FDR
+cutoffs, increased filtering corresponds to increased rejections.
+
+```{r rejection_plot}
+rejection_plot(p_bh, at="sample",
+               xlim=c(0,.3), ylim=c(0,1000),
+               main="Benjamini & Hochberg adjustment")
+```
+
+The plot is shown in the left panel of Fig. [4](#fig:rej){reference-type="ref"
+reference="fig:rej"}.
+
+## Across filtering fractions
+
+If we select a fixed cutoff for the adjusted $p$-values, we can also
+look more closely at the relationship between the fraction of null
+hypotheses filtered and the total number of discoveries. The
+`filtered_R` function wraps `filtered_p` and just returns rejection
+counts. It requires a $p$-value cutoff.
+
+```{r filtered_R}
+theta <- seq(0, .80, .01)
+R_BH <- filtered_R(alpha=.10, S2, p2, theta, method="BH")
+```
+
+```{r R_BH}
+head(R_BH)
+```
+
+Because overfiltering (or use of a filter which is inappropriate for the
+application domain) discards both false and true null hypotheses, very
+large values of $\theta$ reduce power in this example:
+
+```{r filtered_R_plot}
+plot(theta, R_BH, type="l",
+     xlab=expression(theta), ylab="Rejections",
+     main="BH cutoff = 0.1")
+```
+
+ The plot is shown in the right panel of 
+Fig. [4](#fig:rej){reference-type="ref" reference="fig:rej"}.
+
+# Session information {#session-information .unnumbered}
+
+```{r sessionInfo, echo=FALSE, results='asis'}
+#sessionInfo() |> toLatex()
+sessionInfo()
+```
+
+
+<!-- ::: thebibliography -->
+<!-- 10 Richard Bourgon, Robert Gentleman and Wolfgang Huber. Independent -->
+<!-- filtering increases power for detecting differentially expressed genes. -->
+<!-- ::: -->

From 89a5d7ca94b0b695f3077cb8c0daa2e509374282 Mon Sep 17 00:00:00 2001
From: Paul Villafuerte <pvillafuerte@gmail.com>
Date: Wed, 3 Aug 2022 12:19:32 -0400
Subject: [PATCH 2/5] initial conversion

---
 vignettes/independent_filtering_plots.Rmd | 64 ++++++++++++++---------
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/vignettes/independent_filtering_plots.Rmd b/vignettes/independent_filtering_plots.Rmd
index cb4d4fc..f64b92b 100644
--- a/vignettes/independent_filtering_plots.Rmd
+++ b/vignettes/independent_filtering_plots.Rmd
@@ -1,6 +1,5 @@
 ---
-title: "Additional plots for: Independent filtering increases power for
-  detecting differentially expressed genes, Bourgon et al., PNAS (2010)"
+title: "Additional plots for: Independent filtering increases power for detecting differentially expressed genes, Bourgon et al., PNAS (2010)"
 author: "Richard Bourgon and Wolfgang Huber"
 date: "`r format(Sys.time(), '%B %d, %Y')`"
 vignette: >
@@ -15,11 +14,25 @@ output:
     toc_depth: 4
 ---
 
+```{r functions, message=TRUE, warning=TRUE, include=FALSE}
+## Color Format
+colFmt <- function(x,color) {
+  outputFormat <- knitr::opts_knit$get("rmarkdown.pandoc.to")
+  if(outputFormat == 'latex') {
+    ret <- paste("\\textcolor{",color,"}{",x,"}",sep="")
+  } else if(outputFormat == 'html') {
+    ret <- paste("<font color='",color,"'>",x,"</font>",sep="")
+  } else {
+    ret <- x
+  }
+  return(ret)
+}
+```
+
 ```{r setup, echo=FALSE}
 options(width=80 )
 ```
 
-
 # Introduction
 
 This vignette illustrates use of some functions in the *genefilter*
@@ -81,23 +94,7 @@ $t$-statistic induces a lower bound of fold change, albeit one which
 varies somewhat with the significance of the $t$-statistic. The
 `filter_volcano` function allows you to visualize this effect.
 
-```{r filter_volcano, echo=FALSE}
-S_cutoff <- quantile(S, .50)
-filter_volcano(d, p, S, n1, n2, alpha=.01, S_cutoff)
-```
-
-The output is shown in the left panel of Fig. [\[fig:volcano\]][1].
-  [1]: #fig:volcano {reference-type="ref" reference="fig:volcano"}
-<!--   [1]: #fig:volcano {reference-type="ref" reference="fig:volcano"} -->
-
-<!-- ::: center -->
-<!-- ![Left panel: plot produced by the `filter_volcano` function. Right -->
-<!-- panel: graph of the `kappa_t` -->
-<!-- function.](figure/filter_volcano-1 "fig:"){#fig:volcano width="49%"} -->
-<!-- ![Left panel: plot produced by the `filter_volcano` function. Right -->
-<!-- panel: graph of the `kappa_t` -->
-<!-- function.](figure/kappa-1 "fig:"){#fig:volcano width="49%"} -->
-<!-- ::: -->
+The output is shown in the left panel of Fig.^[1](#fig:volcano){reference-type="ref" reference="fig:volcano"}.
 
 The `kappa_p` and `kappa_t` functions, used to make the volcano plot,
 compute the fold change bound multiplier as a function of either a
@@ -107,14 +104,18 @@ standard deviation. Note that fold change bounds for values of $|T|$
 which are close to 0 are not of practical interest because we will not
 reject the null hypothesis with test statistics in this range.
 
-```{r kappa, echo=FALSE}
+Table:\#tab:label
+```{r filter_volcano, fig.cap = "Wide figure. A plot produced by a code chunk with option `fig.wide = TRUE`.", echo=FALSE}
+par(mfrow=c(1, 2))
+
+S_cutoff <- quantile(S, .50)
+ss <- filter_volcano(d, p, S, n1, n2, alpha=.01, S_cutoff)
+
 t <- seq(0, 5, length=100)
-plot(t, kappa_t(t, n1, n2) * S_cutoff, 
+plot(t, kappa_t(t, n1, n2) * S_cutoff,
      xlab="|T|", ylab="Fold change bound", type="l")
 ```
-
-The plot is shown in the right panel of
-Fig. [2](#fig:volcano)reference-type="ref" reference="fig:volcano".
+__Figure 1:__ __`r colFmt("Left panel: plot produced by the ", 'blue')`__ `filter_volcano` __`r colFmt("function ", 'blue')`__.\
 
 # Rejection count plots
 
@@ -148,6 +149,7 @@ p_bh <- filtered_p(S2, p2, theta, method="BH")
 head(p_bh)
 ```
 
+
 The `rejection_plot` function takes sets of $p$-values corresponding to
 different filtering choices --- in the columns of a matrix or in a list
 --- and shows how rejection count ($R$) relates to the choice of cutoff
@@ -188,8 +190,13 @@ large values of $\theta$ reduce power in this example:
 plot(theta, R_BH, type="l",
      xlab=expression(theta), ylab="Rejections",
      main="BH cutoff = 0.1")
+
+
 ```
 
+your amazing header {#slug}
+\@ref(slug))
+
  The plot is shown in the right panel of 
 Fig. [4](#fig:rej){reference-type="ref" reference="fig:rej"}.
 
@@ -198,10 +205,15 @@ Fig. [4](#fig:rej){reference-type="ref" reference="fig:rej"}.
 ```{r sessionInfo, echo=FALSE, results='asis'}
 #sessionInfo() |> toLatex()
 sessionInfo()
-```
 
 
+```
+\@ref(fig:ss)
+
 <!-- ::: thebibliography -->
 <!-- 10 Richard Bourgon, Robert Gentleman and Wolfgang Huber. Independent -->
 <!-- filtering increases power for detecting differentially expressed genes. -->
 <!-- ::: -->
+
+
+\@ref(tab:label)

From 1e2c9edf637724e0af8ac7ccffafbf3dc58ba11d Mon Sep 17 00:00:00 2001
From: Paul Villafuerte <pvillafuerte@gmail.com>
Date: Wed, 5 Oct 2022 13:39:45 -0400
Subject: [PATCH 3/5] independent_filtering_plots conversion #2: graphs

---
 vignettes/independent_filtering_plots.Rmd | 63 +++++++----------------
 1 file changed, 20 insertions(+), 43 deletions(-)

diff --git a/vignettes/independent_filtering_plots.Rmd b/vignettes/independent_filtering_plots.Rmd
index f64b92b..c9f3e1c 100644
--- a/vignettes/independent_filtering_plots.Rmd
+++ b/vignettes/independent_filtering_plots.Rmd
@@ -33,11 +33,14 @@ colFmt <- function(x,color) {
 options(width=80 )
 ```
 
+
+
 # Introduction
 
 This vignette illustrates use of some functions in the *genefilter*
 package that provide useful diagnostics for independent
-filtering [@BourgonIndependentFiltering]:
+filtering. ^[Richard Bourgon, Robert Gentleman and Wolfgang Huber. Independent filtering
+increases power for detecting differentially expressed genes.]
 
 -   `kappa_p` and `kappa_t`
 
@@ -57,7 +60,6 @@ library("ALL")
 data("ALL")
 ```
 
-
 Reduce to just two conditions, then take a small subset of arrays from
 these, with 3 arrays per condition:
 
@@ -75,8 +77,8 @@ subsample <- ALL_bcrneg[,use]
 ```
 
 We now use functions from *genefilter* to compute overall standard
-devation filter statistics as well as standard two-sample $t$ and
-releated statistics.
+deviation filter statistics as well as standard two-sample $t$ and
+related statistics.
 
 ```{r stats, cache=TRUE}
 S <- rowSds( exprs( subsample ) )
@@ -94,7 +96,7 @@ $t$-statistic induces a lower bound of fold change, albeit one which
 varies somewhat with the significance of the $t$-statistic. The
 `filter_volcano` function allows you to visualize this effect.
 
-The output is shown in the left panel of Fig.^[1](#fig:volcano){reference-type="ref" reference="fig:volcano"}.
+The output is shown in the left panel of Fig. \@ref(fig:filter-volcano).
 
 The `kappa_p` and `kappa_t` functions, used to make the volcano plot,
 compute the fold change bound multiplier as a function of either a
@@ -104,8 +106,10 @@ standard deviation. Note that fold change bounds for values of $|T|$
 which are close to 0 are not of practical interest because we will not
 reject the null hypothesis with test statistics in this range.
 
-Table:\#tab:label
-```{r filter_volcano, fig.cap = "Wide figure. A plot produced by a code chunk with option `fig.wide = TRUE`.", echo=FALSE}
+The output is shown in the right panel of Fig. \@ref(fig:filter-volcano).
+
+
+```{r filter-volcano, fig.cap = "Left panel: plot produced by the filter_volcano function.  Right panel: graph of the `kappa_t` function."}
 par(mfrow=c(1, 2))
 
 S_cutoff <- quantile(S, .50)
@@ -115,7 +119,6 @@ t <- seq(0, 5, length=100)
 plot(t, kappa_t(t, n1, n2) * S_cutoff,
      xlab="|T|", ylab="Fold change bound", type="l")
 ```
-__Figure 1:__ __`r colFmt("Left panel: plot produced by the ", 'blue')`__ `filter_volcano` __`r colFmt("function ", 'blue')`__.\
 
 # Rejection count plots
 
@@ -127,18 +130,11 @@ unadjusted or adjusted $p$-values over a range of filtering thresholds
 and compute adjusted $p$-values using the method of Benjamini and
 Hochberg, for a range of different filter stringencies.
 
-<!-- ::: center -->
-<!-- ![Left panel: plot produced by the `rejection_plot` function. Right -->
-<!-- panel: graph of `theta`.](figure/rejection_plot-1 "fig:"){#fig:rej -->
-<!-- width="49%"} ![Left panel: plot produced by the `rejection_plot` -->
-<!-- function. Right panel: graph of -->
-<!-- `theta`.](figure/filtered_R_plot-1 "fig:"){#fig:rej width="49%"} -->
-<!-- ::: -->
-
-```{r table, include=FALSE}
+```{r table}
 table(ALL_bcrneg$mol.biol)
 ```
-```{r filtered_p, message=FALSE, warning=FALSE, include=FALSE}
+
+```{r filtered_p, message=FALSE, warning=FALSE, include=TRUE}
 S2 <- rowVars(exprs(ALL_bcrneg))
 p2 <- rowttests(ALL_bcrneg, "mol.biol")$p.value
 theta <- seq(0, .5, .1)
@@ -149,21 +145,19 @@ p_bh <- filtered_p(S2, p2, theta, method="BH")
 head(p_bh)
 ```
 
-
 The `rejection_plot` function takes sets of $p$-values corresponding to
 different filtering choices --- in the columns of a matrix or in a list
 --- and shows how rejection count ($R$) relates to the choice of cutoff
 for the $p$-values. For these data, over a reasonable range of FDR
 cutoffs, increased filtering corresponds to increased rejections.
 
-```{r rejection_plot}
+```{r rejection-plot, fig.cap = "Plot produced by the `rejection_plot function`"}
 rejection_plot(p_bh, at="sample",
                xlim=c(0,.3), ylim=c(0,1000),
                main="Benjamini & Hochberg adjustment")
 ```
 
-The plot is shown in the left panel of Fig. [4](#fig:rej){reference-type="ref"
-reference="fig:rej"}.
+The plot is shown in Fig. \@ref(fig:rejection-plot).
 
 ## Across filtering fractions
 
@@ -186,34 +180,17 @@ Because overfiltering (or use of a filter which is inappropriate for the
 application domain) discards both false and true null hypotheses, very
 large values of $\theta$ reduce power in this example:
 
-```{r filtered_R_plot}
+```{r filtered-R-plot, fig.cap="graph of theta."}
 plot(theta, R_BH, type="l",
      xlab=expression(theta), ylab="Rejections",
      main="BH cutoff = 0.1")
-
-
 ```
 
-your amazing header {#slug}
-\@ref(slug))
+The plot is shown in Fig. \@ref(fig:filtered-R-plot).
 
- The plot is shown in the right panel of 
-Fig. [4](#fig:rej){reference-type="ref" reference="fig:rej"}.
+# Session information {#session-information .unnumbered .unlisted}
 
-# Session information {#session-information .unnumbered}
-
-```{r sessionInfo, echo=FALSE, results='asis'}
-#sessionInfo() |> toLatex()
+```{r sessionInfo, echo=FALSE}
 sessionInfo()
-
-
 ```
-\@ref(fig:ss)
-
-<!-- ::: thebibliography -->
-<!-- 10 Richard Bourgon, Robert Gentleman and Wolfgang Huber. Independent -->
-<!-- filtering increases power for detecting differentially expressed genes. -->
-<!-- ::: -->
-
 
-\@ref(tab:label)

From dc53ca8edf5bbb1d2b71023c5e089eb98594fcc6 Mon Sep 17 00:00:00 2001
From: Paul Villafuerte <pvillafuerte@gmail.com>
Date: Tue, 22 Nov 2022 10:06:05 -0500
Subject: [PATCH 4/5] Convert independent_filtering_plot.Rnw to Rmd

---
 vignettes/independent_filtering_plots.Rmd | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vignettes/independent_filtering_plots.Rmd b/vignettes/independent_filtering_plots.Rmd
index c9f3e1c..668bd55 100644
--- a/vignettes/independent_filtering_plots.Rmd
+++ b/vignettes/independent_filtering_plots.Rmd
@@ -1,6 +1,10 @@
 ---
 title: "Additional plots for: Independent filtering increases power for detecting differentially expressed genes, Bourgon et al., PNAS (2010)"
-author: "Richard Bourgon and Wolfgang Huber"
+author: 
+- name: "Richard Bourgon"
+- name: "Wolfgang Huber"
+- name: "Paul Villafuerte"
+  affiliation: "Vignette translation from Sweave to Rmarkdown / HTML"
 date: "`r format(Sys.time(), '%B %d, %Y')`"
 vignette: >
   %\VignetteEngine{knitr::rmarkdown}

From 17de33319fc77a6732f099355e75f2c830e7dd36 Mon Sep 17 00:00:00 2001
From: Paul Villafuerte <pvillafuerte@gmail.com>
Date: Tue, 22 Nov 2022 10:21:40 -0500
Subject: [PATCH 5/5] Independent Filtering Plots Final

---
 vignettes/independent_filtering_plots.Rmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vignettes/independent_filtering_plots.Rmd b/vignettes/independent_filtering_plots.Rmd
index 668bd55..efd7f4f 100644
--- a/vignettes/independent_filtering_plots.Rmd
+++ b/vignettes/independent_filtering_plots.Rmd
@@ -15,8 +15,8 @@ output:
   BiocStyle::html_document:
     number_sections: yes
     toc: yes
-    toc_depth: 4
----
+    toc_depth: 4 
+---  
 
 ```{r functions, message=TRUE, warning=TRUE, include=FALSE}
 ## Color Format