Merge pull request #69 from nhejazi/pass_gbound

nhejazi · Jul 26, 2023 · d3e1031 · d3e1031
2 parents 60d2eab + 09ea4b8
commit d3e1031
Show file tree

Hide file tree

Showing 21 changed files with 155 additions and 122 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,10 +1,13 @@
 # txshift 0.3.9
 
 As of May 2023:
-* A new argument `bound` has been added to `bound_propensity` to specify the
+* A new argument `bound` has been added to `bound_propensity()` to specify the
   lower tolerated limit of generalized propensity score estimates. Estimates
   are bounded to the higher of the specified or default value of `bound` and
   the inverse of the sample size, 1/n.
+* A new argument `gps_bound` has also been introduced to both `txshift()` and
+  `est_Hn()` in order to accommodate passing in truncation bounds for the
+  estimated generalized propensity score.
 
 # txshift 0.3.6
 

diff --git a/R/bound.R b/R/bound.R
@@ -33,16 +33,15 @@ bound_precision <- function(vals) {
 #'  should only be bounded/truncated away from zero.
 #' @param bound \code{numeric} atomic giving the lower limit of the generalized
 #'  propensity score estimates to be tolerated. Estimates less than this will
-#'  be truncated to this value. Note that the default value of 0.001 assumes a
-#'  relatively large sample size (n = 1000), and this is internally overwritten
-#'  by checking the sample size, so the actual lower bound used is the greater
-#'  of the value specified here and 1/n.
+#'  be truncated to this value (default = 0.005). Note that the default may be
+#'  internally overwritten by reference to the sample size (n), so the lower
+#'  bound used is the greater of the specified value and 1/n.
 #'
 #' @return A \code{numeric} vector of the same length as \code{vals}, where the
 #'  returned values are bounded such that the minimum is no lower than the
 #'  greater of \code{bound} and 1/n, for the sample size n.
-bound_propensity <- function(vals, bound = 0.001) {
-  # bound likelihood component g(a|w) away from 0 only
+bound_propensity <- function(vals, bound = 0.005) {
+  # bound generalized propensity score g(a|w) away from 0 only
   propensity_bound <- max(1 / length(vals), bound)
   vals_bounded <- pmax(vals, propensity_bound)
   return(vals_bounded)

diff --git a/R/fit_mechanisms.R b/R/fit_mechanisms.R
@@ -557,15 +557,19 @@ est_samp <- function(V,
 #'
 #' @param gn_exp An estimate of the exposure density (a generalized propensity
 #'  score) using the output provided by \code{\link{est_g_exp}}.
+#' @param gps_bound \code{numeric} giving the lower limit of the generalized
+#'  propensity score estimates to be tolerated. Estimates less than this are
+#'  truncated to the greater of this value and 1/n. See
+#'  \code{\link{bound_propensity}} for details.
 #'
 #' @importFrom data.table as.data.table setnames
 #'
 #' @return A \code{data.table} with two columns, containing estimates of the
 #'  auxiliary covariate at the natural value of the exposure H(A, W) and at the
 #'  shifted value of the exposure H(A + delta, W).
-est_Hn <- function(gn_exp) {
+est_Hn <- function(gn_exp, gps_bound) {
   # set any g(a|w) = 0 values to a very small value above zero
-  gn_exp$noshift <- bound_propensity(gn_exp$noshift)
+  gn_exp$noshift <- bound_propensity(gn_exp$noshift, gps_bound)
 
   # compute the ratio of the propensity scores for Hn(A,W)
   ratio_g_noshift <- (gn_exp$downshift / gn_exp$noshift) +

diff --git a/R/txshift.R b/R/txshift.R
@@ -37,6 +37,10 @@
 #'  tilting regression.
 #' @param max_iter A \code{numeric} integer giving the maximum number of steps
 #'  to be taken in iterating to a solution of the efficient influence function.
+#' @param gps_bound \code{numeric} giving the lower limit of the generalized
+#'  propensity score estimates to be tolerated (default = 0.005). Estimates
+#'  less than this are truncated to the greater of this value and 1/n. See
+#'  \code{\link{bound_propensity}} for details.
 #' @param samp_fit_args A \code{list} of arguments, all but one of which are
 #'  passed to \code{\link{est_samp}}. For details, consult the documentation of
 #'  \code{\link{est_samp}}. The first element (i.e., \code{fit_type}) is used
@@ -199,6 +203,7 @@ txshift <- function(W,
                     estimator = c("tmle", "onestep"),
                     fluctuation = c("standard", "weighted"),
                     max_iter = 10,
+                    gps_bound = 0.005,
                     samp_fit_args = list(
                       fit_type = c("glm", "sl", "external"),
                       sl_learners = NULL
@@ -373,7 +378,7 @@ txshift <- function(W,
   }
 
   # initial estimate of the auxiliary covariate
-  Hn_estim <- est_Hn(gn_exp = gn_exp_estim)
+  Hn_estim <- est_Hn(gn_exp = gn_exp_estim, gps_bound = gps_bound)
 
   # compute whichever efficient estimator was asked for
   if (estimator == "tmle") {

diff --git a/README.md b/README.md
@@ -104,7 +104,7 @@ treatment, consider the following example:
 
 ``` r
 library(txshift)
-#> txshift v0.3.8: Efficient Estimation of the Causal Effects of Stochastic
+#> txshift v0.3.9: Efficient Estimation of the Causal Effects of Stochastic
 #> Interventions
 library(sl3)
 set.seed(429153)
@@ -255,28 +255,26 @@ After using the `txshift` R package, please cite the following:
 
 ## Related
 
--   [R/`tmle3shift`](https://github.com/tlverse/tmle3shift) - An R
-    package providing an independent implementation of the same core
-    routines for the TML estimation procedure and statistical
-    methodology as is made available here, through reliance on a unified
-    interface for Targeted Learning provided by the
-    [`tmle3`](https://github.com/tlverse/tmle3) engine of the [`tlverse`
-    ecosystem](https://github.com/tlverse).
-
--   [R/`medshift`](https://github.com/nhejazi/medshift) - An R package
-    providing facilities to estimate the causal effect of stochastic
-    treatment regimes in the mediation setting, including classical
-    (IPW) and augmented double robust (one-step) estimators. This is an
-    implementation of the methodology explored by Dı́az and Hejazi
-    (2020).
-
--   [R/`haldensify`](https://github.com/nhejazi/haldensify) - A minimal
-    package for estimating the conditional density treatment mechanism
-    component of this parameter based on using the [highly adaptive
-    lasso](https://github.com/tlverse/hal9001) (Coyle, Hejazi, Phillips,
-    et al. 2022; Hejazi, Coyle, and van der Laan 2020) in combination
-    with a pooled hazard regression. This package implements a variant
-    of the approach advocated by Dı́az and van der Laan (2011).
+- [R/`tmle3shift`](https://github.com/tlverse/tmle3shift) - An R package
+  providing an independent implementation of the same core routines for
+  the TML estimation procedure and statistical methodology as is made
+  available here, through reliance on a unified interface for Targeted
+  Learning provided by the [`tmle3`](https://github.com/tlverse/tmle3)
+  engine of the [`tlverse` ecosystem](https://github.com/tlverse).
+
+- [R/`medshift`](https://github.com/nhejazi/medshift) - An R package
+  providing facilities to estimate the causal effect of stochastic
+  treatment regimes in the mediation setting, including classical (IPW)
+  and augmented double robust (one-step) estimators. This is an
+  implementation of the methodology explored by Díaz and Hejazi (2020).
+
+- [R/`haldensify`](https://github.com/nhejazi/haldensify) - A minimal
+  package for estimating the conditional density treatment mechanism
+  component of this parameter based on using the [highly adaptive
+  lasso](https://github.com/tlverse/hal9001) (Coyle, Hejazi, Phillips,
+  et al. 2022; Hejazi, Coyle, and van der Laan 2020) in combination with
+  a pooled hazard regression. This package implements a variant of the
+  approach advocated by Díaz and van der Laan (2011).
 
 ------------------------------------------------------------------------
 
@@ -331,17 +329,17 @@ See below for details:
 <div id="ref-coyle-sl3-rpkg" class="csl-entry">
 
 Coyle, Jeremy R, Nima S Hejazi, Ivana Malenica, Rachael V Phillips, and
-Oleg Sofrygin. 2022. *<span class="nocase">sl3</span>: Modern Machine
-Learning Pipelines for Super Learning*.
+Oleg Sofrygin. 2022. “<span class="nocase">sl3</span>: Modern Machine
+Learning Pipelines for Super Learning.”
 <https://doi.org/10.5281/zenodo.1342293>.
 
 </div>
 
 <div id="ref-coyle-hal9001-rpkg" class="csl-entry">
 
 Coyle, Jeremy R, Nima S Hejazi, Rachael V Phillips, Lars W van der Laan,
-and Mark J van der Laan. 2022. *<span class="nocase">hal9001</span>: The
-Scalable Highly Adaptive Lasso*.
+and Mark J van der Laan. 2022. “<span class="nocase">hal9001</span>: The
+Scalable Highly Adaptive Lasso.”
 <https://doi.org/10.5281/zenodo.3558313>.
 
 </div>
@@ -388,9 +386,9 @@ in Medicine* 32 (30): 5260–77.
 
 <div id="ref-hejazi2020hal9001-joss" class="csl-entry">
 
-Hejazi, Nima S, Jeremy R Coyle, and Mark J van der Laan. 2020. “<span
-class="nocase">hal9001</span>: Scalable Highly Adaptive Lasso Regression
-in R.” *Journal of Open Source Software* 5 (53): 2526.
+Hejazi, Nima S, Jeremy R Coyle, and Mark J van der Laan. 2020.
+“<span class="nocase">hal9001</span>: Scalable Highly Adaptive Lasso
+Regression in R.” *Journal of Open Source Software* 5 (53): 2526.
 <https://doi.org/10.21105/joss.02526>.
 
 </div>

diff --git a/docs/articles/intro_txshift.html b/docs/articles/intro_txshift.html