Merge pull request #87 from fasrc/release_v0.2.6

Release v0.2.6
NSAPH-Software · Sep 7, 2021 · a6d2f23 · a6d2f23
2 parents 42a939e + 0225bb1
commit a6d2f23
Show file tree

Hide file tree

Showing 140 changed files with 789 additions and 1,472 deletions.
diff --git a/CRAN-RELEASE b/CRAN-RELEASE
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: CausalGPS
 Type: Package
 Title: Matching on Generalized Propensity Scores with Continuous Exposures
-Version: 0.2.5
+Version: 0.2.6
 Authors@R: c(
     person("Naeem", "Khoshnevis", email = "nkhoshnevis@g.harvard.edu",
            role=c("aut","cre"), 
@@ -40,7 +40,6 @@ Imports:
     ggplot2,
     rlang,
     logger,
-    glue,
     Rcpp,
     gnm,
     tidyr

diff --git a/NEWS.md b/NEWS.md
@@ -1,18 +1,35 @@
-
-## CausalGPS 0.2.5 (2021-07-09)
+## CausalGPS 0.2.6 (2021-09-03)
 
 ### Added
+* Added the status of optimized compile to the generate_pseudo_pop function output.
+* compute_closest_wgps accepts the number of user-defined threads.
 
 ### Changed
 
-* User needs to activate the logger
+* Vignette file names.
+* The trim condition from > and < into >= and <=.
+* Removed the seed input from the generate_syn_data function. In an R package, setting the seed value inside a function is not recommended. Users can set the seed before calling the function.
+* OpenMP uses a user-defined number of cores.
 
 ### Fixed
 
-* CRAN package URLs are in canonical forms.
+* Initial covariate balance for weighted approach. The counter column was not preallocated correctly.
+* Counter value for compiling. The initial value was set to one; however, zero is the correct value.
+* Private variable issue with OpenMP.
+* Fixed OpenMP option on macOS checks. 
 
 ### Removed
 
+## CausalGPS 0.2.5 (2021-07-23)
+
+### Changed
+
+* User needs to activate the logger
+
+### Fixed
+
+* CRAN package URLs are in canonical forms.
+
 ## CausalGPS 0.2.4 (2021-07-11)
 
 ### Added
@@ -68,25 +85,21 @@
 
 * Package name: GPSmatching --> CausalGPS 
 
-### Fixed
-### Removed
-
-
 ## GPSmatching 0.2.1 (2021-04-23)
 
 ### Added
 
-*  User defined bin sequence in compiling speudo population
-*  Non-parametric option for estimating gps
+*  User defined bin sequence in compiling pseudo population.
+*  Non-parametric option for estimating GPS.
 *  Adaptive approach to transform features in training sessions.
-*  Cpp code for computing pair of w and gps.
+*  Cpp code for computing pair of w and GPS.
 *  `set_logger` function.
 *  Customized wrapper for ranger package.
-*  Extended plot function for gen_pseudo_pop object (plot.R)
-*  Extended plot function for estimate_erf object (plot.R)
-*  Extended print function for estimate_erf object (print.R)
-*  test-estimate_erf.R
-*  create_weighting.R
+*  Extended plot function for gen_pseudo_pop object (plot.R).
+*  Extended plot function for estimate_erf object (plot.R).
+*  Extended print function for estimate_erf object (print.R).
+*  test-estimate_erf.R.
+*  create_weighting.R.
 *  Steps for adding test data into 'sysdata.rda'.
 *  `weighting` option as causal inference approach.  
 *  absolute_weighted_corr_fun.R
@@ -108,8 +121,6 @@
 
 * mclapply memory issue (compute_closest_wgps.R).
 
-### Removed
-
 
 ## GPSmatching 0.2.0 (2021-03-01)
 
@@ -124,7 +135,7 @@
 * Function to impute NA values based on density and unittest
 * Function to separate prediction model training (train_it)
 * Function to separate min and max value estimation and unittest
-* Function to find the closest data based on gps and w
+* Function to find the closest data based on GPS and w
 * Wrapper function to generate pseudo population and test it for covariate balance (gen_pseudo_pop)
 * Function to estimate only GPS value (estimate_gps)
 * Helper function to take the input data + GPS values and return pseudo population based on selected causal inference approach. The output of this function may or may not satisfy the covariate balance test. (compile_pseudo_pop)
@@ -144,10 +155,6 @@
 * GPSmatching-package.R --> gpsmatching_package.R
 * GPSmatching_smooth.R --> gpsmatching_smooth.R
 
-### Fixed
-
-* None
-
 ### Removed
 
 * GPSmatching.R functions are separated into smaller functions, and the file is removed.
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -1,7 +1,7 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-compute_closest_wgps_helper <- function(a, b, cd, sc) {
-    .Call(`_CausalGPS_compute_closest_wgps_helper`, a, b, cd, sc)
+compute_closest_wgps_helper <- function(a, b, cd, sc, nthread) {
+    .Call(`_CausalGPS_compute_closest_wgps_helper`, a, b, cd, sc, nthread)
 }
 
diff --git a/R/absolute_corr_fun.R b/R/absolute_corr_fun.R
@@ -9,7 +9,7 @@
 #' @return
 #' The function returns a list including:
 #' - \code{absolute_corr}: the absolute correlations for each pre-exposure
-#'  covairates;
+#'  covariates;
 #' - \code{mean_absolute_corr}: the average absolute correlations for all
 #'  pre-exposure covariates.
 #'

diff --git a/R/check_arguments.R b/R/check_arguments.R
@@ -211,7 +211,7 @@ check_args_compile_pseudo_pop <- function(ci_appr, use_cov_transform,
 #' Check Covariate Balance Transformers Argument
 #'
 #' @description
-#' Checks Covriate Balance Transformers in terms of using them and available
+#' Checks Covariate Balance Transformers in terms of using them and available
 #' transformers.
 #'
 #' @param use_cov_transform A logical value (TRUE/FALSE) to use covariate balance
@@ -235,7 +235,7 @@ check_args_use_cov_transformers <- function(use_cov_transform,
   }
 
   if (!is.list(transformers)){
-    stop(paste("transformers expects a list of transformerns. Curren type: ",
+    stop(paste("transformers expects a list of transformerns. Current type: ",
                typeof(transformers)))
   }
 

diff --git a/R/compute_closest_wgps.R b/R/compute_closest_wgps.R
@@ -10,14 +10,15 @@
 #' @param c  Vector of the second attribute values for subset of data.
 #' @param d  Vector of size one for the second attribute value.
 #' @param sc Scale parameter to give weight for two mentioned measurements.
+#' @param nthread Number of available cores.
 #'
 #' @return
 #' The function returns index of subset data that is closest to the original data
 #' sample.
 #'
 #' @keywords internal
 #'
-compute_closest_wgps <- function(a, b, c, d, sc){
+compute_closest_wgps <- function(a, b, c, d, sc, nthread){
 
   if (!is.numeric(a) ||
       !is.numeric(b) ||
@@ -49,9 +50,8 @@ compute_closest_wgps <- function(a, b, c, d, sc){
     stop('Expecting sc in [0,1] range.')
   }
 
-
    c_minus_d <- abs(c-d)*(1-sc)
-   wm <- compute_closest_wgps_helper(a, b, c_minus_d, sc)
+   wm <- compute_closest_wgps_helper(a, b, c_minus_d, sc, nthread)
 
    return(wm)
 }
diff --git a/R/create_matching.R b/R/create_matching.R
@@ -67,6 +67,9 @@ create_matching <- function(dataset, bin_seq = NULL, gps_model = "parametric",
                                 outfile= lfp)
 
 
+    # seed_val <- .Random.seed
+    # parallel::clusterSetRNGStream(cl = cl, iseed = seed_val)
+
     parallel::clusterEvalQ(cl, {library("CausalGPS")})
     p_c_t_e <- proc.time()
 

diff --git a/R/estimate_gps.R b/R/estimate_gps.R
@@ -111,7 +111,7 @@ estimate_gps <- function(Y,
 
   w_mx <- compute_min_max(w)
   gps_mx <- compute_min_max(gps)
-  counter <- (w*0)+1 # initialize counter.
+  counter <- (w*0)+0 # initialize counter.
   row_index <- seq(1,length(w),1) # initialize row index.
   dataset <- cbind(Y,w,gps,counter, row_index, c)
 

diff --git a/R/estimate_pmetric_erf.R b/R/estimate_pmetric_erf.R
@@ -47,6 +47,14 @@ estimate_pmetric_erf <- function(formula, family, data, ci_appr){
   counter <- ipw <- NULL
 
   if (ci_appr == "matching"){
+
+    # If the approach is not optimized, the counter will be zero, which causes
+    # problem in generating prediction model.
+    if (sum(data$counter) == 0) {
+      data$counter <- data$counter + 1
+      logger::log_debug("Giving equal weight for all samples.")
+    }
+
     suppressWarnings(gnm_model <- gnm::gnm(formula = formula,
                                            family = family,
                                            data = data,

diff --git a/R/estimate_semipmetric_erf.R b/R/estimate_semipmetric_erf.R
@@ -47,6 +47,14 @@ estimate_semipmetric_erf <- function(formula, family, data, ci_appr){
   counter <- ipw <- NULL
 
   if (ci_appr == "matching"){
+
+    # If the approach is not optimized, the counter will be zero, which causes
+    # problem in generating prediction model.
+    if (sum(data$counter) == 0) {
+      data$counter <- data$counter + 1
+      logger::log_debug("Giving equal weight for all samples.")
+    }
+
     suppressWarnings(gam_model <- gam::gam(formula = formula,
                                            family = family,
                                            data = data,

diff --git a/R/generate_pseudo_pop.R b/R/generate_pseudo_pop.R
@@ -72,6 +72,7 @@
 #' - pseudo_pop
 #' - adjusted_corr_results
 #' - original_corr_results
+#' - optimized_compile (True or False)
 #'
 #' @export
 #' @examples
@@ -151,16 +152,17 @@ generate_pseudo_pop <- function(Y,
 
   q1 <- stats::quantile(w,trim_quantiles[1])
   q2 <- stats::quantile(w,trim_quantiles[2])
+
+  logger::log_debug("{trim_quantiles[1]*100}% qauntile for trim: {q1}")
+  logger::log_debug("{trim_quantiles[2]*100}% for trim: {q2}")
+
   tmp_data <- convert_data_into_standard_format(Y, w, c, q1, q2, ci_appr)
 
 
   original_corr_obj <- check_covar_balance(tmp_data, ci_appr, nthread,
                                            optimized_compile, ...)
   tmp_data <- NULL
 
-  logger::log_debug("1% qauntile for trim: {q1}")
-  logger::log_debug("99% qauntile for trim: {q2}")
-
   # loop until the generated pseudo population is acceptable or reach maximum
   # allowed iteration.
 
@@ -211,7 +213,7 @@ generate_pseudo_pop <- function(Y,
                                      optimized_compile = optimized_compile,...)
     # trim pseudo population
     pseudo_pop <- subset(pseudo_pop[stats::complete.cases(pseudo_pop) ,],
-                         w < q2  & w > q1)
+                         w <= q2  & w >= q1)
     logger::log_debug("Finished compiling pseudo population.")
 
     if (ci_appr == 'adjust'){
@@ -320,6 +322,7 @@ generate_pseudo_pop <- function(Y,
   result$passed_covar_test <- adjusted_corr_obj$pass
   result$counter <- counter
   result$ci_appr <- ci_appr
+  result$optimized_compile <- optimized_compile
 
   end_time_gpp <- proc.time()
 

diff --git a/R/generate_synthetic_data.R b/R/generate_synthetic_data.R
@@ -5,7 +5,6 @@
 #' Generates synthetic data set based on different GPS models and covariates.
 #'
 #' @param sample_size Number of data samples.
-#' @param seed The seed of R's random number generator.
 #' @param outcome_sd Standard deviation used to generate the outcome in the
 #' synthetic data set.
 #' @param gps_spec A numerical value (1-7) that indicates the GPS model
@@ -21,11 +20,13 @@
 #' @export
 #'
 #' @examples
-#' s_data <- generate_syn_data(sample_size=100, seed = 403,
+#'
+#' set.seed(298)
+#' s_data <- generate_syn_data(sample_size=100,
 #'                                   outcome_sd = 10, gps_spec = 1,
 #'                                   cova_spec = 1)
 #'
-generate_syn_data <- function(sample_size=1000, seed = 300, outcome_sd = 10,
+generate_syn_data <- function(sample_size=1000, outcome_sd = 10,
                               gps_spec = 1, cova_spec = 1) {
 
   if (sample_size < 0 || !is.numeric(sample_size)){
@@ -34,7 +35,6 @@ generate_syn_data <- function(sample_size=1000, seed = 300, outcome_sd = 10,
 
   #TODO: Check other input arguments.
 
-  set.seed(seed)
   size <- sample_size
 
   #pre-treatment variables (confounders)

diff --git a/R/matching_l1.R b/R/matching_l1.R
@@ -83,11 +83,14 @@ matching_l1 <- function(w,
                   " radius."))
     return(list())
   }
+
   wm <- compute_closest_wgps(dataset_subset[["std_gps"]],
                              std_p_w,
                              dataset_subset[["std_w"]],
                              std_w,
-                             scale)
+                             scale,
+                             nthread)
+
 
   dp <- dataset_subset[wm,]
 

diff --git a/R/utils.R b/R/utils.R
@@ -44,19 +44,19 @@ log_system_info <- function(){
 convert_data_into_standard_format <- function(Y, w, c, q1, q2, ci_appr){
 
   w_4 <- replicate(4, w)
-  colnames(w_4) <- replicate(4, "w")
+  colnames(w_4) <- c("w", "gps", "counter", "row_index")
+  w_4 <- data.frame(w_4)
+  w_4$counter <- w_4$counter * 0 + 1
   if (ci_appr=="matching"){
     tmp_data <- cbind(Y,w_4,c)
   } else if (ci_appr=="weighting"){
     tmp_data <- cbind(Y,w_4,w*0+1,c)
   }
 
-  tmp_data <- subset(tmp_data[stats::complete.cases(tmp_data) ,],  w < q2  & w > q1)
+  tmp_data <- subset(tmp_data[stats::complete.cases(tmp_data) ,],
+                     w <= q2  & w >= q1)
   tmp_data <- data.table(tmp_data)
 
-  logger::log_debug("1% qauntile for trim: {q1}")
-  logger::log_debug("99% qauntile for trim: {q2}")
-
   return(tmp_data)
 }
 

diff --git a/R/zzz.R b/R/zzz.R
@@ -1,18 +1,4 @@
 
-# .onLoad <- function(libname, pkgname){
-#
-#   flogger <- logger::layout_glue_generator(format =
-#                                            paste('{time} {node} {pid} ',
-#                                                  '{namespace} {fn} ',
-#                                                  '{level}:  {msg}',
-#                                                  sep = ""))
-#   logger::log_appender(appender = logger::appender_file("CausalGPS.log"),
-#                        index = 1)
-#   logger::log_threshold(logger::INFO,index = 1)
-#   logger::log_layout(flogger, index = 1)
-#
-# }
-
 .onLoad <- function(libname, pkgname){
 
   flogger <- logger::layout_glue_generator(format =
@@ -22,5 +8,6 @@
                                                    sep = ""))
 
   logger::log_layout(flogger, index = 1)
+  # RNGkind(kind = "L'Ecuyer-CMRG")
 
 }
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -30,8 +30,8 @@ reference:
 articles:
   - title: Introduction
     navbar: Introduction
-    contents: [estimating_gps,generating_pseudo_population,outcome_models,generate_synthetic_data,faq]
+    contents: [Estimating-GPS,Generating-Pseudo-Population,Outcome-Models,Generating-Synthetic-Data,FAQ]
 
   - title: Collabration
     navbar: Collabration
-    contents: [contribution, testing_the_package, notes_on_sl_wrapper]
+    contents: [Developers-Guide, Testing-the-Package, Notes-on-SL-Wrapper]