fixed the Rd issues, typos in description and some jags2_baker issue

zhenkewu · Dec 19, 2023 · 4632782 · 4632782
1 parent 7c4e9d1
commit 4632782
Show file tree

Hide file tree

Showing 17 changed files with 338 additions and 303 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: baker
 Type: Package
 Title: Nested Partially Latent Class Models
 Version: 1.0.2
-Date: 2023-12-14
+Date: 2023-12-18
 Authors@R: c(
      person("Zhenke", "Wu", email="zhenkewu@gmail.com",role=c("cre","aut","cph"),
      comment = c(ORCID = "0000-0001-7582-669X")),
@@ -14,14 +14,14 @@ Authors@R: c(
      comment = c(ORCID = "0000-0002-9366-8506"))
      )
 Description: Provides functions to specify, fit and visualize
-    nested partially-latent class models (Wu et al., 2016,
-    'JRSS-C' <doi:10.1111/rssc.12101>; 
-    Wu et al., 2017, 'Biostatistics' <doi:10.1093/biostatistics/kxw037>;
-    Wu and Chen, 2021, Statistics in Medicine <doi:10.1002/sim.8804>) for 
+    nested partially-latent class models (
+    'Wu, Deloria-Knoll, Hammitt, and Zeger, 2016, JRSS-C' <doi:10.1111/rssc.12101>; 
+    'Wu, Deloria-Knoll, and Zeger, 2017, Biostatistics' <doi:10.1093/biostatistics/kxw037>;
+    'Wu and Chen, 2021, Statistics in Medicine' <doi:10.1002/sim.8804>) for 
     inference of population disease etiology and individual diagnosis. In the motivating
     Pneumonia Etiology Research for Child Health (PERCH) study, because both quantities
     of interest sum to one hundred percent, the PERCH scientists frequently refer to
-    them as "population etiology pie" and "individual etiology pie", hence the name of the package.
+    them as 'population etiology pie' and 'individual etiology pie', hence the name of the package.
 Depends:
     R(>= 4.3.0)
 Imports:

diff --git a/R/clean-perch-data.R b/R/clean-perch-data.R
@@ -7,7 +7,7 @@
 #' @param clean_options The list of options for cleaning PERCH data.
 #' Its elements are defined as follows:
 #' 
-#' \itemize{
+#' \describe{
 #' \item{`raw_meas_dir`}{: The file path to the raw data;}
 #' \item{`case_def`}{: Variable name in raw data for **case** definition;}
 #' \item{`case_def_val`}{: The value for **case** definition;}
@@ -194,20 +194,20 @@ clean_perch_data <- function(clean_options) {
 #'   The default is NULL, which means not reading in any covariate.
 #'
 #' @return A list of data.
-#' \itemize{
+#' \describe{
 #' \item{Mobs}{
-#'    \itemize{
-#'    \item{MBS} A list of Bronze-Standard (BrS) measurements.
+#'    \describe{
+#'    \item{MBS}{ A list of Bronze-Standard (BrS) measurements.
 #'    The names of the list take the form of `specimen`_`test`. 
 #'    Each element of the list is a data frame. The rows of the data frame 
-#'    are for subjects; the columns are for measured pathogens.
-#'    \item{MSS} A list of Silver-Standard (SS) measurements. 
-#'    The formats are the same as `MBS` above.
-#'    \item{MGS} A list of Gold-Standard (GS) measurements. 
-#'    It equals `NULL` if no GS data exist.
+#'    are for subjects; the columns are for measured pathogens.}
+#'    \item{MSS}{ A list of Silver-Standard (SS) measurements. 
+#'    The formats are the same as `MBS` above.}
+#'    \item{MGS}{ A list of Gold-Standard (GS) measurements. 
+#'    It equals `NULL` if no GS data exist.}
 #'    }
 #' }
-#' \item{X} A data frame with columns specified by `extra_covariates`.
+#' \item{X}{ A data frame with columns specified by `extra_covariates`.}
 #' }
 #' 
 #' @family raw data importing functions

diff --git a/R/nplcm.R b/R/nplcm.R
@@ -18,7 +18,8 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c("set_prior_tpr","set_prio
 #' (effectively deleting `MGS` from `Mobs`).
 #' \itemize{
 #' \item `MBS` a list of data frame of bronze-standard (BrS) measurements.
-#' Rows are subjects, columns are causative agents (e.g., pathogen species). 
+#' For each data frame (referred to as a 'slice'), 
+#' rows are subjects, columns are causative agents (e.g., pathogen species). 
 #' We use `list` here to accommodate the possibility of multiple sets of BrS data.
 #' They have imperfect sensitivity/specificity (e.g. nasopharyngeal polymerase chain
 #' reaction - NPPCR). 
@@ -46,32 +47,35 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c("set_prior_tpr","set_prio
 #'   A vector of characters strings; can be one or more from `"BrS"`, `"SS"`, `"GS"`.
 #' }
 #' \item{`likelihood`}{
-#'     \itemize{
-#'          \item{cause_list} The vector of causes (NB: specify);
-#'          \item{k_subclass} The number of nested subclasses in each 
+#'     \describe{
+#'          \item{cause_list}{ The vector of causes (NB: specify);}
+#'          \item{k_subclass}{ The number of nested subclasses in each 
 #'          disease class (one of case classes or the control class; the same `k_subclass`
 #'          is assumed for each class) and each slice of BrS measurements. 
 #'          `1` for conditional independence; larger than `1` for conditional dependence. 
 #'          It is only available for BrS measurements. It is a vector of length equal to 
-#'          the number of slices of BrS measurements;
-#'          \item{Eti_formula} Formula for etiology regressions. You can use  
+#'          the number of slices of BrS measurements;}
+#'          \item{Eti_formula}{ Formula for etiology regressions. You can use  
 #'          [s_date_Eti()] to specify the design matrix for `R` format enrollment date; 
-#'          it will produce natural cubic spline basis. Specify `~ 1` if no regression is intended.
-#'          \item{FPR_formula}formula for false positive rates (FPR) regressions; see [formula()]. 
+#'          it will produce natural cubic spline basis. Specify `~ 1` if no regression is intended.}
+#'          \item{FPR_formula}{formula for false positive rates (FPR) regressions; see [formula()]. 
 #'          You can use [s_date_FPR()] to specify part of the design matrix for `R` 
 #'          format enrollment date; it will produce penalized-spline basis (based on B-splines). 
 #'          Specify `~ 1` if no regression is intended. (NB: If `effect="fixed"`, [dm_Rdate_FPR()]
-#'          will just specify a design matrix with appropriately standardized dates.)
+#'          will just specify a design matrix with appropriately standardized dates.)}
 #'     }
 #' }
 #' 
 #' \item{`prior`}{
-#'     \itemize{
-#'          \item{Eti_prior}Description of etiology prior (e.g., `overall_uniform` - 
-#'          all hyperparameters are `1`; or `0_1` - all hyperparameters are `0.1`);
-#'          \item{TPR_prior}Description of priors for the measurements 
-#'          (e.g., informative vs non-informative). Its length should be the same with `M_use`.
-#'          (NB: not sure what M use is...)
+#'     \describe{
+#'          \item{Eti_prior}{Description of etiology prior (e.g., `overall_uniform` - 
+#'          all hyperparameters are `1`; or `0_1` - all hyperparameters are `0.1`);}
+#'          \item{TPR_prior}{Description of priors for the measurements 
+#'          (e.g., informative vs non-informative). Its length should be the 
+#'          same as `use_measurements` above. Please see examples for how to specify.
+#'          The package can also handle multiple slices of BrS, SS data, so separate
+#'          specification of the TPR priors are needed. 
+#'        }
 #'     }
 #' }
 #' }
@@ -116,7 +120,7 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c("set_prior_tpr","set_prio
 #'          This function is called when there exists one or more than one discrete covariate among
 #'          the union of the two covariate sets. The method implemented by this function
 #'          directly lets FPR depend upon covariates. 
-#'          This is different from Wu and Chen (2020+), which let the subclass 
+#'          This is different from Wu and Chen (2021), which let the subclass 
 #'          weights depend upon covariates. We implemented this function for methods comparison.
 #'           \item [nplcm_fit_Reg_discrete_predictor_NoNest] deals with the setting
 #'           with all discrete covariates for FPRs and CSCFs. The strata defined by the two sets of 
@@ -126,7 +130,7 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c("set_prior_tpr","set_prio
 #'           }
 #'          \item local dependence model for BrS measures: 
 #'          Fitted at lower level by [nplcm_fit_Reg_Nest]: This is the method introduced in 
-#'          Wu and Chen (2020+): CSCF regression + case/control subclass weight regression.
+#'          Wu and Chen (2021): CSCF regression + case/control subclass weight regression.
 #'          It does not provide a specialized function for the setting with all discrete covariates.
 #'        }
 #' }
@@ -994,30 +998,21 @@ nplcm_fit_NoReg<-
     if(file.exists(curr_data_txt_file)){file.remove(curr_data_txt_file)}
     dump(names(in_data.list), append = FALSE, envir = here,
          file = curr_data_txt_file)
-    ## fix dimension problem.... convert say .Dmi=7:6 to c(7,6) (an issue for templateBS_1):
-    bad_jagsdata_txt <- readLines(curr_data_txt_file)
-    #good_jagsdata_txt <- gsub( "([0-9]+):([0-9]+)", "c(\\1,\\2)", bad_jagsdata_txt,fixed = FALSE)
-    ## to add an additional complicatoin of dump creates a text file with , dim = but the JAGS only accepts .Dim=
-    good_jagsdata_txt <- gsub( ", dim =", ", .Dim=", 
-                               gsub( "([0-9]+):([0-9]+)", "c(\\1,\\2)", bad_jagsdata_txt,fixed = FALSE),
-                               fixed = FALSE)
-
-
-    writeLines(good_jagsdata_txt, curr_data_txt_file)
-
-    # fix dimension problem.... convert say 7:6 to c(7,6) (an issue for a dumped matrix):
-    inits_fnames <- list.files(mcmc_options$result.folder,pattern = "^jagsinits[0-9]+.txt",
-                               full.names = TRUE)
-    for (fiter in seq_along(inits_fnames)){
-      curr_inits_txt_file <- inits_fnames[fiter]
-      bad_jagsinits_txt <- readLines(curr_inits_txt_file)
-      good_jagsinits_txt <- gsub( "([0-9]+):([0-9]+)", "c(\\1,\\2)", bad_jagsinits_txt,fixed = FALSE)
-      writeLines(good_jagsinits_txt, curr_inits_txt_file)
-    }
 
+    # ## fix dimension problem.... convert say .Dmi=7:6 to c(7,6) (an issue for templateBS_1):
+    # bad_jagsdata_txt <- readLines(curr_data_txt_file)
+    # #good_jagsdata_txt <- gsub( "([0-9]+):([0-9]+)", "c(\\1,\\2)", bad_jagsdata_txt,fixed = FALSE)
+    # ## to add an additional complicatoin of dump creates a text file with , dim = but the JAGS only accepts .Dim=
+    # good_jagsdata_txt <- gsub( ", dim =", ", .Dim=", 
+    #                            gsub( "([0-9]+):([0-9]+)", "c(\\1,\\2)", bad_jagsdata_txt,fixed = FALSE),
+    #                            fixed = FALSE)
+    # 
+    # 
+    # writeLines(good_jagsdata_txt, curr_data_txt_file)
+
     ## fixed some problems of JAGS 4.3.2 not having cut function; and I(a,b) functions weirdly, even though
     ## the two elements are already constants (errors says that are not constant).
-    curr_model_txt_file <- file.path(mcmc_options$result.folder,"model_NoReg.bug")
+    curr_model_txt_file <- file.path(mcmc_options$result.folder,model_bugfile_name)
     bad_model_txt <- readLines(curr_model_txt_file)
     good_model_txt <- gsub( "cut\\(", "(", bad_model_txt,fixed = FALSE)
     good_model_txt <- gsub( "I\\(0\\.000001,0\\.999999\\)", " ", good_model_txt,fixed = FALSE)
@@ -1771,14 +1766,18 @@ nplcm_fit_Reg_discrete_predictor_NoNest <-
     if(file.exists(curr_data_txt_file)){file.remove(curr_data_txt_file)}
     dump(names(in_data.list), append = FALSE, envir = here,
          file = curr_data_txt_file)
-    # fix dimension problem.... convert say .Dmi=7:6 to c(7,6) (an issue for templateBS_1):
-    bad_jagsdata_txt <- readLines(curr_data_txt_file)
-    good_jagsdata_txt <- gsub( ".Dim = ([0-9]+):([0-9]+)", ".Dim = c(\\1,\\2)", 
-                               bad_jagsdata_txt,fixed = FALSE)
-    writeLines(good_jagsdata_txt, curr_data_txt_file)
+
+    ## fixed some problems of JAGS 4.3.2 not having cut function; and I(a,b) functions weirdly, even though
+    ## the two elements are already constants (errors says that are not constant).
+    curr_model_txt_file <- file.path(mcmc_options$result.folder,model_bugfile_name)
+    bad_model_txt <- readLines(curr_model_txt_file)
+    good_model_txt <- gsub( "cut\\(", "(", bad_model_txt,fixed = FALSE)
+    good_model_txt <- gsub( "I\\(0\\.000001,0\\.999999\\)", " ", good_model_txt,fixed = FALSE)
+    writeLines(good_model_txt, curr_model_txt_file)
+
     if(is.null(mcmc_options$jags.dir)){mcmc_options$jags.dir=""}
     gs <- jags2_baker(data   = curr_data_txt_file,
-                      inits  = in_init,
+                      inits  = xxx,
                       parameters.to.save = out_parameter,
                       model.file = filename,
                       working.directory = mcmc_options$result.folder,
@@ -2456,20 +2455,15 @@ nplcm_fit_Reg_NoNest <-
       if(file.exists(curr_data_txt_file)){file.remove(curr_data_txt_file)}
       dump(names(in_data.list), append = FALSE, envir = here,
            file = curr_data_txt_file)
-      ## fix dimension problem.... convert say .Dmi=7:6 to c(7,6) (an issue for templateBS_1):
-      bad_jagsdata_txt <- readLines(curr_data_txt_file)
-      good_jagsdata_txt <- gsub( ".Dim = ([0-9]+):([0-9]+)", ".Dim = c(\\1,\\2)", bad_jagsdata_txt,fixed = FALSE)
-      writeLines(good_jagsdata_txt, curr_data_txt_file)
-
-      # # fix dimension problem.... convert say 7:6 to c(7,6) (an issue for a dumped matrix):
-      # inits_fnames <- list.files(mcmc_options$result.folder,pattern = "^jagsinits[0-9]+.txt",
-      #                            full.names = TRUE)
-      # for (fiter in seq_along(inits_fnames)){
-      #   curr_inits_txt_file <- inits_fnames[fiter]
-      #   bad_jagsinits_txt <- readLines(curr_inits_txt_file)
-      #   good_jagsinits_txt <- gsub( "([0-9]+):([0-9]+)", "c(\\1,\\2)", bad_jagsinits_txt,fixed = FALSE)
-      #   writeLines(good_jagsinits_txt, curr_inits_txt_file)
-      # }
+
+      ## fixed some problems of JAGS 4.3.2 not having cut function; and I(a,b) functions weirdly, even though
+      ## the two elements are already constants (errors says that are not constant).
+      curr_model_txt_file <- file.path(mcmc_options$result.folder,model_bugfile_name)
+      bad_model_txt <- readLines(curr_model_txt_file)
+      good_model_txt <- gsub( "cut\\(", "(", bad_model_txt,fixed = FALSE)
+      good_model_txt <- gsub( "I\\(0\\.000001,0\\.999999\\)", " ", good_model_txt,fixed = FALSE)
+      writeLines(good_model_txt, curr_model_txt_file)
+
       if(is.null(mcmc_options$jags.dir)){mcmc_options$jags.dir=""}
       gs <- jags2_baker(data   = curr_data_txt_file,
                         inits  = in_init,
@@ -3222,10 +3216,16 @@ nplcm_fit_Reg_Nest <- function(data_nplcm,model_options,mcmc_options){
   if(file.exists(curr_data_txt_file)){file.remove(curr_data_txt_file)}
   dump(names(in_data.list), append = FALSE, envir = here,
        file = curr_data_txt_file)
-  ## fix dimension problem.... convert say .Dmi=7:6 to c(7,6) (an issue for templateBS_1):
-  bad_jagsdata_txt <- readLines(curr_data_txt_file)
-  good_jagsdata_txt <- gsub( ".Dim = ([0-9]+):([0-9]+)", ".Dim = c(\\1,\\2)", bad_jagsdata_txt,fixed = FALSE)
-  writeLines(good_jagsdata_txt, curr_data_txt_file)
+
+
+  ## fixed some problems of JAGS 4.3.2 not having cut function; and I(a,b) functions weirdly, even though
+  ## the two elements are already constants (errors says that are not constant).
+  curr_model_txt_file <- file.path(mcmc_options$result.folder,model_bugfile_name)
+  bad_model_txt <- readLines(curr_model_txt_file)
+  good_model_txt <- gsub( "cut\\(", "(", bad_model_txt,fixed = FALSE)
+  good_model_txt <- gsub( "I\\(0\\.000001,0\\.999999\\)", " ", good_model_txt,fixed = FALSE)
+  writeLines(good_model_txt, curr_model_txt_file)
+
   if(is.null(mcmc_options$jags.dir)){mcmc_options$jags.dir=""}
   gs <- jags2_baker(data   = curr_data_txt_file,
                     inits  = in_init,