Add files via upload

OuhscBbmc · Feb 5, 2024 · 41cdf2c · 41cdf2c
1 parent d03bdc7
commit 41cdf2c
Showing 1 changed file with 136 additions and 0 deletions.
diff --git a/R/random_rpql.R b/R/random_rpql.R
@@ -0,0 +1,136 @@
+
+#' Title
+#'
+#' random rPQL
+#'@description
+#'
+#'random_rpql() allows the user to do variable selection for fixed effect and random effect in GLMMs model by using rpql(Hui,et al,2017) and random lasso(Wang, et al., 2011)
+#' @usage random_rpql(data,q1_fix,q1_random,bt1,fixed_effect,random_effect,ycol,id_vector,no_scale,cluster_id,within_cluster_id,family="gaussian",lam1,verbose=T,ci_criteria1=5,pen.type1="adl",random_draw_sample1=0.5,q2_fix,q2_random,bt2,lam2,pen.type2="adl",ci_criteria2=5,random_draw_sample2=0.5,use_lmer=F,use_fre_fun1=T)
+#' @return variable importance plots and tables for fixed effects and random effects
+#' @param data dataset without missing data
+#' @param q1_fix number of fixed effects used in each bootstrapping step in first step of random rpql
+#' @param q1_random number of random effects used in each bootstrapping step in first step of random rpql
+#' @param bt1  number of bootstrapping in first step of random rpql
+#' @param fixed_effect candidate variables used to fixed effects
+#' @param random_effect candidate variables used to random effects
+#' @param ycol  outcome variables
+#' @param id_vector column names for groups (cross-sectional design)/participants id(for longitudinal design) and within-units id
+#' @param no_scale  variable names of discrete variables
+#' @param cluster_id variable name for participants id/group id
+#' @param within_cluster_id variable name for time/id for participant within each unit(participant/group)
+#' @param family The distribution for the outcome in GLMM.  Supported arguments include: gaussian(), binomial().
+#' @param lam1 a vector for tuning parameters used in rPQL for each bootstrapping step in first step
+#' @param ci_criteria1 criteria to choose the optimal tuning parameters for step 1, details refers to Hui,et al,2017
+#' @param pen.type1 the type of penalty used in random rpql first step. supported argument include: "lasso" for standard lasso(Tibshirani,1996),"adl"for adaptive lasso(Zou,2006)
+#' @param random_draw_sample1 the proportion sample used in each bootstrapping in first step
+#' @param q2_fix  number of fixed effects used in each bootstrapping step in second step of random rpql
+#' @param q2_random number of random effects used in each bootstrapping step in second step of random rpql
+#' @param bt2 number of bootstrapping in second step of random rpql
+#' @param lam2 a vector for tuning parameters used in rPQL for each bootstrapping step in first step
+#' @param pen.type2 the type of penalty used in random rpql second step
+#' @param ci_criteria2 criteria to choose the optimal tuning parameters for step 2, details refers to Hui,et al,2017
+#' @param random_draw_sample2 the proportion sample used in each bootstrapping in second step
+#' @param use_lmer if pen.type2 is true, whether use lmer to derived the weights of penalty. if use_lmer=T, using lmer to calculate weights for "adl". otherwise, the weights for "adl" are calculated by the importance index obtained by first step
+#' @param use_fre_fun1 whether used frequency list to calculate importance index
+#' @param verbose show the running details
+#' @import rpql
+#' @import dplyr
+#' @import lme4
+#' @import purrr
+#' @import gtools
+#' @import PlackettLuce
+#' @import data.table
+#' @import tibble
+#' @importFrom methods new
+#' @importFrom stats coef df na.omit setNames
+
+
+
+#'
+#'
+#' @examples
+#' \dontrun{
+#' id_vector<- c("participant_id", "wave")
+#'
+#' no_scale <- c("participant_id", "wave")
+#' cluster_id="participant_id"
+#' within_cluster_id="wave"
+#' ycol <- "y"
+#' family<-"gaussian"
+#' lam1=exp(seq(from=log(1e-4),to=log(1e-8),length.out=100))
+#' lam2=exp(seq(from=log(1e-1),to=log(1e-8),length.out=100))
+#'  random_rpql(data,q1_fix=4,q1_random=3,bt1=200,fixed_effect,random_effect,ycol,id_vector,no_scale,cluster_id,within_cluster_id,family="gaussian",lam1,verbose=T,ci_criteria1=5,pen.type1="adl",random_draw_sample1=0.5,q2_fix=4,q2_random=3,bt2=200,lam2,pen.type2="adl",ci_criteria2=4,random_draw_sample2=0.5,use_lmer=F,use_fre_fun1=T)
+#'  }
+#' @references
+#' Hui, F. K. C., Mueller, S., and Welsh, A.H. (2017). Hierarchical Selection of Fixed and Random Effects in Generalized Linear Mixed Models. Statistica Sinica, 27, 501-518.
+#' Wang, S., Nan, B., Rosset, S., & Zhu, J. (2011). Random lasso. The annals of applied statistics, 5(1), 468.
+#'
+#' @return
+#' @export
+
+random_rpql=
+  function(data,q1_fix,q1_random,bt1,
+           fixed_effect,random_effect,
+           ycol,id_vector,no_scale,cluster_id,
+           within_cluster_id,family="gaussian",lam1,
+           verbose=T,ci_criteria1=5,pen.type1="adl",random_draw_sample1=0.5,
+           q2_fix,q2_random,bt2,lam2,pen.type2="adl",ci_criteria2=5,
+           random_draw_sample2=0.5,use_lmer=F,use_fre_fun1=T
+  ){
+    options(warn=-1)
+    d=data_change_format(data,fixed_effect,random_effect,ycol,cluster_id,time=within_cluster_id)
+    df=d[[1]]
+    fixed_effect_name=d[[2]]
+    random_effect_name=d[[3]]
+    fixed_effect_n=d[[2]][,2]
+    random_effect_n=d[[3]][,2]
+
+
+    res1=random_rpql_com1 (q1_fix=q1_fix, # number of fixed effect used in each bootscrap step
+                           q1_random=q1_random,# number of random effect used in each bootscrap step
+                           bt=bt1, # time of bootscrapping
+                           data=df,
+                           fixed_effect_n,
+                           random_effect_n,
+                           ycol,
+                           id_vector,
+                           no_scale,
+                           cluster_id,
+                           time=within_cluster_id,
+                           family =family,
+                           lam=lam1,
+                           verbose,
+                           ci_criteria=ci_criteria1,# select whether ci used to select the optimal model
+                           pen.type=pen.type1,
+                           random_draw_sample =random_draw_sample1 #new: manipulate how much sample size of id to draw, if it is 1 means use the orginal ss
+    )
+
+
+    res2=random_rpql_com2(q2_fix = q2_fix,
+                          q2_random=q2_random,
+                          bt=bt2,
+                          data=df,
+                          fixed_effect_n,
+                          random_effect_n,
+                          ycol,
+                          id_vector,
+                          no_scale,
+                          cluster_id,
+                          time=within_cluster_id,
+                          family = family,
+
+                          pen.type=pen.type2,
+                          result=res1,
+                          lam=lam2,
+                          verbose = TRUE,
+                          random_draw_sample = random_draw_sample2,
+                          ci_criteria=ci_criteria2,
+                          use_lmer,# whether the weights of ALasso from lmer
+                          use_fre_fun1) # use the frequency list to calculate the important index from function1}
+
+
+    rk=change_rank(res2=res2,fixed_effect=fixed_effect_n,random_effect=random_effect_n)
+    p=rp_change_fun(rank=rk,fixed_effect=fixed_effect_n,random_effect=random_effect_n,
+                    fixed_effect_name=fixed_effect_name,random_effect_name=random_effect_name)
+
+    return(p)}