👽 ci col names: ages->data,dff->derivative,ci_low/high->lower/upper

WillForan · WillForan · commit 14d5a7ce814a · 2024-01-04T22:23:57.000-05:00
* is_sig now boolean instead of returning dataframe
* print for variables that are centered
* remove old dead code (simdiff1 funcs, clip_on_sig)
diff --git a/R/growthrate_gam.R b/R/growthrate_gam.R
@@ -15,7 +15,7 @@
 #' @importFrom gratia derivatives
 #' @export
 #' @examples
-#'  d <- read.csv("/Volumes/Phillips/R03_BehavioralBTC/data/btc_R03scoredmeasures_20190313.csv") %>%
+#'  d <- read.csv("tests/btc_r03_scored.csv") %>%
 #'    group_by(id) %>%
 #'    mutate(visit=rank(d8))
 #'  f <- f1score ~ s(Ageatvisit) + s(visit) + s(id, bs="re")
@@ -24,7 +24,7 @@
 #'  m <- gamm4::gamm4(f, data=d)
 #'  ci <- gam_growthrate(m, 'Ageatvisit', 'id')
 gam_growthrate <- function(m, agevar, idvar=NULL, qnt=.975) { # TODO: interval_inc
-  m <- gam_extract_model(m) # gam in list, mgcv not
+  m <- gam_extract_model(m) # gam 'model' is within list, mgcv has model at top
 
   varying_list <- list(.1); names(varying_list) <- agevar # eg. list(age=.1)
   age_step_df <- gen_predict_df(m, varying_list)
@@ -37,6 +37,11 @@ gam_growthrate <- function(m, agevar, idvar=NULL, qnt=.975) { # TODO: interval_i
                                level=qnt,
                                type="backward", # NA was first in original code
                                data=age_step_df)
+ # f1deriv
+ #   smooth        var        by_var fs_var  data derivative     se  crit lower upper
+ #   <chr>         <chr>      <chr>  <chr>  <dbl>      <dbl>  <dbl> <dbl> <dbl> <dbl>
+ # 1 s(Ageatvisit) Ageatvisit <NA>   <NA>    8.01      0.374 0.0615  3.27 0.173 0.575
+ # 2 s(Ageatvisit) Ageatvisit <NA>   <NA>    8.11      0.374 0.0615  3.27 0.173 0.57
   #names(f1deriv) <- c("smooth","term","ages","mean_dff","se","crit","ci_low","ci_high")
                    # c("smooth", "var", "by_var", "fs_var", "data", "derivative","se", "crit", "lower", "upper")
   return(f1deriv)
@@ -81,15 +86,19 @@ gen_predict_df <- function(m, varying=list(age=.1)){
       if(is.null(colname)) next
       #colclass <- attr(terms(m),'dataClass')[colname])
       colclass <- class(m$model[,colname])
-      if("numeric" %in% colclass) {
+      if(any(c("integer","numeric") %in% colclass)) {
           center <- mean(m$model[,colname],na.rm=T)
+          # ugly to print something for each column
+          # but likely this doing something unintended
+          cat(glue::glue("# column '{colname}' (type '{colclass}') centered at {center}"),"\n")
       } else {
          center <- m$model[middle_idx,colname]
-         warning(sprintf("set all %s=%s (center %s@%.2f of '%s' type)",
-                         colname, center, median_of,median_of_first, colclass))
+         warning(sprintf("non-numeric '%s' column! set all %s=%s (closet to center %s@%.2f)",
+                         colclass, colname, center, median_of,median_of_first))
       }
       pred_at_df[,colname] <- center
    }
+   return(pred_at_df)
 }
 
 
@@ -123,137 +132,18 @@ find_covars_gam <- function(fml, ...) {
    return(no_re)
 }
 
-sim_diff1_from_gam <- function(m, agevar, idvar=NULL,
-                               n.iterations=10000, interval_inc=.1) {
-   v <- m$model[, agevar]
-   cond_list <- list(seq(min(v), max(v), by=interval_inc))
-   pp <- data.frame(a=cond_list[[1]], b=Inf)
-   # names should match what went into the model
-   names(pp) <- c(agevar, idvar)
-
-   # what if idvar is factor (Inf wont work)
-   if (is.null(idvar)) {
-      # do nothing. no idvar
-   } else if (is.factor(m$model[, idvar])){
-      # select idvar with the middle most random effect
-      # random effects are coefficents like s(idvar).xxxxx
-      # where xxxx is the index of the specific idvar factor name
-      idvarpatt <- sprintf("s\\(%s\\)", idvar)
-      idvarpatt. <- sprintf("s\\(%s\\).", idvar)
-      randeff <- m$coefficients[ grep(idvarpatt, names(m$coefficients)) ]
-      medval <- sort(randeff)[floor(length(randeff)/2)]
-      med_re_name <- names(which(randeff == medval))
-      median_idx <- gsub(idvarpatt., "", med_re_name)
-      median_subj <- levels(m$model[, idvar])[as.numeric(median_idx)]
-      warning("gam w/factor idvar, ",
-              "setting the middle most random effect subject: ",
-              median_subj)
-      pp[, 2] <- median_subj
-
-      # alternatively, select the first
-      # pp[, 2] <- m$model[1, idvar]
-   } else {
-     warning("predition with continous (non-factor) idvar will give 'Inf' fit")
-     # maybe pick middle value instead?
-     # pp[, 2] <- mean(m$model[, idvar], na.rm=T)
-   }
-
-   # for all covars, pick out the mean
-   for (cv in find_covars_gam(m$formula, agevar)) {
-      x <- m$model[, cv]
-      if (is.character(x) || is.factor(x) ){
-         warning("gam w/factor covar, setting all sim to the first!")
-         y <- x[1]
-         # TODO: maybe pracma::Mode ?
-      } else {
-          y <- mean(x, na.rm=T)
-      }
-      pp[, cv] <- y
-   }
-
-   Xp <- predict(m, pp, type="lpmatrix")
-
-   mu_beta <- coef(m)
-   sigma_Vb <- vcov(m)
-   # variance-covariance matrix of the main parameters  fitted model
-   # used as: a positive-definite symmetric matrix specifying
-   #  the covariance matrix of the variables.
-
-   # set.seed(10)
-   mrand <- MASS::mvrnorm(n.iterations, mu_beta, sigma_Vb)
-
-   # gamm$gam doesn't support 'family'
-   # class( mgcv::gamm(data=mtcars, cyl ~ mpg + s(wt) )$gam)
-   # [1] "gam"
-   # class( mgcv::gam(data=mtcars, cyl ~ mpg + s(wt) ))
-   # [1] "gam" "glm" "lm"
-   if("glm" %in% class(m)){
-      ilink <- family(m)$linkinv
-   } else {
-      ilink <- m$family$linkinv
-   }
-
-   # only want inetercept and agevar
-   keep_cols <- grep(paste0("Intercept|", agevar), dimnames(Xp)[[2]], value=T)
-   Xp_agevar <- Xp[, keep_cols]
-   mrand_agevar <- mrand[, keep_cols]
-
-   # generate a whole bunch of plausable values, get the diff
-   diffs <- lapply(seq_len(n.iterations), function(i)  {
-                  fit <- ilink(Xp_agevar %*% mrand_agevar[i, ])
-                  dff <- c(NA, diff(fit))
-                  return(dff)
-                 })
-
-   return(list(pred=diffs, ages=pp[, 1], fit=predict(m, pp)))
-}
-
-ci_from_simdiff1 <- function(pred, ages, qnt=c(.025, .975)) {
-
-   names(pred) <- 1:length(pred)
-   mm <- t(dplyr::bind_rows(pred))
-
-   # this is the ouptut !
-   mean_dff <- apply(mm, 2, mean)
-   ci <- apply(mm, 2, quantile, qnt, na.rm=T)
-   colnames(ci) <- ages
-   out <- data.frame(mean_dff=mean_dff, ages=ages)
-   ci_out <- t(ci)
-   dimnames(ci_out)[[2]] <- c("ci_low", "ci_high")
-   return(cbind(out, ci_out))
-
-   # NEVER REACHED -- left as bad documentation
-   # old: return just ci and mean_dff
-   return(list(ci=ci, mean_dff=mean_dff))
-
-   # this is for fun
-   ages[which.min(ci[1, ])]
-   ages[which.min(ci[2, ])]
-
-   plot(ages, mean_dff)
-   for (i in 1:10) lines(ages, pred[[i]])
-}
-
 too_small <- function(x) abs(x) < 10^-15
-clip_on_sig_old <- function(ci){
-  # if confidence interval includes zero
-  # signs of x and y will be different, -x * +y  < 0
-  # or if both high and low are extremly close to zero
-  not_sig <- ci$ci_low * ci$ci_high < 0 |
-             (too_small(ci$ci_low) & too_small(ci$ci_high))
-  ci$mean_dff_clip <- ci$mean_dff # TODO MATCH DERIVE NAME HERE
-  ci$mean_dff_clip[not_sig] <- 0
-  return(ci)
-}
-clip_on_sig <- function(ci){ # 20231205 now for gratia
+#' add sig column:  0 not significant; 1 is signficiant
+is_sig <- function(ci){ # 20231205 now for gratia
   # if confidence interval includes zero
   # signs of x and y will be different, -x * +y  < 0
   # or if both high and low are extremly close to zero
   not_sig <- ci$lower * ci$upper < 0 |
     (too_small(ci$lower) & too_small(ci$upper))
-  ci$sig <- 1
-  ci$sig[not_sig] <- 0
-  return(ci)
+  return(!not_sig)
+  #ci$sig <- 1
+  #ci$sig[not_sig] <- 0
+  #return(ci)
 }
 
 
@@ -263,21 +153,22 @@ clip_on_sig <- function(ci){ # 20231205 now for gratia
 #' @param ci     growthrate_gam output (confidence interval and derivitive)
 #' @export
 gam_maturation_point <- function(ci) {
+  # ci has cols: smooth, var, data, derivative, se, crit, lower, upper
 
   # when ci bounds include 0 (different sign), no longer signficant
   # clip out insignificant derivitive
-  if (is.na(ci$ci_low[1])) ci <- ci[-1, ]
+  if (is.na(ci$lower[1])) ci <- ci[-1, ]
 
   # get mean_df_clip column
-  if (! "mean_dff_clip" %in% names(ci)) ci <- clip_on_sig(ci)
+  if (! "sig" %in% names(ci)) ci$sig <- is_sig(ci)
 
   # find maturation point after the first signficant age
-  onset_sig <- ci$ages[ci$mean_dff_clip != 0]
+  onset_sig <- ci$data[ci$sig]
   maturation_pnt <- NA
   if (length(onset_sig)>0L && !all(is.na(onset_sig))) {
-     mat_points_idx <- ci$mean_dff_clip==0 & ci$ages > onset_sig[1]
+     mat_points_idx <- !ci$sig & ci$data > onset_sig[1]
      if (length(mat_points_idx) > 0L && any(mat_points_idx))
-        maturation_pnt <- min(ci$ages[mat_points_idx], na.rm=T)
+        maturation_pnt <- min(ci$data[mat_points_idx], na.rm=T)
   }
 
   return(maturation_pnt)
@@ -341,9 +232,8 @@ gam_growthrate_plot <-
   if (! "data.frame" %in% class(ci) ) stop("ci is not growthrate_gam() output")
   if (! yvar %in% names(model$model) ) stop(yvar, "not in model dataframe!")
 
-  ci$mean_dff_clip <- ci$mean_dff # derivative # CHANGE HERE
   # when ci bounds include 0 (different sign), no longer signficant
-  ci <- clip_on_sig(ci)
+  ci$sig <- is_sig(ci)
   maturation_pnt <- gam_maturation_point(ci)
 
   # warn about no matruation point
@@ -353,13 +243,19 @@ gam_growthrate_plot <-
   }
 
   # show even unsignficant change in raster if show_all_fill
-  fill_column <- ifelse(show_all_fill, "mean_dff", "mean_dff_clip")
+  # was mean_dff vs mean_dff_clip, now derivative or clip (TODO: not ci$data?)
+  fill_column <- "derivative"
+  if(!show_all_fill) {
+      fill_column <- "deriv_clip"
+      ci$deriv_clip <- ci$derivative
+      ci$deriv_clip[!ci$sig] <- 0 # should be NA? will plot as grey in tile
+   }
 
   ## setup derivitive raster plot
-  deriv_range <- range(ci$mean_dff, na.rm=T)
+  deriv_range <- range(ci[,fill_column], na.rm=T)
   tile <-
      ggplot(ci[-1, ]) + # don't plot first row (is NA)
-     aes(x=ages, y=1, fill=!!sym(fill_column)) +
+     aes(x=data, y=1, fill=!!sym(fill_column)) +
      geom_raster(interpolate=TRUE) +
      scale_fill_gradient2(
         low = "blue", mid = "white", high = "red",
@@ -382,7 +278,7 @@ gam_growthrate_plot <-
 
   # predictions
   modeldata<-data.frame(ydata=model$y, agevar=model$model[, agevar])
-  condlist <- list(a=ci$ages)
+  condlist <- list(a=ci$data)
   names(condlist) <- agevar
   # 20190826 BTC - remove random effects (bug fix)
   agepred <- itsadug::get_predictions(model, cond = condlist, rm.ranef=TRUE)
@@ -484,7 +380,7 @@ gam_plot_raster_theme <- function(...) {
     legend.position  = "none", ...)
 }
 
-#' gam_extract_model papers over gamm4 and mgcv gam models
+#' gam_extract_model abstract over gamm4 and mgcv gam models
 #' @param m  either a mgcv::gam or a gamm4::gamm4 model
 #' @return model componet (gamm4$gam) or pass through
 gam_extract_model <- function(m) {