Skip to content

Commit 572c2af

Browse files
committed
🚧 R/growthrate_gam.R using gratia
1 parent fbd5103 commit 572c2af

File tree

1 file changed

+172
-24
lines changed

1 file changed

+172
-24
lines changed

R/growthrate_gam.R

Lines changed: 172 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#' @param nnumber of iterations to run (quick)
1313
#' @param qntquantiles to use for confidence interval
1414
#' @importFrom MASS mvrnorm
15+
#' @importFrom gratia derivatives
1516
#' @export
1617
#' @examples
1718
#' d <- read.csv("/Volumes/Phillips/R03_BehavioralBTC/data/btc_R03scoredmeasures_20190313.csv") %>%
@@ -20,11 +21,75 @@
2021
#' f <- f1score ~ s(Ageatvisit) + s(visit) + s(id, bs="re")
2122
#' m <- mgcv::gam(f, data=d)
2223
#' ci <- gam_growthrate(m, 'Ageatvisit', 'id')
23-
gam_growthrate <- function(m, agevar, idvar=NULL, n.iterations=10000, qnt=c(.025, .975)) {
24-
simdiff <- sim_diff1_from_gam(m, agevar, idvar, n.iterations=n.iterations)
25-
ci <- ci_from_simdiff1(simdiff$pred, simdiff$ages, qnt=qnt)
26-
ci$fit <- simdiff$fit
27-
return(ci)
24+
#' m <- gamm4::gamm4(f, data=d)
25+
#' ci <- gam_growthrate(m, 'Ageatvisit', 'id')
26+
gam_growthrate <- function(m, agevar, idvar=NULL, qnt=.975) { # TODO: interval_inc
27+
m <- gam_extract_model(m) # gam in list, mgcv not
28+
29+
varying_list <- list(.1); names(varying_list) <- agevar # eg. list(age=.1)
30+
age_step_df <- gen_predict_df(m, varying_list)
31+
32+
# gamm4 'from' must be a finite number; mgcv: random effect lunaid not found
33+
f1deriv <- gratia::derivatives(m,
34+
term=agevar,
35+
partial_match=TRUE, # BTC: agrep?, doesn't matter here
36+
interval="simultaneous",
37+
level=qnt,
38+
type="backward", # NA was first in original code
39+
data=age_step_df)
40+
#names(f1deriv) <- c("smooth","term","ages","mean_dff","se","crit","ci_low","ci_high")
41+
# c("smooth", "var", "by_var", "fs_var", "data", "derivative","se", "crit", "lower", "upper")
42+
return(f1deriv)
43+
}
44+
45+
#' gen_predict_df dataframe of model with variable(s) in fixed steps. use for prediction or model plotting
46+
#' @param m gam model
47+
#' @param varying list(coluname=stepsize, col2=step2, ...) default list(age=.1)
48+
#' @export
49+
gen_predict_df <- function(m, varying=list(age=.1)){
50+
have_varying <- names(varying) %in% names(m$model)
51+
if(!all(have_varying)) {
52+
cat("MISSING: not in model but want to vary ",
53+
paste(collapse=", ", names(varying)[have_varying]),
54+
"\n")
55+
stop('creating prediciton df: all columns to vary must be in model!')
56+
}
57+
58+
# create dataframe with each step of each predition needed
59+
# can get very large if many varying columns with small interval
60+
vars_list <-
61+
mapply(function(colname,interval)
62+
seq(min(m$model[,colname],na.rm=T),
63+
max(m$model[,colname],na.rm=T),
64+
by=interval),
65+
names(varying), varying, SIMPLIFY=F)
66+
pred_at_df <- do.call(expand.grid,vars_list)
67+
68+
# add missing
69+
lhs_terms <- labels(terms(m))
70+
center_terms <- setdiff(lhs_terms, names(varying))
71+
72+
# for factors, use the row closest to the median of the first varying column
73+
# TODO: euclidian distand of each varying median?
74+
median_of <- names(varying)[1]
75+
first_var_vals <- m$model[,median_of]
76+
median_of_first <- median(first_var_vals)
77+
middle_idx <- which.min(abs(median_of_first -first_var_vals))
78+
79+
# intertiavely build each column by mean
80+
for(colname in center_terms){
81+
if(is.null(colname)) next
82+
#colclass <- attr(terms(m),'dataClass')[colname])
83+
colclass <- class(m$model[,colname])
84+
if("numeric" %in% colclass) {
85+
center <- mean(m$model[,colname],na.rm=T)
86+
} else {
87+
center <- m$model[middle_idx,colname]
88+
warning(sprintf("set all %s=%s (center %s@%.2f of '%s' type)",
89+
colname, center, median_of,median_of_first, colclass))
90+
}
91+
pred_at_df[,colname] <- center
92+
}
2893
}
2994

3095

@@ -170,16 +235,27 @@ ci_from_simdiff1 <- function(pred, ages, qnt=c(.025, .975)) {
170235
}
171236

172237
too_small <- function(x) abs(x) < 10^-15
173-
clip_on_sig <- function(ci){
238+
clip_on_sig_old <- function(ci){
174239
# if confidence interval includes zero
175240
# signs of x and y will be different, -x * +y < 0
176241
# or if both high and low are extremly close to zero
177242
not_sig <- ci$ci_low * ci$ci_high < 0 |
178243
(too_small(ci$ci_low) & too_small(ci$ci_high))
179-
ci$mean_dff_clip <- ci$mean_dff
244+
ci$mean_dff_clip <- ci$mean_dff # TODO MATCH DERIVE NAME HERE
180245
ci$mean_dff_clip[not_sig] <- 0
181246
return(ci)
182247
}
248+
clip_on_sig <- function(ci){ # 20231205 now for gratia
249+
# if confidence interval includes zero
250+
# signs of x and y will be different, -x * +y < 0
251+
# or if both high and low are extremly close to zero
252+
not_sig <- ci$lower * ci$upper < 0 |
253+
(too_small(ci$lower) & too_small(ci$upper))
254+
ci$sig <- 1
255+
ci$sig[not_sig] <- 0
256+
return(ci)
257+
}
258+
183259

184260
#' gam_maturation_point
185261
#'
@@ -224,6 +300,8 @@ gam_maturation_point <- function(ci) {
224300
#' @param draw_points T|F, show individual points as scatter plot over gam fit line
225301
#' @param show_all_fill T|F, should we clip the raster fill to only significant ages?
226302
#' @param ci_plot T|F, plot 95 percent confidence interval with geom_ribbon?
303+
#' @param theme_func what theme to apply, default theme_bw. also see 'gam_plot_raster_theme'
304+
#' @param font_size, size of
227305
#' @export
228306
#' @importFrom itsadug get_predictions
229307
#' @examples
@@ -248,7 +326,7 @@ gam_growthrate_plot <-
248326
yvar=as.character(model$formula[2]),
249327
plotsavename=NA, xplotname="Age", yplotname=yvar,
250328
draw_maturation=T, draw_points=T, show_all_fill=F,
251-
ci_plot=T){
329+
ci_plot=T, theme_func=theme_bw, fontsize=36){
252330

253331
require(ggplot2)
254332
require(itsadug)
@@ -263,7 +341,7 @@ gam_growthrate_plot <-
263341
if (! "data.frame" %in% class(ci) ) stop("ci is not growthrate_gam() output")
264342
if (! yvar %in% names(model$model) ) stop(yvar, "not in model dataframe!")
265343

266-
ci$mean_dff_clip <- ci$mean_dff
344+
ci$mean_dff_clip <- ci$mean_dff # derivative # CHANGE HERE
267345
# when ci bounds include 0 (different sign), no longer signficant
268346
ci <- clip_on_sig(ci)
269347
maturation_pnt <- gam_maturation_point(ci)
@@ -295,14 +373,12 @@ gam_growthrate_plot <-
295373
# draw dotted line where maturation point is
296374
if (draw_maturation)
297375
tile <- tile +
298-
geom_segment(
376+
geom_segment( # TODO: modify NA to clear grey as option
299377
linetype=2, colour="black",
300378
aes(x=maturation_pnt, xend=maturation_pnt, y=.5, yend=1.5))
301379

302-
# lunaize the figure
303-
tile_luna <- lunaize_geomraster(tile) +
304-
theme(text = element_text(size=36))
305-
380+
# styleize theme
381+
tile_themed <- tile + theme_func() + gam_plot_raster_theme(text = element_text(size=fontsize))
306382

307383
# predictions
308384
modeldata<-data.frame(ydata=model$y, agevar=model$model[, agevar])
@@ -315,7 +391,7 @@ gam_growthrate_plot <-
315391
ggplot(agepred) +
316392
aes(x=!!sym(agevar), y=fit) +
317393
# solid bold line for fitted model
318-
geom_line(colour="black", linewidth=2) +
394+
geom_line(colour="black", linewidth=2) + # TODO: change me
319395
# label plot
320396
ylab(yplotname) +
321397
xlab(xplotname)
@@ -335,15 +411,15 @@ gam_growthrate_plot <-
335411
geom_line(data=d, aes(y=!!sym(yvar), group=!!sym(idvar)), alpha=.2)
336412

337413
# lunaize main plot
338-
ageplot_luna<-LNCDR::lunaize(ageplot)+
339-
theme(text = element_text(size=36),
414+
ageplot_luna<- ageplot + theme_func() + theme(
415+
text = element_text(size=fontsize),
340416
axis.title.x=element_blank(),
341417
axis.text.x=element_blank())
342418

343419
# save to file if we have plotsavename
344-
g <- gam_growthrate_plot_combine(ageplot_luna, tile_luna, plotsavename)
420+
g <- gam_growthrate_plot_combine(ageplot_luna, tile_themed, plotsavename)
345421

346-
list_of_plots <- list(tile=tile_luna, ageplot=ageplot_luna, both=g)
422+
list_of_plots <- list(tile=tile_themed, ageplot=ageplot_luna, both=g)
347423
# give back everything we created
348424
return(list_of_plots)
349425
}
@@ -396,14 +472,86 @@ gam_growthrate_plot_combine <- function(ageplot_luna, tile_luna, PDFout=NA) {
396472
return(g)
397473
}
398474

399-
lunaize_geomraster<-function(x){
400-
x+
401-
theme_bw()+
402-
theme(
475+
#' gam_plot_raster_theme removes panel and axis ticks and legend
476+
#' @export
477+
gam_plot_raster_theme <- function(...) {
478+
theme(
403479
panel.grid.minor = element_blank(),
404480
panel.grid.major = element_blank(),
405481
axis.title.y = element_blank(),
406482
axis.ticks.y = element_blank(),
407483
axis.text.y = element_blank(),
408-
legend.position = "none")
484+
legend.position = "none", ...)
485+
}
486+
487+
#' gam_extract_model papers over gamm4 and mgcv gam models
488+
#' @param m either a mgcv::gam or a gamm4::gamm4 model
489+
#' @return model componet (gamm4$gam) or pass through
490+
gam_extract_model <- function(m) {
491+
# class(mgcv::gam(...)) # "gam" "glm" "lm"
492+
if("gam" %in% class(m)) return(m)
493+
494+
# m <- gamm4::gamm4(data=d, random=~(1|lunaid), value ~ s(age,k=5) + hemi)
495+
# class(gamm4::gamm4(...)) #
496+
if("list" %in% class(m) && "gam" %in% names(m)) return(m$gam)
497+
498+
if(! "model" %in% names(m)) stop("input model not gam, does not have model, cannot use!")
499+
return(m)
500+
}
501+
502+
mgcvgampreddata<-function(model,predvar,idvar=NULL,interval_inc=.1,varycovarsname=NULL,varycovarlevels=NULL){
503+
# if (class(model)[1]=="gamm" || (class(model)=="list" && "gam" %in% class(model[['gam']]))){
504+
# model<-model$gam
505+
# }
506+
# TODO:gamm4 vs mgcv
507+
modeldata<-data.frame(ydata=model$y, predvar=model$model[, predvar])
508+
preddata<-data.frame(var=seq(min(modeldata$predvar,na.rm=T),
509+
max(modeldata$predvar,na.rm=T),
510+
by=interval_inc))
511+
names(preddata)[1]<-predvar
512+
if (!identical(find_covars_gam(model$formula, predvar),character(0))){
513+
# if (!(length(find_covars_gam(model$formula, predvar))==1 && find_covars_gam(model$formula, predvar)[1]==varycovarsname)){
514+
for (cv in find_covars_gam(model$formula,predvar)){
515+
x <- model$model[, cv]
516+
if (is.character(x) || is.factor(x)){
517+
warning("gam w/character or factor covar, setting all sim to the first obs for character/first level if factor")
518+
y <- x[1]
519+
if (class(x)=="factor"){y<-levels(x)[1]}
520+
print(sprintf("covar % set to level %s",cv,y))
521+
} else {
522+
y <- mean(x, na.rm=T)
523+
}
524+
preddata[, cv] <- y
525+
}
526+
#}
527+
}else if(is.null(varycovarsname)){
528+
preddata$cov<-"no cov"
529+
}
530+
531+
#if (!(length(find_covars_gam(model$formula, predvar))==1 && find_covars_gam(model$formula, predvar)[1]==varycovarsname)){
532+
names(preddata)<-c(predvar,find_covars_gam(model$formula, predvar))
533+
#}
534+
if (identical(find_covars_gam(model$formula, predvar),character(0)) && is.null(varycovarsname)){
535+
names(preddata)<-c(predvar,"nullcovar")
536+
}
537+
538+
if (!is.null(varycovarsname)){
539+
require(reshape)
540+
orignameorder<-names(preddata)
541+
preddata[,varycovarsname]<-NULL
542+
preddata<-reshape::expand.grid.df(preddata,data.frame(varycovar=varycovarlevels))
543+
names(preddata)[names(preddata)=="varycovar"]<-varycovarsname
544+
#preddata<-preddata[,orignameorder]
545+
}
546+
547+
yhats <- predict(model,preddata,se.fit=TRUE)
548+
preddata<-cbind(preddata,yhats$fit,yhats$se.fit)
549+
names(preddata)<-c(predvar,find_covars_gam(model$formula, predvar),"fit","se")
550+
if (identical(find_covars_gam(model$formula, predvar),character(0)) && is.null(varycovarsname)){
551+
names(preddata)<-c(predvar,"nullcovar","fit","se")
552+
}else if(identical(find_covars_gam(model$formula, predvar),character(0)) && !is.null(varycovarsname)){
553+
names(preddata)<-c(predvar,varycovarsname,"fit","se")
554+
}
555+
preddata$CI<-2*preddata$se
556+
return(preddata)
409557
}

0 commit comments

Comments
 (0)