12
12
# ' @param nnumber of iterations to run (quick)
13
13
# ' @param qntquantiles to use for confidence interval
14
14
# ' @importFrom MASS mvrnorm
15
+ # ' @importFrom gratia derivatives
15
16
# ' @export
16
17
# ' @examples
17
18
# ' d <- read.csv("/Volumes/Phillips/R03_BehavioralBTC/data/btc_R03scoredmeasures_20190313.csv") %>%
20
21
# ' f <- f1score ~ s(Ageatvisit) + s(visit) + s(id, bs="re")
21
22
# ' m <- mgcv::gam(f, data=d)
22
23
# ' ci <- gam_growthrate(m, 'Ageatvisit', 'id')
23
- gam_growthrate <- function (m , agevar , idvar = NULL , n.iterations = 10000 , qnt = c(.025 , .975 )) {
24
- simdiff <- sim_diff1_from_gam(m , agevar , idvar , n.iterations = n.iterations )
25
- ci <- ci_from_simdiff1(simdiff $ pred , simdiff $ ages , qnt = qnt )
26
- ci $ fit <- simdiff $ fit
27
- return (ci )
24
+ # ' m <- gamm4::gamm4(f, data=d)
25
+ # ' ci <- gam_growthrate(m, 'Ageatvisit', 'id')
26
+ gam_growthrate <- function (m , agevar , idvar = NULL , qnt = .975 ) { # TODO: interval_inc
27
+ m <- gam_extract_model(m ) # gam in list, mgcv not
28
+
29
+ varying_list <- list (.1 ); names(varying_list ) <- agevar # eg. list(age=.1)
30
+ age_step_df <- gen_predict_df(m , varying_list )
31
+
32
+ # gamm4 'from' must be a finite number; mgcv: random effect lunaid not found
33
+ f1deriv <- gratia :: derivatives(m ,
34
+ term = agevar ,
35
+ partial_match = TRUE , # BTC: agrep?, doesn't matter here
36
+ interval = " simultaneous" ,
37
+ level = qnt ,
38
+ type = " backward" , # NA was first in original code
39
+ data = age_step_df )
40
+ # names(f1deriv) <- c("smooth","term","ages","mean_dff","se","crit","ci_low","ci_high")
41
+ # c("smooth", "var", "by_var", "fs_var", "data", "derivative","se", "crit", "lower", "upper")
42
+ return (f1deriv )
43
+ }
44
+
45
+ # ' gen_predict_df dataframe of model with variable(s) in fixed steps. use for prediction or model plotting
46
+ # ' @param m gam model
47
+ # ' @param varying list(coluname=stepsize, col2=step2, ...) default list(age=.1)
48
+ # ' @export
49
+ gen_predict_df <- function (m , varying = list (age = .1 )){
50
+ have_varying <- names(varying ) %in% names(m $ model )
51
+ if (! all(have_varying )) {
52
+ cat(" MISSING: not in model but want to vary " ,
53
+ paste(collapse = " , " , names(varying )[have_varying ]),
54
+ " \n " )
55
+ stop(' creating prediciton df: all columns to vary must be in model!' )
56
+ }
57
+
58
+ # create dataframe with each step of each predition needed
59
+ # can get very large if many varying columns with small interval
60
+ vars_list <-
61
+ mapply(function (colname ,interval )
62
+ seq(min(m $ model [,colname ],na.rm = T ),
63
+ max(m $ model [,colname ],na.rm = T ),
64
+ by = interval ),
65
+ names(varying ), varying , SIMPLIFY = F )
66
+ pred_at_df <- do.call(expand.grid ,vars_list )
67
+
68
+ # add missing
69
+ lhs_terms <- labels(terms(m ))
70
+ center_terms <- setdiff(lhs_terms , names(varying ))
71
+
72
+ # for factors, use the row closest to the median of the first varying column
73
+ # TODO: euclidian distand of each varying median?
74
+ median_of <- names(varying )[1 ]
75
+ first_var_vals <- m $ model [,median_of ]
76
+ median_of_first <- median(first_var_vals )
77
+ middle_idx <- which.min(abs(median_of_first - first_var_vals ))
78
+
79
+ # intertiavely build each column by mean
80
+ for (colname in center_terms ){
81
+ if (is.null(colname )) next
82
+ # colclass <- attr(terms(m),'dataClass')[colname])
83
+ colclass <- class(m $ model [,colname ])
84
+ if (" numeric" %in% colclass ) {
85
+ center <- mean(m $ model [,colname ],na.rm = T )
86
+ } else {
87
+ center <- m $ model [middle_idx ,colname ]
88
+ warning(sprintf(" set all %s=%s (center %s@%.2f of '%s' type)" ,
89
+ colname , center , median_of ,median_of_first , colclass ))
90
+ }
91
+ pred_at_df [,colname ] <- center
92
+ }
28
93
}
29
94
30
95
@@ -170,16 +235,27 @@ ci_from_simdiff1 <- function(pred, ages, qnt=c(.025, .975)) {
170
235
}
171
236
172
237
too_small <- function (x ) abs(x ) < 10 ^- 15
173
- clip_on_sig <- function (ci ){
238
+ clip_on_sig_old <- function (ci ){
174
239
# if confidence interval includes zero
175
240
# signs of x and y will be different, -x * +y < 0
176
241
# or if both high and low are extremly close to zero
177
242
not_sig <- ci $ ci_low * ci $ ci_high < 0 |
178
243
(too_small(ci $ ci_low ) & too_small(ci $ ci_high ))
179
- ci $ mean_dff_clip <- ci $ mean_dff
244
+ ci $ mean_dff_clip <- ci $ mean_dff # TODO MATCH DERIVE NAME HERE
180
245
ci $ mean_dff_clip [not_sig ] <- 0
181
246
return (ci )
182
247
}
248
+ clip_on_sig <- function (ci ){ # 20231205 now for gratia
249
+ # if confidence interval includes zero
250
+ # signs of x and y will be different, -x * +y < 0
251
+ # or if both high and low are extremly close to zero
252
+ not_sig <- ci $ lower * ci $ upper < 0 |
253
+ (too_small(ci $ lower ) & too_small(ci $ upper ))
254
+ ci $ sig <- 1
255
+ ci $ sig [not_sig ] <- 0
256
+ return (ci )
257
+ }
258
+
183
259
184
260
# ' gam_maturation_point
185
261
# '
@@ -224,6 +300,8 @@ gam_maturation_point <- function(ci) {
224
300
# ' @param draw_points T|F, show individual points as scatter plot over gam fit line
225
301
# ' @param show_all_fill T|F, should we clip the raster fill to only significant ages?
226
302
# ' @param ci_plot T|F, plot 95 percent confidence interval with geom_ribbon?
303
+ # ' @param theme_func what theme to apply, default theme_bw. also see 'gam_plot_raster_theme'
304
+ # ' @param font_size, size of
227
305
# ' @export
228
306
# ' @importFrom itsadug get_predictions
229
307
# ' @examples
@@ -248,7 +326,7 @@ gam_growthrate_plot <-
248
326
yvar = as.character(model $ formula [2 ]),
249
327
plotsavename = NA , xplotname = " Age" , yplotname = yvar ,
250
328
draw_maturation = T , draw_points = T , show_all_fill = F ,
251
- ci_plot = T ){
329
+ ci_plot = T , theme_func = theme_bw , fontsize = 36 ){
252
330
253
331
require(ggplot2 )
254
332
require(itsadug )
@@ -263,7 +341,7 @@ gam_growthrate_plot <-
263
341
if (! " data.frame" %in% class(ci ) ) stop(" ci is not growthrate_gam() output" )
264
342
if (! yvar %in% names(model $ model ) ) stop(yvar , " not in model dataframe!" )
265
343
266
- ci $ mean_dff_clip <- ci $ mean_dff
344
+ ci $ mean_dff_clip <- ci $ mean_dff # derivative # CHANGE HERE
267
345
# when ci bounds include 0 (different sign), no longer signficant
268
346
ci <- clip_on_sig(ci )
269
347
maturation_pnt <- gam_maturation_point(ci )
@@ -295,14 +373,12 @@ gam_growthrate_plot <-
295
373
# draw dotted line where maturation point is
296
374
if (draw_maturation )
297
375
tile <- tile +
298
- geom_segment(
376
+ geom_segment( # TODO: modify NA to clear grey as option
299
377
linetype = 2 , colour = " black" ,
300
378
aes(x = maturation_pnt , xend = maturation_pnt , y = .5 , yend = 1.5 ))
301
379
302
- # lunaize the figure
303
- tile_luna <- lunaize_geomraster(tile ) +
304
- theme(text = element_text(size = 36 ))
305
-
380
+ # styleize theme
381
+ tile_themed <- tile + theme_func() + gam_plot_raster_theme(text = element_text(size = fontsize ))
306
382
307
383
# predictions
308
384
modeldata <- data.frame (ydata = model $ y , agevar = model $ model [, agevar ])
@@ -315,7 +391,7 @@ gam_growthrate_plot <-
315
391
ggplot(agepred ) +
316
392
aes(x = !! sym(agevar ), y = fit ) +
317
393
# solid bold line for fitted model
318
- geom_line(colour = " black" , linewidth = 2 ) +
394
+ geom_line(colour = " black" , linewidth = 2 ) + # TODO: change me
319
395
# label plot
320
396
ylab(yplotname ) +
321
397
xlab(xplotname )
@@ -335,15 +411,15 @@ gam_growthrate_plot <-
335
411
geom_line(data = d , aes(y = !! sym(yvar ), group = !! sym(idvar )), alpha = .2 )
336
412
337
413
# lunaize main plot
338
- ageplot_luna <- LNCDR :: lunaize( ageplot ) +
339
- theme( text = element_text(size = 36 ),
414
+ ageplot_luna <- ageplot + theme_func() + theme(
415
+ text = element_text(size = fontsize ),
340
416
axis.title.x = element_blank(),
341
417
axis.text.x = element_blank())
342
418
343
419
# save to file if we have plotsavename
344
- g <- gam_growthrate_plot_combine(ageplot_luna , tile_luna , plotsavename )
420
+ g <- gam_growthrate_plot_combine(ageplot_luna , tile_themed , plotsavename )
345
421
346
- list_of_plots <- list (tile = tile_luna , ageplot = ageplot_luna , both = g )
422
+ list_of_plots <- list (tile = tile_themed , ageplot = ageplot_luna , both = g )
347
423
# give back everything we created
348
424
return (list_of_plots )
349
425
}
@@ -396,14 +472,86 @@ gam_growthrate_plot_combine <- function(ageplot_luna, tile_luna, PDFout=NA) {
396
472
return (g )
397
473
}
398
474
399
- lunaize_geomraster <- function ( x ){
400
- x +
401
- theme_bw() +
402
- theme(
475
+ # ' gam_plot_raster_theme removes panel and axis ticks and legend
476
+ # ' @export
477
+ gam_plot_raster_theme <- function ( ... ) {
478
+ theme(
403
479
panel.grid.minor = element_blank(),
404
480
panel.grid.major = element_blank(),
405
481
axis.title.y = element_blank(),
406
482
axis.ticks.y = element_blank(),
407
483
axis.text.y = element_blank(),
408
- legend.position = " none" )
484
+ legend.position = " none" , ... )
485
+ }
486
+
487
+ # ' gam_extract_model papers over gamm4 and mgcv gam models
488
+ # ' @param m either a mgcv::gam or a gamm4::gamm4 model
489
+ # ' @return model componet (gamm4$gam) or pass through
490
+ gam_extract_model <- function (m ) {
491
+ # class(mgcv::gam(...)) # "gam" "glm" "lm"
492
+ if (" gam" %in% class(m )) return (m )
493
+
494
+ # m <- gamm4::gamm4(data=d, random=~(1|lunaid), value ~ s(age,k=5) + hemi)
495
+ # class(gamm4::gamm4(...)) #
496
+ if (" list" %in% class(m ) && " gam" %in% names(m )) return (m $ gam )
497
+
498
+ if (! " model" %in% names(m )) stop(" input model not gam, does not have model, cannot use!" )
499
+ return (m )
500
+ }
501
+
502
+ mgcvgampreddata <- function (model ,predvar ,idvar = NULL ,interval_inc = .1 ,varycovarsname = NULL ,varycovarlevels = NULL ){
503
+ # if (class(model)[1]=="gamm" || (class(model)=="list" && "gam" %in% class(model[['gam']]))){
504
+ # model<-model$gam
505
+ # }
506
+ # TODO:gamm4 vs mgcv
507
+ modeldata <- data.frame (ydata = model $ y , predvar = model $ model [, predvar ])
508
+ preddata <- data.frame (var = seq(min(modeldata $ predvar ,na.rm = T ),
509
+ max(modeldata $ predvar ,na.rm = T ),
510
+ by = interval_inc ))
511
+ names(preddata )[1 ]<- predvar
512
+ if (! identical(find_covars_gam(model $ formula , predvar ),character (0 ))){
513
+ # if (!(length(find_covars_gam(model$formula, predvar))==1 && find_covars_gam(model$formula, predvar)[1]==varycovarsname)){
514
+ for (cv in find_covars_gam(model $ formula ,predvar )){
515
+ x <- model $ model [, cv ]
516
+ if (is.character(x ) || is.factor(x )){
517
+ warning(" gam w/character or factor covar, setting all sim to the first obs for character/first level if factor" )
518
+ y <- x [1 ]
519
+ if (class(x )== " factor" ){y <- levels(x )[1 ]}
520
+ print(sprintf(" covar % set to level %s" ,cv ,y ))
521
+ } else {
522
+ y <- mean(x , na.rm = T )
523
+ }
524
+ preddata [, cv ] <- y
525
+ }
526
+ # }
527
+ }else if (is.null(varycovarsname )){
528
+ preddata $ cov <- " no cov"
529
+ }
530
+
531
+ # if (!(length(find_covars_gam(model$formula, predvar))==1 && find_covars_gam(model$formula, predvar)[1]==varycovarsname)){
532
+ names(preddata )<- c(predvar ,find_covars_gam(model $ formula , predvar ))
533
+ # }
534
+ if (identical(find_covars_gam(model $ formula , predvar ),character (0 )) && is.null(varycovarsname )){
535
+ names(preddata )<- c(predvar ," nullcovar" )
536
+ }
537
+
538
+ if (! is.null(varycovarsname )){
539
+ require(reshape )
540
+ orignameorder <- names(preddata )
541
+ preddata [,varycovarsname ]<- NULL
542
+ preddata <- reshape :: expand.grid.df(preddata ,data.frame (varycovar = varycovarlevels ))
543
+ names(preddata )[names(preddata )== " varycovar" ]<- varycovarsname
544
+ # preddata<-preddata[,orignameorder]
545
+ }
546
+
547
+ yhats <- predict(model ,preddata ,se.fit = TRUE )
548
+ preddata <- cbind(preddata ,yhats $ fit ,yhats $ se.fit )
549
+ names(preddata )<- c(predvar ,find_covars_gam(model $ formula , predvar )," fit" ," se" )
550
+ if (identical(find_covars_gam(model $ formula , predvar ),character (0 )) && is.null(varycovarsname )){
551
+ names(preddata )<- c(predvar ," nullcovar" ," fit" ," se" )
552
+ }else if (identical(find_covars_gam(model $ formula , predvar ),character (0 )) && ! is.null(varycovarsname )){
553
+ names(preddata )<- c(predvar ,varycovarsname ," fit" ," se" )
554
+ }
555
+ preddata $ CI <- 2 * preddata $ se
556
+ return (preddata )
409
557
}
0 commit comments