Skip to content

Commit 14d5a7c

Browse files
committed
👽 ci col names: ages->data,dff->derivative,ci_low/high->lower/upper
* is_sig now boolean instead of returning dataframe * print for variables that are centered * remove old dead code (simdiff1 funcs, clip_on_sig)
1 parent 572c2af commit 14d5a7c

File tree

1 file changed

+38
-142
lines changed

1 file changed

+38
-142
lines changed

R/growthrate_gam.R

Lines changed: 38 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#' @importFrom gratia derivatives
1616
#' @export
1717
#' @examples
18-
#' d <- read.csv("/Volumes/Phillips/R03_BehavioralBTC/data/btc_R03scoredmeasures_20190313.csv") %>%
18+
#' d <- read.csv("tests/btc_r03_scored.csv") %>%
1919
#' group_by(id) %>%
2020
#' mutate(visit=rank(d8))
2121
#' f <- f1score ~ s(Ageatvisit) + s(visit) + s(id, bs="re")
@@ -24,7 +24,7 @@
2424
#' m <- gamm4::gamm4(f, data=d)
2525
#' ci <- gam_growthrate(m, 'Ageatvisit', 'id')
2626
gam_growthrate <- function(m, agevar, idvar=NULL, qnt=.975) { # TODO: interval_inc
27-
m <- gam_extract_model(m) # gam in list, mgcv not
27+
m <- gam_extract_model(m) # gam 'model' is within list, mgcv has model at top
2828

2929
varying_list <- list(.1); names(varying_list) <- agevar # eg. list(age=.1)
3030
age_step_df <- gen_predict_df(m, varying_list)
@@ -37,6 +37,11 @@ gam_growthrate <- function(m, agevar, idvar=NULL, qnt=.975) { # TODO: interval_i
3737
level=qnt,
3838
type="backward", # NA was first in original code
3939
data=age_step_df)
40+
# f1deriv
41+
# smooth var by_var fs_var data derivative se crit lower upper
42+
# <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
43+
# 1 s(Ageatvisit) Ageatvisit <NA> <NA> 8.01 0.374 0.0615 3.27 0.173 0.575
44+
# 2 s(Ageatvisit) Ageatvisit <NA> <NA> 8.11 0.374 0.0615 3.27 0.173 0.57
4045
#names(f1deriv) <- c("smooth","term","ages","mean_dff","se","crit","ci_low","ci_high")
4146
# c("smooth", "var", "by_var", "fs_var", "data", "derivative","se", "crit", "lower", "upper")
4247
return(f1deriv)
@@ -81,15 +86,19 @@ gen_predict_df <- function(m, varying=list(age=.1)){
8186
if(is.null(colname)) next
8287
#colclass <- attr(terms(m),'dataClass')[colname])
8388
colclass <- class(m$model[,colname])
84-
if("numeric" %in% colclass) {
89+
if(any(c("integer","numeric") %in% colclass)) {
8590
center <- mean(m$model[,colname],na.rm=T)
91+
# ugly to print something for each column
92+
# but likely this doing something unintended
93+
cat(glue::glue("# column '{colname}' (type '{colclass}') centered at {center}"),"\n")
8694
} else {
8795
center <- m$model[middle_idx,colname]
88-
warning(sprintf("set all %s=%s (center %s@%.2f of '%s' type)",
89-
colname, center, median_of,median_of_first, colclass))
96+
warning(sprintf("non-numeric '%s' column! set all %s=%s (closet to center %s@%.2f)",
97+
colclass, colname, center, median_of,median_of_first))
9098
}
9199
pred_at_df[,colname] <- center
92100
}
101+
return(pred_at_df)
93102
}
94103

95104

@@ -123,137 +132,18 @@ find_covars_gam <- function(fml, ...) {
123132
return(no_re)
124133
}
125134

126-
sim_diff1_from_gam <- function(m, agevar, idvar=NULL,
127-
n.iterations=10000, interval_inc=.1) {
128-
v <- m$model[, agevar]
129-
cond_list <- list(seq(min(v), max(v), by=interval_inc))
130-
pp <- data.frame(a=cond_list[[1]], b=Inf)
131-
# names should match what went into the model
132-
names(pp) <- c(agevar, idvar)
133-
134-
# what if idvar is factor (Inf wont work)
135-
if (is.null(idvar)) {
136-
# do nothing. no idvar
137-
} else if (is.factor(m$model[, idvar])){
138-
# select idvar with the middle most random effect
139-
# random effects are coefficents like s(idvar).xxxxx
140-
# where xxxx is the index of the specific idvar factor name
141-
idvarpatt <- sprintf("s\\(%s\\)", idvar)
142-
idvarpatt. <- sprintf("s\\(%s\\).", idvar)
143-
randeff <- m$coefficients[ grep(idvarpatt, names(m$coefficients)) ]
144-
medval <- sort(randeff)[floor(length(randeff)/2)]
145-
med_re_name <- names(which(randeff == medval))
146-
median_idx <- gsub(idvarpatt., "", med_re_name)
147-
median_subj <- levels(m$model[, idvar])[as.numeric(median_idx)]
148-
warning("gam w/factor idvar, ",
149-
"setting the middle most random effect subject: ",
150-
median_subj)
151-
pp[, 2] <- median_subj
152-
153-
# alternatively, select the first
154-
# pp[, 2] <- m$model[1, idvar]
155-
} else {
156-
warning("predition with continous (non-factor) idvar will give 'Inf' fit")
157-
# maybe pick middle value instead?
158-
# pp[, 2] <- mean(m$model[, idvar], na.rm=T)
159-
}
160-
161-
# for all covars, pick out the mean
162-
for (cv in find_covars_gam(m$formula, agevar)) {
163-
x <- m$model[, cv]
164-
if (is.character(x) || is.factor(x) ){
165-
warning("gam w/factor covar, setting all sim to the first!")
166-
y <- x[1]
167-
# TODO: maybe pracma::Mode ?
168-
} else {
169-
y <- mean(x, na.rm=T)
170-
}
171-
pp[, cv] <- y
172-
}
173-
174-
Xp <- predict(m, pp, type="lpmatrix")
175-
176-
mu_beta <- coef(m)
177-
sigma_Vb <- vcov(m)
178-
# variance-covariance matrix of the main parameters fitted model
179-
# used as: a positive-definite symmetric matrix specifying
180-
# the covariance matrix of the variables.
181-
182-
# set.seed(10)
183-
mrand <- MASS::mvrnorm(n.iterations, mu_beta, sigma_Vb)
184-
185-
# gamm$gam doesn't support 'family'
186-
# class( mgcv::gamm(data=mtcars, cyl ~ mpg + s(wt) )$gam)
187-
# [1] "gam"
188-
# class( mgcv::gam(data=mtcars, cyl ~ mpg + s(wt) ))
189-
# [1] "gam" "glm" "lm"
190-
if("glm" %in% class(m)){
191-
ilink <- family(m)$linkinv
192-
} else {
193-
ilink <- m$family$linkinv
194-
}
195-
196-
# only want inetercept and agevar
197-
keep_cols <- grep(paste0("Intercept|", agevar), dimnames(Xp)[[2]], value=T)
198-
Xp_agevar <- Xp[, keep_cols]
199-
mrand_agevar <- mrand[, keep_cols]
200-
201-
# generate a whole bunch of plausable values, get the diff
202-
diffs <- lapply(seq_len(n.iterations), function(i) {
203-
fit <- ilink(Xp_agevar %*% mrand_agevar[i, ])
204-
dff <- c(NA, diff(fit))
205-
return(dff)
206-
})
207-
208-
return(list(pred=diffs, ages=pp[, 1], fit=predict(m, pp)))
209-
}
210-
211-
ci_from_simdiff1 <- function(pred, ages, qnt=c(.025, .975)) {
212-
213-
names(pred) <- 1:length(pred)
214-
mm <- t(dplyr::bind_rows(pred))
215-
216-
# this is the ouptut !
217-
mean_dff <- apply(mm, 2, mean)
218-
ci <- apply(mm, 2, quantile, qnt, na.rm=T)
219-
colnames(ci) <- ages
220-
out <- data.frame(mean_dff=mean_dff, ages=ages)
221-
ci_out <- t(ci)
222-
dimnames(ci_out)[[2]] <- c("ci_low", "ci_high")
223-
return(cbind(out, ci_out))
224-
225-
# NEVER REACHED -- left as bad documentation
226-
# old: return just ci and mean_dff
227-
return(list(ci=ci, mean_dff=mean_dff))
228-
229-
# this is for fun
230-
ages[which.min(ci[1, ])]
231-
ages[which.min(ci[2, ])]
232-
233-
plot(ages, mean_dff)
234-
for (i in 1:10) lines(ages, pred[[i]])
235-
}
236-
237135
too_small <- function(x) abs(x) < 10^-15
238-
clip_on_sig_old <- function(ci){
239-
# if confidence interval includes zero
240-
# signs of x and y will be different, -x * +y < 0
241-
# or if both high and low are extremly close to zero
242-
not_sig <- ci$ci_low * ci$ci_high < 0 |
243-
(too_small(ci$ci_low) & too_small(ci$ci_high))
244-
ci$mean_dff_clip <- ci$mean_dff # TODO MATCH DERIVE NAME HERE
245-
ci$mean_dff_clip[not_sig] <- 0
246-
return(ci)
247-
}
248-
clip_on_sig <- function(ci){ # 20231205 now for gratia
136+
#' add sig column: 0 not significant; 1 is signficiant
137+
is_sig <- function(ci){ # 20231205 now for gratia
249138
# if confidence interval includes zero
250139
# signs of x and y will be different, -x * +y < 0
251140
# or if both high and low are extremly close to zero
252141
not_sig <- ci$lower * ci$upper < 0 |
253142
(too_small(ci$lower) & too_small(ci$upper))
254-
ci$sig <- 1
255-
ci$sig[not_sig] <- 0
256-
return(ci)
143+
return(!not_sig)
144+
#ci$sig <- 1
145+
#ci$sig[not_sig] <- 0
146+
#return(ci)
257147
}
258148

259149

@@ -263,21 +153,22 @@ clip_on_sig <- function(ci){ # 20231205 now for gratia
263153
#' @param ci growthrate_gam output (confidence interval and derivitive)
264154
#' @export
265155
gam_maturation_point <- function(ci) {
156+
# ci has cols: smooth, var, data, derivative, se, crit, lower, upper
266157

267158
# when ci bounds include 0 (different sign), no longer signficant
268159
# clip out insignificant derivitive
269-
if (is.na(ci$ci_low[1])) ci <- ci[-1, ]
160+
if (is.na(ci$lower[1])) ci <- ci[-1, ]
270161

271162
# get mean_df_clip column
272-
if (! "mean_dff_clip" %in% names(ci)) ci <- clip_on_sig(ci)
163+
if (! "sig" %in% names(ci)) ci$sig <- is_sig(ci)
273164

274165
# find maturation point after the first signficant age
275-
onset_sig <- ci$ages[ci$mean_dff_clip != 0]
166+
onset_sig <- ci$data[ci$sig]
276167
maturation_pnt <- NA
277168
if (length(onset_sig)>0L && !all(is.na(onset_sig))) {
278-
mat_points_idx <- ci$mean_dff_clip==0 & ci$ages > onset_sig[1]
169+
mat_points_idx <- !ci$sig & ci$data > onset_sig[1]
279170
if (length(mat_points_idx) > 0L && any(mat_points_idx))
280-
maturation_pnt <- min(ci$ages[mat_points_idx], na.rm=T)
171+
maturation_pnt <- min(ci$data[mat_points_idx], na.rm=T)
281172
}
282173

283174
return(maturation_pnt)
@@ -341,9 +232,8 @@ gam_growthrate_plot <-
341232
if (! "data.frame" %in% class(ci) ) stop("ci is not growthrate_gam() output")
342233
if (! yvar %in% names(model$model) ) stop(yvar, "not in model dataframe!")
343234

344-
ci$mean_dff_clip <- ci$mean_dff # derivative # CHANGE HERE
345235
# when ci bounds include 0 (different sign), no longer signficant
346-
ci <- clip_on_sig(ci)
236+
ci$sig <- is_sig(ci)
347237
maturation_pnt <- gam_maturation_point(ci)
348238

349239
# warn about no matruation point
@@ -353,13 +243,19 @@ gam_growthrate_plot <-
353243
}
354244

355245
# show even unsignficant change in raster if show_all_fill
356-
fill_column <- ifelse(show_all_fill, "mean_dff", "mean_dff_clip")
246+
# was mean_dff vs mean_dff_clip, now derivative or clip (TODO: not ci$data?)
247+
fill_column <- "derivative"
248+
if(!show_all_fill) {
249+
fill_column <- "deriv_clip"
250+
ci$deriv_clip <- ci$derivative
251+
ci$deriv_clip[!ci$sig] <- 0 # should be NA? will plot as grey in tile
252+
}
357253

358254
## setup derivitive raster plot
359-
deriv_range <- range(ci$mean_dff, na.rm=T)
255+
deriv_range <- range(ci[,fill_column], na.rm=T)
360256
tile <-
361257
ggplot(ci[-1, ]) + # don't plot first row (is NA)
362-
aes(x=ages, y=1, fill=!!sym(fill_column)) +
258+
aes(x=data, y=1, fill=!!sym(fill_column)) +
363259
geom_raster(interpolate=TRUE) +
364260
scale_fill_gradient2(
365261
low = "blue", mid = "white", high = "red",
@@ -382,7 +278,7 @@ gam_growthrate_plot <-
382278

383279
# predictions
384280
modeldata<-data.frame(ydata=model$y, agevar=model$model[, agevar])
385-
condlist <- list(a=ci$ages)
281+
condlist <- list(a=ci$data)
386282
names(condlist) <- agevar
387283
# 20190826 BTC - remove random effects (bug fix)
388284
agepred <- itsadug::get_predictions(model, cond = condlist, rm.ranef=TRUE)
@@ -484,7 +380,7 @@ gam_plot_raster_theme <- function(...) {
484380
legend.position = "none", ...)
485381
}
486382

487-
#' gam_extract_model papers over gamm4 and mgcv gam models
383+
#' gam_extract_model abstract over gamm4 and mgcv gam models
488384
#' @param m either a mgcv::gam or a gamm4::gamm4 model
489385
#' @return model componet (gamm4$gam) or pass through
490386
gam_extract_model <- function(m) {

0 commit comments

Comments
 (0)