Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing standard curve models for eDNA applications #292

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export(censored_poisson)
export(delta_beta)
export(delta_gamma)
export(delta_gamma_mix)
export(delta_gaussian)
export(delta_lognormal)
export(delta_lognormal_mix)
export(delta_poisson_link_gamma)
Expand Down Expand Up @@ -69,6 +70,7 @@ export(sdmTMBcontrol)
export(sdmTMBpriors)
export(set_delta_model)
export(spread_sims)
export(stdcurve)
export(student)
export(tidy)
export(truncated_nbinom1)
Expand Down
3 changes: 2 additions & 1 deletion R/enum.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
censored_poisson = 12,
gamma_mix = 13,
lognormal_mix = 14,
nbinom2_mix = 15
nbinom2_mix = 15,
stdcurve = 16
)
.valid_link <- c(
identity = 0,
Expand Down
32 changes: 32 additions & 0 deletions R/families.R
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,24 @@ student <- function(link = "identity", df = 3) {
add_to_family(x)
}

#' @export
#' @rdname families
#' @examples
#' stdcurve(link = "identity")
stdcurve <- function(link = "identity") {
linktemp <- substitute(link)
if (!is.character(linktemp))
linktemp <- deparse(linktemp)
okLinks <- c("identity")
if (linktemp %in% okLinks)
stats <- stats::make.link(linktemp)
else if (is.character(link))
stats <- stats::make.link(link)

x <- c(list(family = "stdcurve", link = linktemp), stats)
add_to_family(x)
}

#' @export
#' @examples
#' tweedie(link = "log")
Expand Down Expand Up @@ -419,3 +437,17 @@ delta_beta <- function(link1 = "logit", link2 = "logit") {
family = c("binomial", "Beta"),
clean_name = "delta_beta(link1 = 'logit', link2 = 'logit')"), class = "family")
}

#' @export
#' @examples
#' delta_gaussian()
#' @rdname families
delta_gaussian <- function(link1 = "logit", link2 = "identity") {
link1 <- match.arg(link1)
link2 <- match.arg(link2)
f1 <- binomial(link = "logit")
f2 <- gaussian(link = "identity")
structure(list(f1, f2, delta = TRUE, link = c("logit", "identity"),
family = c("binomial", "gaussian"),
clean_name = "delta_gaussian(link1 = 'logit', link2 = 'identity')"), class = "family")
}
82 changes: 80 additions & 2 deletions R/fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,14 @@ NULL
#' \code{\link[sdmTMB:families]{truncated_nbinom1()}},
#' \code{\link[sdmTMB:families]{censored_poisson()}},
#' \code{\link[sdmTMB:families]{gamma_mix()}},
#' \code{\link[sdmTMB:families]{nbinom2_mix()}},
#' \code{\link[sdmTMB:families]{stdcurve()}},
#' \code{\link[sdmTMB:families]{lognormal_mix()}},
#' \code{\link[sdmTMB:families]{student()}}, and
#' \code{\link[sdmTMB:families]{tweedie()}}. Supports the delta/hurdle models:
#' \code{\link[sdmTMB:families]{delta_beta()}},
#' \code{\link[sdmTMB:families]{delta_gamma()}},
#' \code{\link[sdmTMB:families]{delta_gaussian()}},
#' \code{\link[sdmTMB:families]{delta_gamma_mix()}},
#' \code{\link[sdmTMB:families]{delta_lognormal_mix()}},
#' \code{\link[sdmTMB:families]{delta_lognormal()}}, and
Expand Down Expand Up @@ -588,6 +591,14 @@ sdmTMB <- function(
data <- droplevels(data) # if data was subset, strips absent factors

delta <- isTRUE(family$delta)
stdcurve <- ifelse(is.null(control$stdcurve_df), FALSE, TRUE)
if(stdcurve) {
family <- stdcurve()
if("plate" %in% names(control$stdcurve_df) == FALSE) cli_abort("`plate` must be a column in the standards dataframe")
if("Ct" %in% names(control$stdcurve_df) == FALSE) cli_abort("`Ct` must be a column in the standards dataframe")
if("known_conc_ul" %in% names(control$stdcurve_df) == FALSE) cli_abort("`known_conc_ul` must be a column in the standards dataframe")
if("plate" %in% names(data) == FALSE) cli_abort("`plate` must be a column in the input dataframe")
}
n_m <- if (delta) 2L else 1L

if (!missing(spatial)) {
Expand Down Expand Up @@ -676,10 +687,11 @@ sdmTMB <- function(
upper <- control$upper
get_joint_precision <- control$get_joint_precision
upr <- control$censored_upper
stdcurve_df <- control$stdcurve_df

dot_checks <- c("lower", "upper", "profile", "parallel", "censored_upper",
"nlminb_loops", "newton_steps", "mgcv", "quadratic_roots", "multiphase",
"newton_loops", "start", "map", "get_joint_precision", "normalize")
"newton_loops", "start", "map", "get_joint_precision", "normalize","stdcurve_df")
.control <- control
# FIXME; automate this from sdmTMcontrol args?
for (i in dot_checks) .control[[i]] <- NULL # what's left should be for nlminb
Expand Down Expand Up @@ -1127,6 +1139,49 @@ sdmTMB <- function(
no_spatial = no_spatial
)

tmb_data$stdcurve_flag <- as.numeric(stdcurve)
if(stdcurve) {
# process standards data
stdcurve_df$present <- ifelse(stdcurve_df$Ct > 0, 1, 0)
#stdcurve_df$plate_n <- as.numeric(as.factor(stdcurve_df$plate))
plates <- data.frame(plate = levels(as.factor(stdcurve_df$plate)))
plates$id <- seq_len(nrow(plates))
stdcurve_df$plate_n <- plates$id[match(stdcurve_df$plate, plates$plate)]
n_pcr <- nrow(plates)
# also add plate number (index) to main dataframe
tmb_data$pcr_idx <- plates$id[match(data$plate, plates$plate)] - 1L # -1 for index -> 0

pos_indx <- which(stdcurve_df$Ct > 0)
tmb_data$N_stand_bin <- nrow(stdcurve_df)
tmb_data$N_stand_pos <- length(pos_indx)
tmb_data$bin_stand <- stdcurve_df$present
tmb_data$pos_stand <- stdcurve_df$Ct[pos_indx]
tmb_data$D_bin_stand <- log(stdcurve_df$known_conc_ul) # could change to log10
tmb_data$D_pos_stand <- log(stdcurve_df$known_conc_ul[pos_indx])# could change to log10
tmb_data$pcr_stand_bin_idx <- stdcurve_df$plate_n - 1L # -1 for index -> 0
tmb_data$pcr_stand_pos_idx <- stdcurve_df$plate_n[pos_indx] - 1L
#pcr_pos_indx <- which(data$Ct > 0)
#tmb_data$N_pcr_pos <- length(pcr_pos_indx)
#tmb_data$pcr_pos <- data$Ct[pcr_pos_indx]
#tmb_data$pcr_pos_idx <- tmb_data$pcr_idx[pcr_pos_indx]
#tmb_data$stand_offset <- 0
} else {
tmb_data$N_stand_bin <- 0L
tmb_data$N_stand_pos <- 0L
tmb_data$bin_stand <- 0L
tmb_data$pos_stand <- 0
tmb_data$D_bin_stand <- 0
tmb_data$D_pos_stand <- 0
tmb_data$pcr_stand_bin_idx <- 0L
tmb_data$pcr_stand_pos_idx <- 0L
tmb_data$pcr_idx <- 0L
#tmb_data$pcr_bin <- 0L
#tmb_data$N_pcr_pos <- 0L
#tmb_data$pcr_pos <- 0L
#tmb_data$pcr_pos_idx <- 0L
n_pcr <- 1
}

if(is.na(sum(nobs_RE))) {
cli_abort("One of the groups used in the factor levels is NA - please remove")
}
Expand Down Expand Up @@ -1161,7 +1216,14 @@ sdmTMB <- function(
ln_epsilon_re_sigma = rep(0, n_m),
epsilon_re = matrix(0, tmb_data$n_t, n_m),
b_smooth = if (sm$has_smooths) matrix(0, sum(sm$sm_dims), n_m) else array(0),
ln_smooth_sigma = if (sm$has_smooths) matrix(0, length(sm$sm_dims), n_m) else array(0)
ln_smooth_sigma = if (sm$has_smooths) matrix(0, length(sm$sm_dims), n_m) else array(0),
std_xi_2 = rep(0, n_pcr),
std_xi_3 = rep(0, n_pcr),
std_xi_0 = rep(0, n_pcr),
std_xi_1 = rep(0, n_pcr),
std_mu = c(40, -3.32, 2, 4), # order: phi0, phi1, beta0, beta1
ln_std_sigma = c(0,-2,0,0)#log(c(2, 2, 5, 0.1)) # order: phi0, phi1, beta0, beta1
#log_sigma_all_stand = 0
)
if (identical(family$link, "inverse") && family$family[1] %in% c("Gamma", "gaussian", "student") && !delta) {
fam <- family
Expand Down Expand Up @@ -1203,6 +1265,15 @@ sdmTMB <- function(
if (est_epsilon_slope == 1L) {
tmb_map <- unmap(tmb_map, "b_epsilon")
}
if(stdcurve) {
tmb_map$std_xi_2 <- NULL
tmb_map$std_xi_3 <- NULL
tmb_map$std_xi_0 <- NULL
tmb_map$std_xi_1 <- NULL
tmb_map$std_mu <- NULL
tmb_map$ln_std_sds <- NULL
#tmb_map$log_sigma_all_stand <- NULL
}

if (multiphase && is.null(previous_fit) && do_fit) {

Expand Down Expand Up @@ -1259,6 +1330,9 @@ sdmTMB <- function(
tmb_random <- c(tmb_random, "epsilon_re")
tmb_map <- unmap(tmb_map, c("epsilon_re"))
}
if(stdcurve) {
tmb_random <- c(tmb_random, "std_xi_2", "std_xi_3", "std_xi_0", "std_xi_1")
}

tmb_map$ar1_phi <- as.numeric(tmb_map$ar1_phi) # strip factors
for (i in seq_along(spatiotemporal)) {
Expand Down Expand Up @@ -1483,6 +1557,10 @@ sdmTMB <- function(
conv <- get_convergence_diagnostics(sd_report)

out_structure$tmb_obj <- tmb_obj

if(stdcurve) {
out_structure$plates <- plates
}
out <- c(out_structure, list(
model = tmb_opt,
sd_report = sd_report,
Expand Down
16 changes: 15 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@
#' cores, as an example, use `TMB::openmp(n = 3, DLL = "sdmTMB")`. But be
#' careful, because it's not always faster with more cores and there is
#' definitely an upper limit.
#' @param stdcurve_df The dataframe containing the data for standard curve estimation.
#' The critical names in the data are `plate` indicating the PCR plate, `Ct` indicating
#' the PCR cycle count at which exponential DNA amplification was detected. If Ct is NA,
#' no amplification was observed, `known_conc_ul` the known concentration of DNA that was
#' spiked into the standards to build the standard curve
#' @param ... Anything else. See the 'Control parameters' section of
#' [stats::nlminb()].
#'
Expand Down Expand Up @@ -92,6 +97,7 @@ sdmTMBcontrol <- function(
profile = FALSE,
get_joint_precision = TRUE,
parallel = getOption("sdmTMB.cores", 1L),
stdcurve_df = NULL,
...) {

if (is_present(mgcv)) {
Expand All @@ -114,6 +120,13 @@ sdmTMBcontrol <- function(
parallel <- as.integer(parallel)
}

if(!is.null(stdcurve_df)) {
# check names are ok
assert_that("plate" %in% names(stdcurve_df))
assert_that("Ct" %in% names(stdcurve_df))
assert_that("known_conc_ul" %in% names(stdcurve_df))
}

out <- named_list(
eval.max,
iter.max,
Expand All @@ -129,7 +142,8 @@ sdmTMBcontrol <- function(
censored_upper,
multiphase,
parallel,
get_joint_precision
get_joint_precision,
stdcurve_df
)
c(out, list(...))
}
Expand Down
67 changes: 67 additions & 0 deletions inst/examples.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
d <- readRDS("inst/eulachon qPCR 2019 and 2021 standards clean.rds")
d$Ct[which(is.na(d$Ct))] = 0 # delta-models in sdmTMB need this

library(sdmTMB)

# Build a mesh to implement the SPDE approach:
mesh <- make_mesh(pcod_2011, c("X", "Y"), cutoff = 20)
fit <- sdmTMB(
density ~ s(depth),
data = pcod_2011, mesh = mesh,
family = tweedie(link = "log")
)

# Use the same model as above, but pass in a 2nd dataframe for std curve estimation
# fit2 <- sdmTMB( # should throw error, because wrong family
# density ~ s(depth),
# data = pcod_2011, mesh = mesh,
# family = tweedie(link = "log"),
# control = sdmTMBcontrol(stdcurve = TRUE, stdcurve_df = d)
# )
#
# fit3 <- sdmTMB(
# density ~ s(depth),
# data = pcod_2011, mesh = mesh,
# family = delta_gamma(),
# control = sdmTMBcontrol(stdcurve_df = d)
# )

# load in observed data
d_obs <- readRDS("inst/eulachon qPCR 2019 and 2021 samples clean.rds")
d_obs$Ct[which(is.na(d_obs$Ct))] = 0 # delta-models in sdmTMB need this
d_obs$utm.lon.m <- d_obs$utm.lon.m / 1000
d_obs$utm.lat.m <- d_obs$utm.lat.m / 1000
mesh <- make_mesh(d_obs, c("utm.lon.m", "utm.lat.m"), cutoff = 20)

# Fitting the model with no standard curve (intercept only)
fit4 <- sdmTMB(
Ct ~ 1,
data = d_obs, mesh = mesh,
family = delta_gaussian()
)

# Fit everything together -- intercept only. AIC 15213.08
fit5 <- sdmTMB(
Ct ~ 1,
data = d_obs, mesh = mesh,
control = sdmTMBcontrol(stdcurve_df = d)
)
#sanity(fit5) # pass

#adding year: doesn't converge
fit6 <- sdmTMB(
Ct ~ year,
data = d_obs, mesh = mesh,
family = delta_gaussian(),
control = sdmTMBcontrol(stdcurve_df = d)
)

fit7 <- sdmTMB(
Ct ~ year,
data = d_obs, mesh = mesh,
family = delta_gaussian(),
time = "year",
spatiotemporal = "iid",
control = sdmTMBcontrol(stdcurve_df = d)
)

8 changes: 8 additions & 0 deletions man/families.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/sdmTMB.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading