diff --git a/.github/workflows/R-CMD-build.yaml b/.github/workflows/R-CMD-build.yaml index 01e9167..25b5f8b 100644 --- a/.github/workflows/R-CMD-build.yaml +++ b/.github/workflows/R-CMD-build.yaml @@ -3,7 +3,7 @@ on: push: branches: - 'build' + 'build' pull_request: branches: - master @@ -40,7 +40,6 @@ jobs: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true - extra-repositories: "https://inla.r-inla-download.org/R/testing" - name: Install system dependencies on MacOS (X11, gdal) if: runner.os == 'macOS' @@ -54,6 +53,7 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get update -y && sudo apt-get install -y libglu1-mesa-dev + sudo apt-get install -y libproj-dev gdal-bin - uses: r-lib/actions/setup-r-dependencies@v2 with: diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml index a4c492c..1dd5a46 100644 --- a/.github/workflows/R-CMD-check-HTML5.yaml +++ b/.github/workflows/R-CMD-check-HTML5.yaml @@ -1,58 +1,56 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
-# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions -on: - push: - branches: [html5] - pull_request: - branches: [html5] - -name: R-CMD-check-html5 - - -jobs: - HTML5-check: - runs-on: ubuntu-latest - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - extra-repositories: "https://inla.r-inla-download.org/R/stable" - - - - name: Install pdflatex - run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra - - - uses: r-lib/actions/setup-pandoc@v2 - - - name: Install system dependencies on MacOS (X11, gdal) - if: runner.os == 'macOS' - run: | - brew install --cask xquartz - brew install pkg-config - brew install proj@8 - brew install gdal - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - dependencies: '"all"' - extra-packages: | - rcmdcheck - - - name: Session info - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) - shell: Rscript {0} - - - uses: r-lib/actions/check-r-package@v2 - with: - args: '"--as-cran"' - build_args: 'character()' - #error-on: '"note"' +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
+# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +on: + push: + branches: [html5] + pull_request: + branches: [html5] + +name: R-CMD-check-html5 + + +jobs: + HTML5-check: + runs-on: ubuntu-latest + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + + - name: Install pdflatex + run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@8 + brew install gdal + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"all"' + extra-packages: | + rcmdcheck + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - uses: r-lib/actions/check-r-package@v2 + with: + args: '"--as-cran"' + build_args: 'character()' + #error-on: '"note"' diff --git a/.github/workflows/R-CMD-check-no-suggests.yaml b/.github/workflows/R-CMD-check-no-suggests.yaml deleted file mode 100644 index cea02e9..0000000 --- a/.github/workflows/R-CMD-check-no-suggests.yaml +++ /dev/null @@ -1,96 +0,0 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions -# -# Largely copied from: https://github.com/inlabru-org/inlabru/blob/devel/.github/workflows/R-CMD-check-no-suggests.yaml -# Want to test without suggests to ensure things don't fail on cran when INLA isn't there. 
- -on: - push: - branches: - '**' - pull_request: - branches: - - devel - - master - -name: R-CMD-check-no-suggests - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} - # - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - extra-repositories: "https://inla.r-inla-download.org/R/testing" - - - uses: r-lib/actions/setup-pandoc@v2 - - - name: Install system dependencies on MacOS (X11, gdal) - if: runner.os == 'macOS' - run: | - brew install --cask xquartz - brew install pkg-config - brew install proj@9 - brew install gdal - - - name: Has inla? Check. - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - "INLA" %in% pkgs - shell: Rscript {0} - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - dependencies: '"hard"' - extra-packages: | - rcmdcheck - testthat - - - name: Has inla? Check, and remove. 
- run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - "INLA" %in% pkgs - if ("INLA" %in% pkgs) { - remove.packages("INLA") - } - shell: Rscript {0} - - - name: Session info - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) - shell: Rscript {0} - - - uses: r-lib/actions/check-r-package@v2 - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - _R_CHECK_FORCE_SUGGESTS_: false - with: - build_args: 'c("--no-manual", "--no-build-vignettes")' - args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")' - - - - \ No newline at end of file diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 41332d5..68a3933 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,9 +1,11 @@ # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions + +# inspired by: https://github.com/r-spatial/sf/blob/main/.github/workflows/R-CMD-check.yaml on: push: branches: - '**' + '**' pull_request: branches: - devel @@ -21,15 +23,15 @@ jobs: fail-fast: false matrix: config: - # - {os: windows-latest, r: 'release'} - # - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'oldrel-2'} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ 
matrix.config.rspm }} steps: - uses: actions/checkout@v2 @@ -37,7 +39,6 @@ jobs: - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} - extra-repositories: "https://inla.r-inla-download.org/R/stable" - uses: r-lib/actions/setup-pandoc@v2 @@ -46,21 +47,13 @@ jobs: run: | brew install --cask xquartz brew install pkg-config - brew install proj@8 + brew install proj brew install gdal - uses: r-lib/actions/setup-r-dependencies@v2 with: - dependencies: '"all"' - extra-packages: | - rcmdcheck - - - name: Session info - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) - shell: Rscript {0} + extra-packages: any::rcmdcheck + needs: check - uses: r-lib/actions/check-r-package@v2 env: diff --git a/.gitignore b/.gitignore index eef5be0..847e7ad 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,11 @@ -inst/doc -.Rproj.user -.Rhistory -.Rproj -.RData -*.o -*.so -vignettes/disaggregation_cache/* -vignettes/disaggregation_files/* -.github/workflows/R-CMD-check-HTML5.archyaml -vignettes/spatio_temporal_disaggregation.Rmd \ No newline at end of file +inst/doc +.Rproj.user +.Rhistory +*.Rproj +.RData +*.o +*.so +vignettes/disaggregation_cache/* +vignettes/disaggregation_files/* +.github/workflows/R-CMD-check-HTML5.archyaml +vignettes/spatio_temporal_disaggregation.Rmd diff --git a/DESCRIPTION b/DESCRIPTION index c49bce4..f4c7def 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: disaggregation Type: Package Title: Disaggregation Modelling -Version: 0.3.0 +Version: 0.4.0 Authors@R: c( person("Anita", "Nandi", email = "anita.k.nandi@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-5087-2494")), person("Tim", "Lucas", email = "timcdlucas@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4694-8107")), @@ -17,7 +17,7 @@ Description: Fits disaggregation regression models using 'TMB' ('Template Model License: MIT + file LICENSE Encoding: UTF-8 
LazyData: true -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Imports: splancs, Matrix, @@ -26,16 +26,15 @@ Imports: dplyr, ggplot2, cowplot, + rSPDE, sparseMVN, fmesher, tidyterra, terra, sf, utils -Additional_repositories: https://inla.r-inla-download.org/R/stable Suggests: testthat, - INLA, knitr, rmarkdown, SpatialEpi diff --git a/NAMESPACE b/NAMESPACE index 47705eb..854a626 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,11 +13,11 @@ S3method(summary,disag_prediction) export(as.disag_data) export(build_mesh) export(disag_model) -export(fit_model) export(getCovariateRasters) export(getPolygonData) export(getStartendindex) export(make_model_object) +export(plot_disag_model_data) export(predict_model) export(predict_uncertainty) export(prepare_data) diff --git a/R/build_mesh.R b/R/build_mesh.R index b005a55..70d93b5 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -7,16 +7,18 @@ #' and having a small region outside with a coarser mesh to avoid edge effects. #' #' Six mesh parameters can be specified as arguments: \emph{convex}, \emph{concave} and \emph{resolution}, -#' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself, -#' with the names meaning the same as used by INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. +#' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cutoff} and \emph{offset}, to control the mesh itself, +#' with the names meaning the same as used by the fmesher functions \emph{fm_nonconvex_hull_inla} and \emph{fm_mesh_2d}. #' #' Defaults are: -#' pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, 8), cut = 0.4, offset = c(1, 15)). +#' pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, 8), cutoff = 0.4, offset = c(1, 15)). #' #' @param shapes sf covering the region under investigation. -#' @param mesh.args list of parameters that control the mesh structure. 
\emph{convex}, \emph{concave} and \emph{resolution}, -#' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself, +#' @param mesh_args list of parameters that control the mesh structure. \emph{convex}, \emph{concave} and \emph{resolution}, +#' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cutoff} and \emph{offset}, to control the mesh itself, #' with the parameters having the same meaning as in the INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. +#' \emph{cut} has been deprecated - use \emph{cutoff} instead. +#' @param mesh.args Deprecated. #' #' @return An inla.mesh object #' @@ -43,25 +45,33 @@ #' #' @export -build_mesh <- function(shapes, mesh.args = NULL) { +build_mesh <- function(shapes, mesh_args = NULL, mesh.args = NULL) { + + if (!is.null(mesh.args) && missing(mesh_args)) { + mesh_args <- mesh.args + message("mesh.args is deprecated and will be removed in a future version - please use mesh_args instead") + } stopifnot(inherits(shapes, 'sf')) - if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) + if(!is.null(mesh_args)) stopifnot(inherits(mesh_args, 'list')) limits <- sf::st_bbox(shapes) hypotenuse <- sqrt((limits$xmax - limits$xmin)^2 + (limits$ymax - limits$ymin)^2) maxedge <- hypotenuse/10 + if (!(is.null(mesh_args)) && ("cut" %in% names(mesh_args))){ + mesh_args$cutoff <- mesh_args$cut + message("cut is deprecated and will be removed in a future version - please use cutoff instead") + } pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(maxedge, maxedge * 2), - cut = 0.1, + cutoff = 0.1, offset = c(hypotenuse / 10, hypotenuse / 10)) - - pars[names(mesh.args)] <- mesh.args + pars[names(mesh_args)] <- mesh_args outline <- sf::st_sf(sf::st_union(sf::st_convex_hull(shapes))) @@ -75,7 +85,7 @@ build_mesh <- function(shapes, mesh.args = NULL) { mesh <- fmesher::fm_mesh_2d( boundary = outline.hull, max.edge = 
pars$max.edge, - cut = pars$cut, + cutoff = pars$cutoff, offset = pars$offset) diff --git a/R/deprecated.R b/R/deprecated.R deleted file mode 100644 index 5d3a206..0000000 --- a/R/deprecated.R +++ /dev/null @@ -1,11 +0,0 @@ -#' Deprecated functions in disaggregation -#' -#' These functions still work but will be removed (defunct) in the next version. -#' -#' \itemize{ -#' \item \code{\link{fit_model}}: This function is deprecated, and will -#' be removed in the next version of this package. -#' } -#' -#' @name disaggregation-deprecated -NULL \ No newline at end of file diff --git a/R/fit_model.R b/R/fit_model.R index e448890..b42ea67 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -1,6 +1,6 @@ #' Fit the disaggregation model #' -#' \emph{fit_model} function takes a \emph{disag_data} object created by +#' \emph{disag_model} function takes a \emph{disag_data} object created by #' \code{\link{prepare_data}} and performs a Bayesian disaggregation fit. #' #' \strong{The model definition} @@ -43,7 +43,19 @@ #' #' #' @param data disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting -#' @param priors list of prior values +#' @param priors list of prior values: +#' \itemize{ +#' \item \code{priormean_intercept} +#' \item \code{priorsd_intercept} +#' \item \code{priormean_slope} +#' \item \code{priorsd_slope} +#' \item \code{prior_rho_min} +#' \item \code{prior_rho_prob} +#' \item \code{prior_sigma_max} +#' \item \code{prior_sigma_prob} +#' \item \code{prior_iideffect_sd_max} +#' \item \code{prior_iideffect_sd_prob} +#' } #' @param family likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson} #' @param link link function: \emph{logit}, \emph{log} or \emph{identity} #' @param iterations number of iterations to run the optimisation for @@ -66,7 +78,7 @@ #' \item{data }{The \emph{disag_data} object used as an input to the model.} #' \item{model_setup }{A list of information on the model 
setup. Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} #' -#' @name fit_model +#' @name disag_model #' @references Nanda et al. (2023) disaggregation: An R Package for Bayesian #' Spatial Disaggregation Modeling. #' @@ -105,43 +117,11 @@ #' test_data <- prepare_data(polygon_shapefile = spdf, #' covariate_rasters = cov_stack) #' -#' result <- fit_model(test_data, iterations = 2) +#' result <- disag_model(test_data, iterations = 2) #' } #' #' @export -fit_model <- function(data, - priors = NULL, - family = 'gaussian', - link = 'identity', - iterations = 100, - field = TRUE, - iid = TRUE, - hess_control_parscale = NULL, - hess_control_ndeps = 1e-4, - silent = TRUE) { - - .Deprecated(new = 'disag_model', msg = "'fit_model' will be removed in the next version. Please use 'disag_model' instead") - - model_output <- disag_model(data, - priors = priors, - family = family, - link = link, - iterations = iterations, - field = field, - iid = iid, - hess_control_parscale = hess_control_parscale, - hess_control_ndeps = hess_control_ndeps, - silent = silent) - - return(model_output) - - -} - -#' @export -#' @rdname fit_model - disag_model <- function(data, priors = NULL, family = 'gaussian', @@ -180,12 +160,9 @@ disag_model <- function(data, # Calc uncertainty using the fixed hessian from above. 
sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE, hessian.fixed = hess) - - sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE) - # Rename parameters to match layers - # Need to change in sd_out as well - # names(opt$par)[names(opt$par) == 'slope'] <- names(data$covariate_rasters) + names(sd_out$par.fixed)[names(sd_out$par.fixed) == "slope"] <- names(data$covariate_rasters) + names(opt$par)[names(opt$par) == "slope"] <- names(data$covariate_rasters) model_output <- list(obj = obj, opt = opt, @@ -356,8 +333,10 @@ make_model_object <- function(data, nu = 1 # Sort out mesh bits - spde <- (INLA::inla.spde2.matern(data$mesh, alpha = nu + 1)$param.inla)[c("M0", "M1", "M2")] - Apix <- fmesher::fm_evaluator(data$mesh, loc = data$coordsForFit)$proj$A + spde <- rSPDE::matern.operators(mesh = data$mesh, alpha = nu + 1, compute_higher_order = TRUE)$fem_mesh_matrices + spde[[4]] <- NULL + names(spde) <- c("M0", "M1", "M2") + Apix <- fmesher::fm_evaluator(data$mesh, loc = data$coords_for_fit)$proj$A n_s <- nrow(spde$M0) cov_matrix <- as.matrix(data$covariate_data[, (names(data$covariate_data) %in% names(data$covariate_rasters))]) @@ -423,7 +402,7 @@ make_model_object <- function(data, aggregation_values = data$aggregation_pixels, Apixel = Apix, spde = spde, - startendindex = data$startendindex, + start_end_index = data$start_end_index, polygon_response_data = data$polygon_data$response, response_sample_size = data$polygon_data$N, family = family_id, diff --git a/R/plotting.R b/R/plotting.R index f9b9a2b..57a2083 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -42,24 +42,18 @@ plot.disag_data <- function(x, which = c(1,2,3), ...) { return(invisible(plots)) } -#' Plot results of fitted model -#' -#' Plotting function for class \emph{disag_model} (the result of the disaggregation fitting). -#' -#' Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. 
+ +#' Convert results of the model ready for plotting #' #' @param x Object of class \emph{disag_model} to be plotted. -#' @param ... Further arguments to \emph{plot} function. #' -#' @return A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot -#' -#' @import ggplot2 -#' @method plot disag_model +#' @return A list that contains: +#' \item{posteriors}{A data.frame of posteriors} +#' \item{data}{A data.frame of observed and predicted data} +#' \item{title}{The title of the observed vs. predicted plot} #' #' @export - - -plot.disag_model <- function(x, ...){ +plot_disag_model_data <- function(x){ parameter <- sd <- obs <- pred <- NULL posteriors <- as.data.frame(summary(x$sd_out, select = 'fixed')) @@ -74,15 +68,6 @@ plot.disag_model <- function(x, ...){ posteriors$parameter[grepl('slope', posteriors$parameter)] <- names(x$data$covariate_rasters) } - fixedeffects <- ggplot() + - geom_errorbar(posteriors, mapping = aes(x = parameter, ymin = mean - sd, - ymax = mean + sd), - width = 0.2, color = "blue") + - geom_point(posteriors, mapping = aes(x = parameter, y = mean)) + - facet_wrap( ~ type , scales = 'free') + - coord_flip() + - ggtitle("Parameters (excluding random effects)") - report <- x$obj$report() # Form of the observed and predicted results depends on the likelihood function used @@ -100,12 +85,80 @@ plot.disag_model <- function(x, ...){ title <- 'In sample performance: incidence rate' } - data <- data.frame(obs = observed_data, pred = predicted_data) + if (x$model_setup$iid){ + pars <- x$obj$env$last.par.best + iid_poly <- pars[names(pars) == "iideffect"] + pred_no_iid <- predicted_data / exp(iid_poly) + data <- data.frame(obs = observed_data, pred = predicted_data, pred_no_iid = pred_no_iid) + } else { + data <- data.frame(obs = observed_data, pred = predicted_data) + } + - obspred <- ggplot(data, aes(x = obs, y = pred)) + - geom_point() + - geom_abline(intercept = 0, slope = 1, color = 'blue') + - 
ggtitle(title) + return(list(posteriors = posteriors, + data = data, + title = title)) + +} + +#' Plot results of fitted model +#' +#' Plotting function for class \emph{disag_model} (the result of the disaggregation fitting). +#' +#' Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. +#' +#' @param x Object of class \emph{disag_model} to be plotted. +#' @param include_iid logical. Whether or not to include predictions that include the +#' IID effect. +#' @param ... Further arguments to \emph{plot} function. +#' +#' @return A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot +#' +#' @import ggplot2 +#' @method plot disag_model +#' +#' @export + +plot.disag_model <- function(x, include_iid = FALSE, ...){ + + mod_data <- plot_disag_model_data(x) + + parameter <- obs <- pred <- pred_no_iid <- NULL + + posteriors <- mod_data$posteriors + data <- mod_data$data + title <- mod_data$title + + fixedeffects <- ggplot() + + geom_errorbar(posteriors, mapping = aes(x = parameter, ymin = mean - sd, + ymax = mean + sd), + width = 0.2, color = "blue") + + geom_point(posteriors, mapping = aes(x = parameter, y = mean)) + + facet_wrap( ~ type , scales = 'free') + + coord_flip() + + ggtitle("Parameters (excluding random effects)") + + if (x$model_setup$iid){ + obspred <- ggplot(data, aes(x = obs, y = pred_no_iid, color = "Without IID")) + + geom_point() + + geom_abline(intercept = 0, slope = 1, color = 'blue') + + scale_color_manual(values = c("Without IID" = "black")) + + ggtitle(title) + + labs(x = "Observed", y = "Predicted", color = NULL) + + theme(legend.position.inside = c(0, 1), + legend.justification = c(0, 1)) + if (include_iid){ + obspred$scales$scales <- list() + obspred <- obspred + + geom_point(data = data, aes(x = obs, y = pred, color = "With IID")) + + scale_color_manual(values = c("Without IID" = "black", "With IID" = "red")) + } + } else { + obspred <- ggplot(data, aes(x = obs, y = 
pred)) + + geom_point() + + geom_abline(intercept = 0, slope = 1, color = 'blue') + + ggtitle(title) + } plots <- list(fixedeffects, obspred) print(cowplot::plot_grid(plotlist = plots)) @@ -156,8 +209,8 @@ plot_polygon_data <- function(polygon_shapefile, names) { # Rename the response variable for plotting shp <- polygon_shapefile - shp <- dplyr::rename(shp, 'response' = names$response_var) - shp <- dplyr::rename(shp, 'area_id' = names$id_var) + names(shp)[names(shp) == names$id_var] <- 'area_id' + names(shp)[names(shp) == names$response_var] <- 'response' area_id <- long <- lat <- group <- response <- NULL stopifnot(inherits(shp, 'sf')) diff --git a/R/predict.R b/R/predict.R index d1b1097..c692ebc 100644 --- a/R/predict.R +++ b/R/predict.R @@ -4,21 +4,22 @@ #' predicts mean and uncertainty maps. #' #' To predict over a different spatial extent to that used in the model, -#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. +#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{new_data}. #' If this is not given predictions are made over the data used in the fit. #' #' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. #' -#' For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated +#' For the uncertainty calculations, the number of the realisations and the size of the credible interval to be calculated #' are given by the arguments \emph{N} and \emph{CI} respectively. #' #' @param object disag_model object returned by disag_model function. -#' @param newdata If NULL, predictions are made using the data in model_output. +#' @param new_data If NULL, predictions are made using the data in model_output. #' If this is a raster stack or brick, predictions will be made over this data. #' @param predict_iid logical. 
If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE. #' @param N Number of realisations. Default: 100. -#' @param CI Confidence interval to be calculated from the realisations. Default: 0.95. +#' @param CI Credible interval to be calculated from the realisations. Default: 0.95. #' @param ... Further arguments passed to or from other methods. +#' @param newdata Deprecated. #' #' @return An object of class \emph{disag_prediction} which consists of a list of two objects: #' \item{mean_prediction }{List of: @@ -45,11 +46,16 @@ #' @export -predict.disag_model <- function(object, newdata = NULL, predict_iid = FALSE, N = 100, CI = 0.95, ...) { +predict.disag_model <- function(object, new_data = NULL, predict_iid = FALSE, N = 100, CI = 0.95, newdata = NULL, ...) { - mean_prediction <- predict_model(object, newdata = newdata, predict_iid) + if (!is.null(newdata) && missing(new_data)) { + new_data <- newdata + message("newdata is deprecated and will be removed in a future version - please use new_data instead") + } + + mean_prediction <- predict_model(object, new_data = new_data, predict_iid) - uncertainty_prediction <- predict_uncertainty(object, newdata = newdata, predict_iid, N, CI) + uncertainty_prediction <- predict_uncertainty(object, new_data = new_data, predict_iid, N, CI) prediction <- list(mean_prediction = mean_prediction, uncertainty_prediction = uncertainty_prediction) @@ -68,15 +74,16 @@ predict.disag_model <- function(object, newdata = NULL, predict_iid = FALSE, N = #' to the linear predictor. #' #' To predict over a different spatial extent to that used in the model, -#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. +#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{new_data}. #' If this is not given predictions are made over the data used in the fit. 
#' #' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. #' #' @param model_output disag_model object returned by disag_model function -#' @param newdata If NULL, predictions are made using the data in model_output. -#' If this is a raster stack or brick, predictions will be made over this data. Default NULL. +#' @param new_data If NULL, predictions are made using the data in model_output. +#' If this is a SpatRaster, predictions will be made over this data. Default NULL. #' @param predict_iid If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE. +#' @param newdata Deprecated. #' #' @return The mean prediction, which is a list of: #' \itemize{ @@ -95,9 +102,14 @@ predict.disag_model <- function(object, newdata = NULL, predict_iid = FALSE, N = #' #' @export -predict_model <- function(model_output, newdata = NULL, predict_iid = FALSE) { +predict_model <- function(model_output, new_data = NULL, predict_iid = FALSE, newdata = NULL) { - objects_for_prediction <- setup_objects(model_output, newdata = newdata, predict_iid) + if (!is.null(newdata) && missing(new_data)) { + new_data <- newdata + message("newdata is deprecated and will be removed in a future version - please use new_data instead") + } + + objects_for_prediction <- setup_objects(model_output, new_data = new_data, predict_iid) pars <- model_output$obj$env$last.par.best pars <- split(pars, names(pars)) @@ -118,20 +130,21 @@ predict_model <- function(model_output, newdata = NULL, predict_iid = FALSE) { #' Function returns a SpatRaster of the realisations as well as the upper and lower credible interval rasters. #' #' To predict over a different spatial extent to that used in the model, -#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. 
+#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{new_data}. #' If this is not given predictions are made over the data used in the fit. #' #' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. #' -#' The number of the realisations and the size of the confidence interval to be calculated. +#' The number of the realisations and the size of the credible interval to be calculated. #' are given by the arguments \emph{N} and \emph{CI} respectively. #' #' @param model_output disag_model object returned by disag_model function. -#' @param newdata If NULL, predictions are made using the data in model_output. +#' @param new_data If NULL, predictions are made using the data in model_output. #' If this is a raster stack or brick, predictions will be made over this data. Default NULL. #' @param predict_iid If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE. #' @param N number of realisations. Default: 100. -#' @param CI confidence interval. Default: 0.95. +#' @param CI credible interval. Default: 0.95. +#' @param newdata Deprecated. 
#' #' @return The uncertainty prediction, which is a list of: #' \itemize{ @@ -148,9 +161,14 @@ predict_model <- function(model_output, newdata = NULL, predict_iid = FALSE) { #' #' @export -predict_uncertainty <- function(model_output, newdata = NULL, predict_iid = FALSE, N = 100, CI = 0.95) { +predict_uncertainty <- function(model_output, new_data = NULL, predict_iid = FALSE, N = 100, CI = 0.95, newdata = NULL) { + + if (!is.null(newdata) && missing(new_data)) { + new_data <- newdata + message("newdata is deprecated and will be removed in a future version - please use new_data instead") + } - objects_for_prediction <- setup_objects(model_output, newdata = newdata, predict_iid) + objects_for_prediction <- setup_objects(model_output, new_data = new_data, predict_iid) parameters <- model_output$obj$env$last.par.best @@ -218,34 +236,34 @@ getCoords <- function(data) { } # Helper to check and sort out new raster data. -check_newdata <- function(newdata, model_output){ - if(is.null(newdata)) return(NULL) - if(!is.null(newdata)){ - if(!(inherits(newdata, c('SpatRaster')))){ - stop('newdata should be NULL or a SpatRaster') +check_new_data <- function(new_data, model_output){ + if(is.null(new_data)) return(NULL) + if(!is.null(new_data)){ + if(!(inherits(new_data, c('SpatRaster')))){ + stop('new_data should be NULL or a SpatRaster') } - if(!all(names(model_output$data$covariate_rasters) %in% names(newdata))){ - stop('All covariates used to fit the model must be in newdata') + if(!all(names(model_output$data$covariate_rasters) %in% names(new_data))){ + stop('All covariates used to fit the model must be in new_data') } # Take just the covariates we need and in the right order - newdata <- newdata[[names(model_output$data$covariate_rasters)]] + new_data <- new_data[[names(model_output$data$covariate_rasters)]] } - return(newdata) + return(new_data) } # Function to setup covariates, field and iid objects for prediction -setup_objects <- function(model_output, newdata = NULL, 
predict_iid = FALSE) { +setup_objects <- function(model_output, new_data = NULL, predict_iid = FALSE) { - newdata <- check_newdata(newdata, model_output) + new_data <- check_new_data(new_data, model_output) # Pull out original data data <- model_output$data # Decide which covariates to predict over - if(is.null(newdata)){ + if(is.null(new_data)){ covariates <- data$covariate_rasters } else { - covariates <- newdata + covariates <- new_data } data$covariate_rasters <- covariates @@ -256,8 +274,8 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) { } if(model_output$model_setup$field) { - if(is.null(newdata)) { - coords <- data$coordsForPrediction + if(is.null(new_data)) { + coords <- data$coords_for_prediction } else { coords <- getCoords(data) } diff --git a/R/prepare_data.R b/R/prepare_data.R index f95cd31..0d7014e 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -1,7 +1,7 @@ #' Prepare data for disaggregation modelling #' #' \emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model. -#' Designed to be used in the \emph{disaggregation::fit_model} function. +#' Designed to be used in the \emph{disaggregation::disag_model} function. #' #' Takes a sf object with the response data and a SpatRaster of covariates. #' @@ -32,9 +32,12 @@ #' @param id_var Name of column in sf object with the polygon id. #' @param response_var Name of column in sf object with the response data. #' @param sample_size_var For survey data, name of column in sf object (if it exists) with the sample size data. -#' @param mesh.args list of parameters that control the mesh structure with the same names as used by INLA. -#' @param na.action logical. If TRUE, NAs in response will be removed, covariate NAs will be given the median value, aggregation NAs will be set to zero. Default FALSE (NAs in response or covariate data within the polygons will give errors). -#' @param makeMesh logical. 
If TRUE, build INLA mesh, takes some time. Default TRUE. +#' @param mesh_args list of parameters that control the mesh structure with the same names as used by INLA. +#' @param na_action logical. If TRUE, NAs in response will be removed, covariate NAs will be given the median value, aggregation NAs will be set to zero. Default FALSE (NAs in response or covariate data within the polygons will give errors). +#' @param make_mesh logical. If TRUE, build INLA mesh, takes some time. Default TRUE. +#' @param mesh.args Deprecated. +#' @param na.action Deprecated. +#' @param makeMesh Deprecated. #' @param ncores Deprecated. #' #' @return A list is returned of class \code{disag_data}. @@ -45,9 +48,9 @@ #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} #' \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} -#' \item{coordsForFit }{A matrix with two columns of x, y coordinates of pixels within the polygons. Used to make the spatial field.} -#' \item{coordsForPrediction }{A matrix with two columns of x, y coordinates of pixels in the whole Raster. Used to make predictions.} -#' \item{startendindex }{A matrix with two columns containing the start and end index of the pixels within each polygon.} +#' \item{coords_for_fit }{A matrix with two columns of x, y coordinates of pixels within the polygons. Used to make the spatial field.} +#' \item{coords_for_prediction }{A matrix with two columns of x, y coordinates of pixels in the whole Raster. 
Used to make predictions.} +#' \item{start_end_index }{A matrix with two columns containing the start and end index of the pixels within each polygon.} #' \item{mesh }{A INLA mesh to be used for the spatial field of the disaggregation model.} #' @import splancs #' @import utils @@ -91,37 +94,51 @@ prepare_data <- function(polygon_shapefile, id_var = 'area_id', response_var = 'response', sample_size_var = NULL, + mesh_args = NULL, + na_action = FALSE, + make_mesh = TRUE, mesh.args = NULL, - na.action = FALSE, - makeMesh = TRUE, + na.action = NULL, + makeMesh = NULL, ncores = NULL) { - if (!missing("ncores")) - warning("The ncores argument has been deprecated") + # Deal with deprecated parameters + + if (!is.null(na.action) && missing(na_action)) { + na_action <- na.action + message("na.action is deprecated and will be removed in a future version - please use na_action instead") + } + + if (!is.null(mesh.args) && missing(mesh_args)) { + mesh_args <- mesh.args + message("mesh.args is deprecated and will be removed in a future version - please use mesh_args instead") + } + + if (!is.null(makeMesh) && missing(make_mesh)) { + make_mesh <- makeMesh + message("makeMesh is deprecated and will be removed in a future version - please use make_mesh instead") + } + + if (!missing("ncores")) + warning("ncores is deprecated and will be removed in a future version") stopifnot(inherits(polygon_shapefile, 'sf')) stopifnot(inherits(covariate_rasters, 'SpatRaster')) if(!is.null(aggregation_raster)) stopifnot(inherits(aggregation_raster, 'SpatRaster')) stopifnot(inherits(id_var, 'character')) stopifnot(inherits(response_var, 'character')) - if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) + if(!is.null(mesh_args)) stopifnot(inherits(mesh_args, 'list')) # Check for NAs in response data na_rows <- is.na(polygon_shapefile[, response_var, drop = TRUE]) if(sum(na_rows) != 0) { - if(na.action) { + if(na_action) { polygon_shapefile <- polygon_shapefile[!na_rows, ] } else { 
stop('There are NAs in the response data. Please deal with these, or set na.action = TRUE') } } - polygon_data <- getPolygonData(polygon_shapefile, id_var, response_var, sample_size_var) - - - # Save raster layer names so we can reassign it to make sure names don't change. - cov_names <- names(covariate_rasters) - # If no aggregation raster is given, use a 'unity' raster if(is.null(aggregation_raster)) { aggregation_raster <- covariate_rasters[[1]] @@ -129,6 +146,39 @@ prepare_data <- function(polygon_shapefile, } names(aggregation_raster) <- 'aggregation_raster' + # Check for polygons with zero aggregation + aggregation_sum <- terra::extract(aggregation_raster, polygon_shapefile, cells = TRUE, na.rm = TRUE, ID = TRUE, fun = "sum") + zero_aggregation <- aggregation_sum[aggregation_sum[, 2] == 0,] + if (nrow(zero_aggregation) > 0){ + zero_polygons <- polygon_shapefile[zero_aggregation$ID,] + if (na_action){ + for (p in zero_polygons[[id_var]]){ + message(paste0(p, " has zero aggregation values and has been removed from the response data")) + } + polygon_shapefile <- polygon_shapefile[-zero_aggregation$ID,] + } else { + for (p in zero_polygons[[id_var]]){ + message(paste0(p, " has zero aggregation values")) + } + stop("Please remove the polygons from the response data or set na_action = TRUE") + } + } + + polygon_data <- getPolygonData(polygon_shapefile, id_var, response_var, sample_size_var) + + # Check for non-numeric covariate values + cv_df <- terra::as.data.frame(covariate_rasters, xy = FALSE) + cv_classes <- unlist(lapply(cv_df, class)) + cv_check <- all(as.vector(cv_classes) == "numeric") + if (!cv_check){ + non_numeric <- which(cv_classes != "numeric") + for (raster in non_numeric){ + warning(paste0("The values of ", names(covariate_rasters)[raster], " are not numeric")) + } + } + + # Save raster layer names so we can reassign it to make sure names don't change. 
+ cov_names <- names(covariate_rasters) covariate_rasters <- c(covariate_rasters, aggregation_raster) covariate_data <- terra::extract(covariate_rasters, terra::vect(polygon_shapefile), cells=TRUE, na.rm=TRUE, ID=TRUE) @@ -150,31 +200,31 @@ prepare_data <- function(polygon_shapefile, # Check for NAs in population data if(sum(is.na(aggregation_pixels)) != 0) { - if(na.action) { + if(na_action) { aggregation_pixels[is.na(aggregation_pixels)] <- 0 } else { - stop('There are NAs in the aggregation rasters within polygons. Please deal with these, or set na.action = TRUE') + stop('There are NAs in the aggregation rasters within polygons. Please deal with these, or set na_action = TRUE') } } # Check for NAs in covariate data if(sum(is.na(covariate_data)) != 0) { - if(na.action) { + if(na_action) { cov_filter <- !(names(covariate_data) %in% c(id_var,'cell')) covariate_data[ , cov_filter] <- sapply(covariate_data[ , cov_filter], function(x) { x[is.na(x)] <- stats::median(x, na.rm = T); return(x) }) } else { - stop('There are NAs in the covariate rasters within polygons. Please deal with these, or set na.action = TRUE') + stop('There are NAs in the covariate rasters within polygons. Please deal with these, or set na_action = TRUE') } } - coordsForFit <- extractCoordsForMesh(covariate_rasters, selectIds = covariate_data$cell) + coords_for_fit <- extractCoordsForMesh(covariate_rasters, selectIds = covariate_data$cell) - coordsForPrediction <- extractCoordsForMesh(covariate_rasters) + coords_for_prediction <- extractCoordsForMesh(covariate_rasters) - startendindex <- getStartendindex(covariate_data, polygon_data, id_var = id_var) + start_end_index <- getStartendindex(covariate_data, polygon_data, id_var = id_var) - if(makeMesh) { - mesh <- build_mesh(polygon_shapefile, mesh.args) + if(make_mesh) { + mesh <- build_mesh(polygon_shapefile, mesh_args) } else { mesh <- NULL message("A mesh is not being built. 
You will not be able to run a spatial model without a mesh.") @@ -186,9 +236,9 @@ prepare_data <- function(polygon_shapefile, polygon_data = polygon_data, covariate_data = covariate_data, aggregation_pixels = aggregation_pixels, - coordsForFit = coordsForFit, - coordsForPrediction = coordsForPrediction, - startendindex = startendindex, + coords_for_fit = coords_for_fit, + coords_for_prediction = coords_for_prediction, + start_end_index = start_end_index, mesh = mesh) class(disag_data) <- c('disag_data', 'list') @@ -205,9 +255,12 @@ prepare_data <- function(polygon_shapefile, #' @param polygon_data data.frame with two columns: polygon id and response #' @param covariate_data data.frame with cell id, polygon id and covariate columns #' @param aggregation_pixels vector with value of aggregation raster at each pixel -#' @param coordsForFit coordinates of the covariate data points within the polygons in x -#' @param coordsForPrediction coordinates of the covariate data points in the whole raster extent -#' @param startendindex matrix containing the start and end index for each polygon +#' @param coords_for_fit coordinates of the covariate data points within the polygons in x +#' @param coords_for_prediction coordinates of the covariate data points in the whole raster extent +#' @param start_end_index matrix containing the start and end index for each polygon +#' @param coordsForFit Deprecated. +#' @param coordsForPrediction Deprecated. +#' @param startendindex Deprecated. #' @param mesh inla.mesh object to use in the fit #' #' @return A list is returned of class \code{disag_data}. @@ -218,9 +271,9 @@ prepare_data <- function(polygon_shapefile, #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. 
Each row represents a pixel in a polygon.} #' \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} -#' \item{coordsForFit }{A matrix with two columns of x, y coordinates of pixels within the polygons. Used to make the spatial field.} -#' \item{coordsForPrediction }{A matrix with two columns of x, y coordinates of pixels in the whole Raster. Used to make predictions.} -#' \item{startendindex }{A matrix with two columns containing the start and end index of the pixels within each polygon.} +#' \item{coords_for_fit }{A matrix with two columns of x, y coordinates of pixels within the polygons. Used to make the spatial field.} +#' \item{coords_for_prediction }{A matrix with two columns of x, y coordinates of pixels in the whole Raster. Used to make predictions.} +#' \item{start_end_index }{A matrix with two columns containing the start and end index of the pixels within each polygon.} #' \item{mesh }{A INLA mesh to be used for the spatial field of the disaggregation model.} #' #' @name as.disag_data @@ -234,10 +287,29 @@ as.disag_data <- function(polygon_shapefile, polygon_data, covariate_data, aggregation_pixels, - coordsForFit, - coordsForPrediction, - startendindex, - mesh = NULL) { + coords_for_fit, + coords_for_prediction, + start_end_index, + mesh = NULL, + coordsForFit = NULL, + coordsForPrediction = NULL, + startendindex = NULL) { + + # Handle deprecated variables + if (!is.null(coordsForFit) && missing(coords_for_fit)) { + coords_for_fit <- coordsForFit + message("coordsForFit is deprecated and will be removed in a future version - please use coords_for_fit instead") + } + + if (!is.null(coordsForPrediction) && missing(coords_for_prediction)) { + coords_for_prediction <- coordsForPrediction + message("coordsForPrediction is deprecated and will be removed in a future version - please use coords_for_prediction instead") + } + + if (!is.null(startendindex) && 
missing(start_end_index)) { + start_end_index <- startendindex + message("startendindex is deprecated and will be removed in a future version - please use start_end_index instead") + } stopifnot(inherits(polygon_shapefile, 'sf')) stopifnot(inherits(shapefile_names, 'list')) @@ -245,9 +317,9 @@ as.disag_data <- function(polygon_shapefile, stopifnot(inherits(polygon_data, 'data.frame')) stopifnot(inherits(covariate_data, 'data.frame')) stopifnot(inherits(aggregation_pixels, 'numeric')) - stopifnot(inherits(coordsForFit, 'matrix')) - stopifnot(inherits(coordsForPrediction, 'matrix')) - stopifnot(inherits(startendindex, 'matrix')) + stopifnot(inherits(coords_for_fit, 'matrix')) + stopifnot(inherits(coords_for_prediction, 'matrix')) + stopifnot(inherits(start_end_index, 'matrix')) if(!is.null(mesh)) { stopifnot(inherits(mesh, 'inla.mesh')) } @@ -258,9 +330,9 @@ as.disag_data <- function(polygon_shapefile, polygon_data = polygon_data, covariate_data = covariate_data, aggregation_pixels = aggregation_pixels, - coordsForFit = coordsForFit, - coordsForPrediction = coordsForPrediction, - startendindex = startendindex, + coords_for_fit = coords_for_fit, + coords_for_prediction = coords_for_prediction, + start_end_index = start_end_index, mesh = mesh) class(disag_data) <- c('disag_data', 'list') diff --git a/R/summary.R b/R/summary.R index 2c34c07..f725db8 100644 --- a/R/summary.R +++ b/R/summary.R @@ -18,7 +18,10 @@ summary.disag_model <- function(object, ...) { pred <- obs <- NULL - model_params <- summary(object$sd_out, select = 'fixed') + sd_out <- object$sd_out + names(sd_out$par.fixed)[names(sd_out$par.fixed) == "slope"] <- names(object$data$covariate_rasters) + + model_params <- summary(sd_out, select = 'fixed') report <- object$obj$report() nll <- report$nll @@ -118,13 +121,14 @@ summary.disag_data <- function(object, ...) 
{ n_polygons <- nrow(object$polygon_shapefile) n_covariates <- as.integer(terra::nlyr(object$covariate_rasters)) - cat(paste("They data contains", n_polygons, "polygons and", nrow(object$covariate_data), "pixels\n")) + cat(paste("The data contains", n_polygons, "polygons and", nrow(object$covariate_data), "pixels\n")) cat(paste("The largest polygon contains", max(table(object$covariate_data[ , object$shapefile_names$id_var])), "pixels", "and the smallest polygon contains", min(table(object$covariate_data[ , object$shapefile_names$id_var])), "pixels\n")) cat(paste("There are", n_covariates, "covariates\n")) + covariate_summary <- summary(object$covariate_data[ , names(object$covariate_rasters)]) cat("\nCovariate summary:\n") diff --git a/README.md b/README.md index c0dd5ac..ab4445f 100644 --- a/README.md +++ b/README.md @@ -34,34 +34,34 @@ data_for_model <- prepare_data(polygon_shapefile = shps, ### Input data * A SpatRaster of covariate rasters to be used in the model (covariate_rasters) -* A sf (polygon_shapefile) containing at least two columns: one with the id for the polygons (id_var) and one with the response count data (response_var); for binomial data, i.e survey data, it can also contain a sample size column (sample_size_var). +* A sf (polygon_shapefile) containing at least two columns: one with the id for the polygons (id_var) and one with the response count data (response_var); for binomial data, i.e survey data, it can also contain a sample size column (sample_size_var). * (Optional) SpatRaster used to aggregate the pixel level predictions (aggregation_raster) to polygon level (usually population). 
If this is not supplied a uniform raster will be used ### Controlling the mesh -The argument mesh.args in prepare_data allows you to supply a list of INLA mesh parameter to control the mesh used for the spatial field +The argument mesh_args in prepare_data allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field ```R data_for_model <- prepare_data(shps, covariate_stack, - mesh.args = list(max.edge = c(0.2, 8), - cut = 0.05, + mesh_args = list(max.edge = c(0.2, 8), + cutoff = 0.05, offset = c(1, 0.7))) ``` ### Dealing with NAs -There is an na.action flag that is automatically off. If there are any NAs in your response or covariate data within the polygons the prepare_data method will fail. We advise you to sort out the NAs in your data yourself, however, if you want the function to automatically deal with NAs you can set na.action = TRUE. This will remove any polygons that have NAs as a response, set any aggregation pixels with NA to zero and set covariate NAs pixels to the median value for the that covariate. +There is an na_action flag that is automatically off. If there are any NAs in your response or covariate data within the polygons the prepare_data method will fail. We advise you to sort out the NAs in your data yourself, however, if you want the function to automatically deal with NAs you can set na_action = TRUE. This will remove any polygons that have NAs as a response or a population of zero, set any aggregation pixels with NA to zero and set covariate NA pixels to the median value for that covariate. ```R data_for_model <- prepare_data(shps, covariate_stack, aggregation_raster = population_raster, - na.action = TRUE) + na_action = TRUE) ``` ## Model fitting -Function fit_model takes data structure returned by prepare_data and fits a TMB disaggregation model. 
Here you can specify priors, likelihood function, link function and whether to include a field or iid effect (default includes both) +Function disag_model takes the data structure returned by prepare_data and fits a TMB disaggregation model. Here you can specify priors, likelihood function, link function and whether to include a field or iid effect (default includes both). ```R model_result <- disag_model(data_for_model, @@ -87,7 +87,7 @@ model_result <- disag_model(data_for_model, ### Predict model -Function predict takes data structure returned by fit_model to predict model results and uncertainty. +Function predict takes data structure returned by disag_model to predict model results and uncertainty. ```R prediction <- predict(model_result) diff --git a/man/as.disag_data.Rd b/man/as.disag_data.Rd index 803d050..35dec38 100644 --- a/man/as.disag_data.Rd +++ b/man/as.disag_data.Rd @@ -11,10 +11,13 @@ as.disag_data( polygon_data, covariate_data, aggregation_pixels, - coordsForFit, - coordsForPrediction, - startendindex, - mesh = NULL + coords_for_fit, + coords_for_prediction, + start_end_index, + mesh = NULL, + coordsForFit = NULL, + coordsForPrediction = NULL, + startendindex = NULL ) } \arguments{ @@ -30,13 +33,19 @@ as.disag_data( \item{aggregation_pixels}{vector with value of aggregation raster at each pixel} -\item{coordsForFit}{coordinates of the covariate data points within the polygons in x} +\item{coords_for_fit}{coordinates of the covariate data points within the polygons in x} -\item{coordsForPrediction}{coordinates of the covariate data points in the whole raster extent} +\item{coords_for_prediction}{coordinates of the covariate data points in the whole raster extent} -\item{startendindex}{matrix containing the start and end index for each polygon} +\item{start_end_index}{matrix containing the start and end index for each polygon} \item{mesh}{inla.mesh object to use in the fit} + +\item{coordsForFit}{Deprecated.} + 
+\item{coordsForPrediction}{Deprecated.} + +\item{startendindex}{Deprecated.} } \value{ A list is returned of class \code{disag_data}. @@ -47,9 +56,9 @@ The list of class \code{disag_data} contains: \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} - \item{coordsForFit }{A matrix with two columns of x, y coordinates of pixels within the polygons. Used to make the spatial field.} - \item{coordsForPrediction }{A matrix with two columns of x, y coordinates of pixels in the whole Raster. Used to make predictions.} - \item{startendindex }{A matrix with two columns containing the start and end index of the pixels within each polygon.} + \item{coords_for_fit }{A matrix with two columns of x, y coordinates of pixels within the polygons. Used to make the spatial field.} + \item{coords_for_prediction }{A matrix with two columns of x, y coordinates of pixels in the whole Raster. 
Used to make predictions.} + \item{start_end_index }{A matrix with two columns containing the start and end index of the pixels within each polygon.} \item{mesh }{A INLA mesh to be used for the spatial field of the disaggregation model.} } \description{ diff --git a/man/build_mesh.Rd b/man/build_mesh.Rd index 77561ca..0bcbf05 100644 --- a/man/build_mesh.Rd +++ b/man/build_mesh.Rd @@ -4,14 +4,17 @@ \alias{build_mesh} \title{Build mesh for disaggregaton model} \usage{ -build_mesh(shapes, mesh.args = NULL) +build_mesh(shapes, mesh_args = NULL, mesh.args = NULL) } \arguments{ \item{shapes}{sf covering the region under investigation.} -\item{mesh.args}{list of parameters that control the mesh structure. \emph{convex}, \emph{concave} and \emph{resolution}, -to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself, -with the parameters having the same meaning as in the INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}.} +\item{mesh_args}{list of parameters that control the mesh structure. \emph{convex}, \emph{concave} and \emph{resolution}, +to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cutoff} and \emph{offset}, to control the mesh itself, +with the parameters having the same meaning as in the INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. +\emph{cut} has been deprecated - use \emph{cutoff} instead.} + +\item{mesh.args}{Deprecated.} } \value{ An inla.mesh object @@ -25,11 +28,11 @@ and a coarser mesh outside. This speeds up computation time by only having a ver and having a small region outside with a coarser mesh to avoid edge effects. 
Six mesh parameters can be specified as arguments: \emph{convex}, \emph{concave} and \emph{resolution}, -to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself, -with the names meaning the same as used by INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. +to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cutoff} and \emph{offset}, to control the mesh itself, +with the names meaning the same as used by the fmesher functions \emph{fm_nonconvex_hull_inla} and \emph{fm_mesh_2d}. Defaults are: -pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, 8), cut = 0.4, offset = c(1, 15)). +pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, 8), cutoff = 0.4, offset = c(1, 15)). } \examples{ \dontrun{ diff --git a/man/fit_model.Rd b/man/disag_model.Rd similarity index 91% rename from man/fit_model.Rd rename to man/disag_model.Rd index 771fac7..41ba149 100644 --- a/man/fit_model.Rd +++ b/man/disag_model.Rd @@ -1,154 +1,152 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fit_model.R -\name{fit_model} -\alias{fit_model} -\alias{disag_model} -\title{Fit the disaggregation model} -\usage{ -fit_model( - data, - priors = NULL, - family = "gaussian", - link = "identity", - iterations = 100, - field = TRUE, - iid = TRUE, - hess_control_parscale = NULL, - hess_control_ndeps = 1e-04, - silent = TRUE -) - -disag_model( - data, - priors = NULL, - family = "gaussian", - link = "identity", - iterations = 100, - field = TRUE, - iid = TRUE, - hess_control_parscale = NULL, - hess_control_ndeps = 1e-04, - silent = TRUE -) -} -\arguments{ -\item{data}{disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting} - -\item{priors}{list of prior values} - -\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}} - 
-\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}} - -\item{iterations}{number of iterations to run the optimisation for} - -\item{field}{logical. Flag the spatial field on or off} - -\item{iid}{logical. Flag the iid effect on or off} - -\item{hess_control_parscale}{Argument to scale parameters during the calculation of the Hessian. -Must be the same length as the number of parameters. See \code{\link[stats]{optimHess}} for details.} - -\item{hess_control_ndeps}{Argument to control step sizes during the calculation of the Hessian. -Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. -Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. -See \code{\link[stats]{optimHess}} for details.} - -\item{silent}{logical. Suppress verbose output.} -} -\value{ -A list is returned of class \code{disag_model}. -The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. -The list of class \code{disag_model} contains: - \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} - \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} - \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.} - \item{data }{The \emph{disag_data} object used as an input to the model.} - \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} -} -\description{ -\emph{fit_model} function takes a \emph{disag_data} object created by -\code{\link{prepare_data}} and performs a Bayesian disaggregation fit. 
-} -\details{ -\strong{The model definition} - -The disaggregation model makes predictions at the pixel level: -\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} - -And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): -\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} -\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} - -The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): -\itemize{ - \item Gaussian: - If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where - \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} - \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. - \item Binomial: - For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. - \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. - \item Poisson: - \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. -} - -Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field -are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field -where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect - -The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. 
-The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. -The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. -These are specified as strings. - -The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. - -The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. - -The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE. -} -\examples{ -\dontrun{ -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i \%\% n_polygon_per_side != 0, i \%\% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), - c(ymax, ymax, ymin, ymin, ymax))) -} - -polys <- lapply(polygons,sf::st_polygon) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) - -spdf <- sf::st_sf(response_df, geometry = polys) - -# Create raster stack -r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r) <- terra::ext(spdf) -r[] <- sapply(1:terra::ncell(r), function(x){ -rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3))} -r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r2) <- terra::ext(spdf) -r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- c(r, r2) -names(cov_stack) <- c('layer1', 'layer2') - -test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) - - result <- fit_model(test_data, iterations = 2) - } - -} -\references{ -Nanda 
et al. (2023) disaggregation: An R Package for Bayesian -Spatial Disaggregation Modeling. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fit_model.R +\name{disag_model} +\alias{disag_model} +\title{Fit the disaggregation model} +\usage{ +disag_model( + data, + priors = NULL, + family = "gaussian", + link = "identity", + iterations = 100, + field = TRUE, + iid = TRUE, + hess_control_parscale = NULL, + hess_control_ndeps = 1e-04, + silent = TRUE +) +} +\arguments{ +\item{data}{disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting} + +\item{priors}{list of prior values: +\itemize{ +\item \code{priormean_intercept} +\item \code{priorsd_intercept} +\item \code{priormean_slope} +\item \code{priorsd_slope} +\item \code{prior_rho_min} +\item \code{prior_rho_prob} +\item \code{prior_sigma_max} +\item \code{prior_sigma_prob} +\item \code{prior_iideffect_sd_max} +\item \code{prior_iideffect_sd_prob} + }} + +\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}} + +\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}} + +\item{iterations}{number of iterations to run the optimisation for} + +\item{field}{logical. Flag the spatial field on or off} + +\item{iid}{logical. Flag the iid effect on or off} + +\item{hess_control_parscale}{Argument to scale parameters during the calculation of the Hessian. +Must be the same length as the number of parameters. See \code{\link[stats]{optimHess}} for details.} + +\item{hess_control_ndeps}{Argument to control step sizes during the calculation of the Hessian. +Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. +Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. +See \code{\link[stats]{optimHess}} for details.} + +\item{silent}{logical. 
Suppress verbose output.} +} +\value{ +A list is returned of class \code{disag_model}. +The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. +The list of class \code{disag_model} contains: + \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} + \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} + \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.} + \item{data }{The \emph{disag_data} object used as an input to the model.} + \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} +} +\description{ +\emph{disag_model} function takes a \emph{disag_data} object created by +\code{\link{prepare_data}} and performs a Bayesian disaggregation fit. +} +\details{ +\strong{The model definition} + +The disaggregation model makes predictions at the pixel level: +\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} + +And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): +\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} +\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} + +The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): +\itemize{ + \item Gaussian: + If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where + \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} + \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence 
rate. + \item Binomial: + For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. + \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. + \item Poisson: + \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. +} + +Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field +are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field +where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field +where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect + +The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. +The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. +The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. +These are specified as strings. + +The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. + +The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. + +The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE. 
+} +\examples{ +\dontrun{ +polygons <- list() +n_polygon_per_side <- 10 +n_polygons <- n_polygon_per_side * n_polygon_per_side +n_pixels_per_side <- n_polygon_per_side * 2 + +for(i in 1:n_polygons) { + row <- ceiling(i/n_polygon_per_side) + col <- ifelse(i \%\% n_polygon_per_side != 0, i \%\% n_polygon_per_side, n_polygon_per_side) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) +} + +polys <- lapply(polygons,sf::st_polygon) +N <- floor(runif(n_polygons, min = 1, max = 100)) +response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) + +spdf <- sf::st_sf(response_df, geometry = polys) + +# Create raster stack +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x){ +rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3))} +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) +names(cov_stack) <- c('layer1', 'layer2') + +test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack) + + result <- disag_model(test_data, iterations = 2) + } + +} +\references{ +Nanda et al. (2023) disaggregation: An R Package for Bayesian +Spatial Disaggregation Modeling. 
+} diff --git a/man/disaggregation-deprecated.Rd b/man/disaggregation-deprecated.Rd deleted file mode 100644 index ad9c87d..0000000 --- a/man/disaggregation-deprecated.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/deprecated.R -\name{disaggregation-deprecated} -\alias{disaggregation-deprecated} -\title{Deprecated functions in disaggregation} -\description{ -These functions still work but will be removed (defunct) in the next version. -} -\details{ -\itemize{ - \item \code{\link{fit_model}}: This function is deprecated, and will - be removed in the next version of this package. -} -} diff --git a/man/plot.disag_model.Rd b/man/plot.disag_model.Rd index 421e14c..e97259d 100644 --- a/man/plot.disag_model.Rd +++ b/man/plot.disag_model.Rd @@ -4,11 +4,14 @@ \alias{plot.disag_model} \title{Plot results of fitted model} \usage{ -\method{plot}{disag_model}(x, ...) +\method{plot}{disag_model}(x, include_iid = FALSE, ...) } \arguments{ \item{x}{Object of class \emph{disag_model} to be plotted.} +\item{include_iid}{logical. Whether or not to include predictions that include the +IID effect.} + \item{...}{Further arguments to \emph{plot} function.} } \value{ diff --git a/man/plot_disag_model_data.Rd b/man/plot_disag_model_data.Rd new file mode 100644 index 0000000..9113c3f --- /dev/null +++ b/man/plot_disag_model_data.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plot_disag_model_data} +\alias{plot_disag_model_data} +\title{Convert results of the model ready for plotting} +\usage{ +plot_disag_model_data(x) +} +\arguments{ +\item{x}{Object of class \emph{disag_model} to be plotted.} +} +\value{ +A list that contains: + \item{posteriors}{A data.frame of posteriors} + \item{data}{A data.frame of observed and predicted data} + \item{title}{The title of the observed vs. 
predicted plot} +} +\description{ +Convert results of the model ready for plotting +} diff --git a/man/predict.disag_model.Rd b/man/predict.disag_model.Rd index f85ed7a..aac2536 100644 --- a/man/predict.disag_model.Rd +++ b/man/predict.disag_model.Rd @@ -4,19 +4,29 @@ \alias{predict.disag_model} \title{Predict mean and uncertainty from the disaggregation model result} \usage{ -\method{predict}{disag_model}(object, newdata = NULL, predict_iid = FALSE, N = 100, CI = 0.95, ...) +\method{predict}{disag_model}( + object, + new_data = NULL, + predict_iid = FALSE, + N = 100, + CI = 0.95, + newdata = NULL, + ... +) } \arguments{ \item{object}{disag_model object returned by disag_model function.} -\item{newdata}{If NULL, predictions are made using the data in model_output. +\item{new_data}{If NULL, predictions are made using the data in model_output. If this is a raster stack or brick, predictions will be made over this data.} \item{predict_iid}{logical. If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.} \item{N}{Number of realisations. Default: 100.} -\item{CI}{Confidence interval to be calculated from the realisations. Default: 0.95.} +\item{CI}{Credible interval to be calculated from the realisations. Default: 0.95.} + +\item{newdata}{Deprecated.} \item{...}{Further arguments passed to or from other methods.} } @@ -41,12 +51,12 @@ predicts mean and uncertainty maps. } \details{ To predict over a different spatial extent to that used in the model, -a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. +a SpatRaster covering the region to make predictions over is passed to the argument \emph{new_data}. If this is not given predictions are made over the data used in the fit. The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. 
-For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated +For the uncertainty calculations, the number of the realisations and the size of the credible interval to be calculated are given by the arguments \emph{N} and \emph{CI} respectively. } \examples{ diff --git a/man/predict_model.Rd b/man/predict_model.Rd index 3e0387a..d85d1c8 100644 --- a/man/predict_model.Rd +++ b/man/predict_model.Rd @@ -4,15 +4,22 @@ \alias{predict_model} \title{Function to predict mean from the model result} \usage{ -predict_model(model_output, newdata = NULL, predict_iid = FALSE) +predict_model( + model_output, + new_data = NULL, + predict_iid = FALSE, + newdata = NULL +) } \arguments{ \item{model_output}{disag_model object returned by disag_model function} -\item{newdata}{If NULL, predictions are made using the data in model_output. -If this is a raster stack or brick, predictions will be made over this data. Default NULL.} +\item{new_data}{If NULL, predictions are made using the data in model_output. +If this is a SpatRaster, predictions will be made over this data. Default NULL.} \item{predict_iid}{If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.} + +\item{newdata}{Deprecated.} } \value{ The mean prediction, which is a list of: @@ -32,7 +39,7 @@ Function returns rasters of the mean predictions as well as the covariate and f to the linear predictor. To predict over a different spatial extent to that used in the model, -a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. +a SpatRaster covering the region to make predictions over is passed to the argument \emph{new_data}. If this is not given predictions are made over the data used in the fit. The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. 
diff --git a/man/predict_uncertainty.Rd b/man/predict_uncertainty.Rd index ccf361d..59373a2 100644 --- a/man/predict_uncertainty.Rd +++ b/man/predict_uncertainty.Rd @@ -6,23 +6,26 @@ \usage{ predict_uncertainty( model_output, - newdata = NULL, + new_data = NULL, predict_iid = FALSE, N = 100, - CI = 0.95 + CI = 0.95, + newdata = NULL ) } \arguments{ \item{model_output}{disag_model object returned by disag_model function.} -\item{newdata}{If NULL, predictions are made using the data in model_output. +\item{new_data}{If NULL, predictions are made using the data in model_output. If this is a raster stack or brick, predictions will be made over this data. Default NULL.} \item{predict_iid}{If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.} \item{N}{number of realisations. Default: 100.} -\item{CI}{confidence interval. Default: 0.95.} +\item{CI}{credible interval. Default: 0.95.} + +\item{newdata}{Deprecated.} } \value{ The uncertainty prediction, which is a list of: @@ -39,12 +42,12 @@ The uncertainty prediction, which is a list of: Function returns a SpatRaster of the realisations as well as the upper and lower credible interval rasters. To predict over a different spatial extent to that used in the model, -a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. +a SpatRaster covering the region to make predictions over is passed to the argument \emph{new_data}. If this is not given predictions are made over the data used in the fit. The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. -The number of the realisations and the size of the confidence interval to be calculated. +The number of the realisations and the size of the credible interval to be calculated. are given by the arguments \emph{N} and \emph{CI} respectively. 
} \examples{ diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd index 5b5d5ab..efe0e50 100644 --- a/man/prepare_data.Rd +++ b/man/prepare_data.Rd @@ -11,9 +11,12 @@ prepare_data( id_var = "area_id", response_var = "response", sample_size_var = NULL, + mesh_args = NULL, + na_action = FALSE, + make_mesh = TRUE, mesh.args = NULL, - na.action = FALSE, - makeMesh = TRUE, + na.action = NULL, + makeMesh = NULL, ncores = NULL ) } @@ -30,11 +33,17 @@ prepare_data( \item{sample_size_var}{For survey data, name of column in sf object (if it exists) with the sample size data.} -\item{mesh.args}{list of parameters that control the mesh structure with the same names as used by INLA.} +\item{mesh_args}{list of parameters that control the mesh structure with the same names as used by INLA.} -\item{na.action}{logical. If TRUE, NAs in response will be removed, covariate NAs will be given the median value, aggregation NAs will be set to zero. Default FALSE (NAs in response or covariate data within the polygons will give errors).} +\item{na_action}{logical. If TRUE, NAs in response will be removed, covariate NAs will be given the median value, aggregation NAs will be set to zero. Default FALSE (NAs in response or covariate data within the polygons will give errors).} -\item{makeMesh}{logical. If TRUE, build INLA mesh, takes some time. Default TRUE.} +\item{make_mesh}{logical. If TRUE, build INLA mesh, takes some time. Default TRUE.} + +\item{mesh.args}{Deprecated.} + +\item{na.action}{Deprecated.} + +\item{makeMesh}{Deprecated.} \item{ncores}{Deprecated.} } @@ -47,14 +56,14 @@ The list of class \code{disag_data} contains: \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. 
Each row represents a pixel in a polygon.} \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} - \item{coordsForFit }{A matrix with two columns of x, y coordinates of pixels within the polygons. Used to make the spatial field.} - \item{coordsForPrediction }{A matrix with two columns of x, y coordinates of pixels in the whole Raster. Used to make predictions.} - \item{startendindex }{A matrix with two columns containing the start and end index of the pixels within each polygon.} + \item{coords_for_fit }{A matrix with two columns of x, y coordinates of pixels within the polygons. Used to make the spatial field.} + \item{coords_for_prediction }{A matrix with two columns of x, y coordinates of pixels in the whole Raster. Used to make predictions.} + \item{start_end_index }{A matrix with two columns containing the start and end index of the pixels within each polygon.} \item{mesh }{A INLA mesh to be used for the spatial field of the disaggregation model.} } \description{ \emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model. -Designed to be used in the \emph{disaggregation::fit_model} function. +Designed to be used in the \emph{disaggregation::disag_model} function. } \details{ Takes a sf object with the response data and a SpatRaster of covariates. 
diff --git a/src/disaggregation.cpp b/src/disaggregation.cpp index d476bbd..332fb20 100644 --- a/src/disaggregation.cpp +++ b/src/disaggregation.cpp @@ -1,262 +1,262 @@ -// -// Author: Anita Nandi -// Date: 2019-02-14 - -// Data: Spatial field mesh and matrices, polygon data, covariate pixel data - - -#define TMB_LIB_INIT R_init_disaggregation -#include - -template -Type objective_function::operator()() -{ - - using namespace R_inla; - using namespace density; - using namespace Eigen; - - // ------------------------------------------------------------------------ // - // Spatial field data - // ------------------------------------------------------------------------ // - - // The A matrices are for projecting the mesh to a point for the pixel and point data respectively. - DATA_SPARSE_MATRIX(Apixel); - DATA_STRUCT(spde, spde_t); - - // ------------------------------------------------------------------------ // - // Polygon level data - // ------------------------------------------------------------------------ // - - // Covariate pixel data - DATA_MATRIX(x); - - // two col matrix with start end indices for each shape case. - DATA_IARRAY(startendindex); - - // Shape data. Cases and region id. 
- DATA_VECTOR(polygon_response_data); - DATA_VECTOR(response_sample_size); - - // Use to aggreagte pixel response values to polygon level - DATA_VECTOR(aggregation_values); - - // ------------------------------------------------------------------------ // - // Likelihood and link functions - // ------------------------------------------------------------------------ // - - DATA_INTEGER(family); - DATA_INTEGER(link); - - // ------------------------------------------------------------------------ // - // Parameters - // ------------------------------------------------------------------------ // - - PARAMETER(intercept); - PARAMETER_VECTOR(slope); - - DATA_SCALAR(priormean_intercept); - DATA_SCALAR(priorsd_intercept); - DATA_SCALAR(priormean_slope); - DATA_SCALAR(priorsd_slope); - - // Priors for likelihood - PARAMETER(log_tau_gaussian); - Type tau_gaussian = exp(log_tau_gaussian); - Type gaussian_sd = 1 / sqrt(tau_gaussian); - - // INLA defines a loggamma prior on log tau. - // We evaluate a gamma prior on tau, but the parameters are - // therefore the same. 
- Type prior_gamma_shape = 1; - Type prior_gamma_rate = 5e-05; - - PARAMETER_VECTOR(iideffect); - PARAMETER(iideffect_log_tau); - Type iideffect_tau = exp(iideffect_log_tau); - Type iideffect_sd = 1 / sqrt(iideffect_tau); - - Type iideffect_mean = 0.0; - - // Priors on iid random effect for polygons - DATA_SCALAR(prior_iideffect_sd_max); - DATA_SCALAR(prior_iideffect_sd_prob); - - // spde hyperparameters - PARAMETER(log_sigma); - PARAMETER(log_rho); - Type sigma = exp(log_sigma); - Type rho = exp(log_rho); - - // Priors on spde hyperparameters - DATA_SCALAR(prior_rho_min); - DATA_SCALAR(prior_rho_prob); - DATA_SCALAR(prior_sigma_max); - DATA_SCALAR(prior_sigma_prob); - - // Convert hyperparameters to natural scale - DATA_SCALAR(nu); - Type kappa = sqrt(8.0) / rho; - - // Random effect parameters - PARAMETER_VECTOR(nodemean); - - // Model component flags - DATA_INTEGER(field); - DATA_INTEGER(iid); - - // Number of polygons - int n_polygons = polygon_response_data.size(); - // Number of pixels - int n_pixels = x.rows(); - - Type nll = 0.0; - - // ------------------------------------------------------------------------ // - // Likelihood from priors - // ------------------------------------------------------------------------ // - - nll -= dnorm(intercept, priormean_intercept, priorsd_intercept, true); - for (int s = 0; s < slope.size(); s++) { - nll -= dnorm(slope[s], priormean_slope, priorsd_slope, true); - } - - if(iid) { - // Likelihood of hyperparameter of polygon iid random effect. 
- // From https://projecteuclid.org/euclid.ss/1491465621 (Eqn 3.3) - Type lambda = -log(prior_iideffect_sd_prob) / prior_iideffect_sd_max; - Type log_pcdensity_iid = log(lambda / 2) - (3/2)*iideffect_log_tau - lambda * pow(iideffect_tau, -1/2); - // log(iideffect_sd) from the Jacobian - nll -= log_pcdensity_iid + iideffect_log_tau; - - // Likelihood of random effect for polygons - for(int p = 0; p < iideffect.size(); p++) { - nll -= dnorm(iideffect[p], iideffect_mean, iideffect_sd, true); - } - } - - // Likelihood from the gaussian prior. - // log(prec) ~ loggamma - // prec ~ gamma - if(family == 0) { - nll -= dgamma(tau_gaussian, prior_gamma_shape, prior_gamma_rate, true); - } - - if(field) { - // Likelihood of hyperparameters for field. - // From https://www.tandfonline.com/doi/full/10.1080/01621459.2017.1415907 (Theorem 2.6) - Type lambdatilde1 = -log(prior_rho_prob) * prior_rho_min; - Type lambdatilde2 = -log(prior_sigma_prob) / prior_sigma_max; - Type log_pcdensity = log(lambdatilde1) + log(lambdatilde2) - 2*log_rho - lambdatilde1 * pow(rho, -1) - lambdatilde2 * sigma; - // log_rho and log_sigma from the Jacobian - nll -= log_pcdensity + log_rho + log_sigma; - - // Build spde matrix - SparseMatrix Q = Q_spde(spde, kappa); - - // From Lindgren (2011) https://doi.org/10.1111/j.1467-9868.2011.00777.x, see equation for the marginal variance - Type scaling_factor = sqrt(exp(lgamma(nu)) / (exp(lgamma(nu + 1)) * 4 * M_PI * pow(kappa, 2*nu))); - - // Likelihood of the random field. 
- nll += SCALE(GMRF(Q), sigma / scaling_factor)(nodemean); - } - - Type nll_priors = nll; - - // ------------------------------------------------------------------------ // - // Likelihood from data - // ------------------------------------------------------------------------ // - - vector pixel_linear_pred(n_pixels); - pixel_linear_pred = intercept + x * slope; - - if(field) { - // Calculate field for pixel data - vector linear_pred_field(n_pixels); - linear_pred_field = Apixel * nodemean; - pixel_linear_pred += linear_pred_field.array(); - } - - // recalculate startendindices to be in the form start, n - startendindex.col(1) = startendindex.col(1) - startendindex.col(0) + 1; - - Type polygon_response; - Type normalised_polygon_response; - Type normalisation_total; - Type pred_polygoncases; - Type pred_polygonrate; - Type polygon_sd; - vector pixel_pred; - vector numerator_pixels; - vector normalisation_pixels; - vector reportnormalisation(n_polygons); - vector reportprediction_cases(n_polygons); - vector reportprediction_rate(n_polygons); - vector reportnll(n_polygons); - vector reportpolygonsd(n_polygons); - - // For each shape get pixel predictions within and aggregate to polygon level - for (int polygon = 0; polygon < n_polygons; polygon++) { - - // Get pixel level predictions (rate) - pixel_pred = pixel_linear_pred.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array(); - if(iid) { - pixel_pred += iideffect[polygon]; - } - // Use correct link function - if(link == 0) { - pixel_pred = invlogit(pixel_pred); - } else if(link == 1) { - pixel_pred = exp(pixel_pred); - } else if(link == 2){ - // Don't need to do anything, i.e. 
pixel_pred = pixel_pred; - } else { - error("Link function not implemented."); - } - - // Aggregate to polygon prediction - numerator_pixels = pixel_pred * aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array(); - normalisation_pixels = aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1)); - normalisation_total = sum(normalisation_pixels); - pred_polygoncases = sum(numerator_pixels); - pred_polygonrate = pred_polygoncases/normalisation_total; - - reportnormalisation[polygon] = normalisation_total; - reportprediction_cases[polygon] = pred_polygoncases; - reportprediction_rate[polygon] = pred_polygonrate; - - // Use correct likelihood function - if(family == 0) { - // Scale the pixel sd to polygon level - polygon_sd = gaussian_sd * sqrt((normalisation_pixels * normalisation_pixels).sum()) / normalisation_total; - reportpolygonsd[polygon] = polygon_sd; - // Calculate normal likelihood in rate space - polygon_response = polygon_response_data(polygon); - normalised_polygon_response = polygon_response/normalisation_total; - nll -= dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true); - reportnll[polygon] = -dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true); - } else if(family == 1) { - nll -= dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true); - reportnll[polygon] = -dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true); - } else if(family == 2) { - nll -= dpois(polygon_response_data[polygon], pred_polygoncases, true); - reportnll[polygon] = -dpois(polygon_response_data[polygon], pred_polygoncases, true); - } else { - error("Likelihood not implemented."); - } - - } - - REPORT(reportprediction_cases); - REPORT(reportprediction_rate); - REPORT(reportnormalisation); - REPORT(reportnll); - REPORT(polygon_response_data); - REPORT(nll_priors); - REPORT(nll); - if(family == 0) { - 
REPORT(reportpolygonsd); - } - - return nll; -} +// +// Author: Anita Nandi +// Date: 2019-02-14 + +// Data: Spatial field mesh and matrices, polygon data, covariate pixel data + + +#define TMB_LIB_INIT R_init_disaggregation +#include + +template +Type objective_function::operator()() +{ + + using namespace R_inla; + using namespace density; + using namespace Eigen; + + // ------------------------------------------------------------------------ // + // Spatial field data + // ------------------------------------------------------------------------ // + + // The A matrices are for projecting the mesh to a point for the pixel and point data respectively. + DATA_SPARSE_MATRIX(Apixel); + DATA_STRUCT(spde, spde_t); + + // ------------------------------------------------------------------------ // + // Polygon level data + // ------------------------------------------------------------------------ // + + // Covariate pixel data + DATA_MATRIX(x); + + // two col matrix with start end indices for each shape case. + DATA_IARRAY(start_end_index); + + // Shape data. Cases and region id. 
+ DATA_VECTOR(polygon_response_data); + DATA_VECTOR(response_sample_size); + + // Use to aggreagte pixel response values to polygon level + DATA_VECTOR(aggregation_values); + + // ------------------------------------------------------------------------ // + // Likelihood and link functions + // ------------------------------------------------------------------------ // + + DATA_INTEGER(family); + DATA_INTEGER(link); + + // ------------------------------------------------------------------------ // + // Parameters + // ------------------------------------------------------------------------ // + + PARAMETER(intercept); + PARAMETER_VECTOR(slope); + + DATA_SCALAR(priormean_intercept); + DATA_SCALAR(priorsd_intercept); + DATA_SCALAR(priormean_slope); + DATA_SCALAR(priorsd_slope); + + // Priors for likelihood + PARAMETER(log_tau_gaussian); + Type tau_gaussian = exp(log_tau_gaussian); + Type gaussian_sd = 1 / sqrt(tau_gaussian); + + // INLA defines a loggamma prior on log tau. + // We evaluate a gamma prior on tau, but the parameters are + // therefore the same. 
+ Type prior_gamma_shape = 1; + Type prior_gamma_rate = 5e-05; + + PARAMETER_VECTOR(iideffect); + PARAMETER(iideffect_log_tau); + Type iideffect_tau = exp(iideffect_log_tau); + Type iideffect_sd = 1 / sqrt(iideffect_tau); + + Type iideffect_mean = 0.0; + + // Priors on iid random effect for polygons + DATA_SCALAR(prior_iideffect_sd_max); + DATA_SCALAR(prior_iideffect_sd_prob); + + // spde hyperparameters + PARAMETER(log_sigma); + PARAMETER(log_rho); + Type sigma = exp(log_sigma); + Type rho = exp(log_rho); + + // Priors on spde hyperparameters + DATA_SCALAR(prior_rho_min); + DATA_SCALAR(prior_rho_prob); + DATA_SCALAR(prior_sigma_max); + DATA_SCALAR(prior_sigma_prob); + + // Convert hyperparameters to natural scale + DATA_SCALAR(nu); + Type kappa = sqrt(8.0) / rho; + + // Random effect parameters + PARAMETER_VECTOR(nodemean); + + // Model component flags + DATA_INTEGER(field); + DATA_INTEGER(iid); + + // Number of polygons + int n_polygons = polygon_response_data.size(); + // Number of pixels + int n_pixels = x.rows(); + + Type nll = 0.0; + + // ------------------------------------------------------------------------ // + // Likelihood from priors + // ------------------------------------------------------------------------ // + + nll -= dnorm(intercept, priormean_intercept, priorsd_intercept, true); + for (int s = 0; s < slope.size(); s++) { + nll -= dnorm(slope[s], priormean_slope, priorsd_slope, true); + } + + if(iid) { + // Likelihood of hyperparameter of polygon iid random effect. 
+ // From https://projecteuclid.org/euclid.ss/1491465621 (Eqn 3.3) + Type lambda = -log(prior_iideffect_sd_prob) / prior_iideffect_sd_max; + Type log_pcdensity_iid = log(lambda / 2) - (3/2)*iideffect_log_tau - lambda * pow(iideffect_tau, -1/2); + // log(iideffect_sd) from the Jacobian + nll -= log_pcdensity_iid + iideffect_log_tau; + + // Likelihood of random effect for polygons + for(int p = 0; p < iideffect.size(); p++) { + nll -= dnorm(iideffect[p], iideffect_mean, iideffect_sd, true); + } + } + + // Likelihood from the gaussian prior. + // log(prec) ~ loggamma + // prec ~ gamma + if(family == 0) { + nll -= dgamma(tau_gaussian, prior_gamma_shape, prior_gamma_rate, true); + } + + if(field) { + // Likelihood of hyperparameters for field. + // From https://www.tandfonline.com/doi/full/10.1080/01621459.2017.1415907 (Theorem 2.6) + Type lambdatilde1 = -log(prior_rho_prob) * prior_rho_min; + Type lambdatilde2 = -log(prior_sigma_prob) / prior_sigma_max; + Type log_pcdensity = log(lambdatilde1) + log(lambdatilde2) - 2*log_rho - lambdatilde1 * pow(rho, -1) - lambdatilde2 * sigma; + // log_rho and log_sigma from the Jacobian + nll -= log_pcdensity + log_rho + log_sigma; + + // Build spde matrix + SparseMatrix Q = Q_spde(spde, kappa); + + // From Lindgren (2011) https://doi.org/10.1111/j.1467-9868.2011.00777.x, see equation for the marginal variance + Type scaling_factor = sqrt(exp(lgamma(nu)) / (exp(lgamma(nu + 1)) * 4 * M_PI * pow(kappa, 2*nu))); + + // Likelihood of the random field. 
+ nll += SCALE(GMRF(Q), sigma / scaling_factor)(nodemean); + } + + Type nll_priors = nll; + + // ------------------------------------------------------------------------ // + // Likelihood from data + // ------------------------------------------------------------------------ // + + vector pixel_linear_pred(n_pixels); + pixel_linear_pred = intercept + x * slope; + + if(field) { + // Calculate field for pixel data + vector linear_pred_field(n_pixels); + linear_pred_field = Apixel * nodemean; + pixel_linear_pred += linear_pred_field.array(); + } + + // recalculate startendindices to be in the form start, n + start_end_index.col(1) = start_end_index.col(1) - start_end_index.col(0) + 1; + + Type polygon_response; + Type normalised_polygon_response; + Type normalisation_total; + Type pred_polygoncases; + Type pred_polygonrate; + Type polygon_sd; + vector pixel_pred; + vector numerator_pixels; + vector normalisation_pixels; + vector reportnormalisation(n_polygons); + vector reportprediction_cases(n_polygons); + vector reportprediction_rate(n_polygons); + vector reportnll(n_polygons); + vector reportpolygonsd(n_polygons); + + // For each shape get pixel predictions within and aggregate to polygon level + for (int polygon = 0; polygon < n_polygons; polygon++) { + + // Get pixel level predictions (rate) + pixel_pred = pixel_linear_pred.segment(start_end_index(polygon, 0), start_end_index(polygon, 1)).array(); + if(iid) { + pixel_pred += iideffect[polygon]; + } + // Use correct link function + if(link == 0) { + pixel_pred = invlogit(pixel_pred); + } else if(link == 1) { + pixel_pred = exp(pixel_pred); + } else if(link == 2){ + // Don't need to do anything, i.e. 
pixel_pred = pixel_pred; + } else { + Rf_error("Link function not implemented."); + } + + // Aggregate to polygon prediction + numerator_pixels = pixel_pred * aggregation_values.segment(start_end_index(polygon, 0), start_end_index(polygon, 1)).array(); + normalisation_pixels = aggregation_values.segment(start_end_index(polygon, 0), start_end_index(polygon, 1)); + normalisation_total = sum(normalisation_pixels); + pred_polygoncases = sum(numerator_pixels); + pred_polygonrate = pred_polygoncases/normalisation_total; + + reportnormalisation[polygon] = normalisation_total; + reportprediction_cases[polygon] = pred_polygoncases; + reportprediction_rate[polygon] = pred_polygonrate; + + // Use correct likelihood function + if(family == 0) { + // Scale the pixel sd to polygon level + polygon_sd = gaussian_sd * sqrt((normalisation_pixels * normalisation_pixels).sum()) / normalisation_total; + reportpolygonsd[polygon] = polygon_sd; + // Calculate normal likelihood in rate space + polygon_response = polygon_response_data(polygon); + normalised_polygon_response = polygon_response/normalisation_total; + nll -= dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true); + reportnll[polygon] = -dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true); + } else if(family == 1) { + nll -= dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true); + reportnll[polygon] = -dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true); + } else if(family == 2) { + nll -= dpois(polygon_response_data[polygon], pred_polygoncases, true); + reportnll[polygon] = -dpois(polygon_response_data[polygon], pred_polygoncases, true); + } else { + Rf_error("Likelihood not implemented."); + } + + } + + REPORT(reportprediction_cases); + REPORT(reportprediction_rate); + REPORT(reportnormalisation); + REPORT(reportnll); + REPORT(polygon_response_data); + REPORT(nll_priors); + REPORT(nll); + if(family == 0) { + 
REPORT(reportpolygonsd); + } + + return nll; +} diff --git a/tests/testthat/helper_data.R b/tests/testthat/helper_data.R index 89deef8..8ef4831 100644 --- a/tests/testthat/helper_data.R +++ b/tests/testthat/helper_data.R @@ -35,3 +35,10 @@ names(cov_stack) <- c('layer1', 'layer2') test_data <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack) + +result <- disag_model(test_data, + field = TRUE, + iid = TRUE, + iterations = 100, + family = "poisson", + link = "log") diff --git a/tests/testthat/test-build-mesh.R b/tests/testthat/test-build-mesh.R index 8bd19a2..a66b9d5 100644 --- a/tests/testthat/test-build-mesh.R +++ b/tests/testthat/test-build-mesh.R @@ -3,13 +3,12 @@ context("Build mesh") test_that("build_mesh behaves as expected", { - skip_on_cran() - my_mesh <- build_mesh(spdf) expect_error(build_mesh(response_df)) - expect_error(build_mesh(spdf, mesh.args = c(4, 8))) + expect_error(build_mesh(spdf, mesh_args = c(4, 8))) + expect_message(build_mesh(spdf, mesh_args = list(cut = 0.1))) expect_is(my_mesh, 'inla.mesh') - expect_is(build_mesh(spdf, mesh.args = list(max.edge = c(50, 100))), 'inla.mesh') + expect_is(build_mesh(spdf, mesh_args = list(max.edge = c(50, 100))), 'inla.mesh') }) diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R index 84ba7b4..582bda6 100644 --- a/tests/testthat/test-extract.R +++ b/tests/testthat/test-extract.R @@ -3,8 +3,6 @@ context("Extract covariates and polygon data") test_that("getPolygonData function", { - skip_on_cran() - expect_error(getPolygonData(spdf, id_var = 'id', response_var = 'response')) expect_error(getPolygonData(spdf, id_var = 'area_id', response_var = 'data')) @@ -29,8 +27,6 @@ test_that("getPolygonData function", { test_that("getCovariateData function gives errors when it should", { - skip_on_cran() - expect_error(getCovariateRasters('/home/rasters', '.tif$', spdf)) # Save .tif files in tempdir() @@ -43,8 +39,6 @@ test_that("getCovariateData function gives errors when it 
should", { test_that("extractCoordsForMesh function behaves as it should", { - skip_on_cran() - cov_data <- terra::extract(cov_stack, spdf, cells = TRUE, na.rm = TRUE, ID = TRUE) names(cov_data)[1] <- 'area_id' diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index a3a7774..e5b6053 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -3,12 +3,9 @@ context("Fitting model") test_that("disag_model produces errors when expected", { - skip_if_not_installed('INLA') - skip_on_cran() - expect_error(disag_model(list())) expect_error(disag_model(test_data, iterations = 'iterations')) - expect_error(disag_model(test_data, priors = list(polygon_sd_men = 0.3, polygon_sd_sd = 0.4))) + expect_error(disag_model(test_data, iid = FALSE, priors = list(polygon_sd_men = 0.3, polygon_sd_sd = 0.4))) expect_error(disag_model(test_data, priors = c(polygon_sd_mean = 1.2))) expect_error(disag_model(test_data, family = 'banana')) expect_error(disag_model(test_data, link = 'apple')) @@ -17,45 +14,41 @@ test_that("disag_model produces errors when expected", { test_that("disag_model behaves as expected", { - skip_if_not_installed('INLA') - skip_on_cran() - - result <- disag_model(test_data, iterations = 100, iid = FALSE) + result <- disag_model(test_data, iterations = 100, family = 'poisson', link = 'log') expect_is(result, 'disag_model') expect_equal(length(result), 5) expect_equal(length(result$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 4) - expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) + expect_equal(unique(names(result$sd_out$par.random)), c("iideffect", "nodemean")) + expect_true(all(c("layer1", "layer2") %in% names(result$sd_out$par.fixed))) + expect_false(any(names(result$sd_out$par.fixed) == "slope")) + expect_true(all(c("layer1", "layer2") %in% names(result$opt$par))) + expect_false(any(names(result$opt$par) == "slope")) }) test_that("disag_model with 1 covariate behaves as 
expected", { - skip_if_not_installed('INLA') - skip_on_cran() - test_data2 <- test_data test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]] test_data2$covariate_data <- test_data2$covariate_data[, 1:3] - result <- disag_model(test_data2, iterations = 100, iid = FALSE) + result <- disag_model(test_data2, iterations = 100, iid = FALSE, family = 'poisson', link = 'log') expect_is(result, 'disag_model') expect_equal(length(result), 5) # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore. - expect_equal(length(result$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 3) + expect_equal(length(result$sd_out$par.fixed), terra::nlyr(test_data2$covariate_rasters) + 3) expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - # Confirm only two covariates were fitted. - expect_equal(sum(names(result$opt$par) == 'slope'), 1) + # Confirm only one covariate was fitted. + expect_equal(sum(names(result$opt$par) == "layer1"), 1) + expect_false(any(names(result$opt$par) == "layer2")) }) test_that("user defined model setup is working as expected", { - skip_if_not_installed('INLA') - skip_on_cran() - binom_data <- prepare_data(polygon_shapefile = spdf_binom, covariate_rasters = cov_stack, sample_size_var = 'sample_size') @@ -96,10 +89,7 @@ test_that("user defined model setup is working as expected", { test_that("make_model_object behaves as expected", { - skip_if_not_installed('INLA') - skip_on_cran() - - result <- make_model_object(test_data) + result <- make_model_object(test_data, family = 'poisson', link = 'log') expect_is(result, 'list') expect_equal(sum(sapply(c("par", "fn", "gr", "report"), function(x) !(x %in% names(result)))), 0) @@ -108,14 +98,11 @@ test_that("make_model_object behaves as expected", { test_that("setup_hess_control behaves as expected", { - skip_if_not_installed('INLA') - skip_on_cran() - - obj <- make_model_object(test_data) + obj <- make_model_object(test_data, family = 'poisson', link = 
'log') opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = 2, trace = 0)) - hess_control <- setup_hess_control(opt, hess_control_parscale = c(rep(c(0.9, 1.1), 3), 1), hess_control_ndeps = 1e-3) + hess_control <- setup_hess_control(opt, hess_control_parscale = rep(c(0.9, 1.1), 3), hess_control_ndeps = 1e-3) expect_is(hess_control, 'list') expect_equal(length(hess_control$parscale), length(opt$par)) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index ab0e0e6..78b554e 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -2,8 +2,6 @@ context("Plotting data") test_that("Check plot_polygon_data function works as expected", { - skip_on_cran() - p <- plot_polygon_data(spdf, list(id_var = 'area_id', response_var = 'response')) expect_error(plot_polygon_data(polys, list(id_var = 'area_id', response_var = 'response'))) expect_is(p, 'ggplot') @@ -15,9 +13,6 @@ test_that("Check plot_polygon_data function works as expected", { test_that("Check plot.disag.data function works as expected", { - skip_if_not_installed('INLA') - skip_on_cran() - test_data2 <- prepare_data(polygon_shapefile = spdf2, covariate_rasters = cov_stack, response_var = 'n_positive') @@ -44,47 +39,28 @@ test_that("Check plot.disag.data function works as expected", { test_that("Check plot.disag_model function works as expected", { - skip_if_not_installed('INLA') - skip_on_cran() - - fit_result <- disag_model(test_data, iterations = 10) - - fit_result_nofield <- disag_model(test_data, iterations = 10, field = FALSE) + fit_result_nofield <- disag_model(test_data, iterations = 100, field = FALSE, family = "poisson", link = "log") - p1 <- plot(fit_result) + p1 <- plot(result) p2 <- plot(fit_result_nofield) + p3 <- plot(result, include_iid = TRUE) + expect_is(p1, 'list') expect_equal(length(p1), 2) expect_is(p2, 'list') expect_equal(length(p2), 2) + expect_is(p3, 'list') + expect_equal(length(p3), 2) }) test_that("Check 
plot.disag_prediction function works as expected", { - skip_if_not_installed('INLA') - skip_on_cran() - - fit_result <- disag_model(test_data, iterations = 1000, - iid = TRUE, - field = TRUE, - family = 'poisson', - link = 'log', - priors = list(priormean_intercept = 0, - priorsd_intercept = 0.1, - priormean_slope = 0.0, - priorsd_slope = 0.1, - prior_rho_min = 5, - prior_rho_prob = 0.01, - prior_sigma_max = 0.1, - prior_sigma_prob = 0.01, - prior_iideffect_sd_max = 0.0001, - prior_iideffect_sd_prob = 0.01)) - pred <- predict(fit_result) + pred <- predict(result) p <- plot(pred) expect_is(p, 'gg') diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index d2cd848..e8b5c82 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -2,25 +2,6 @@ context("Predict model") test_that("Check predict.disag_model function works as expected", { - skip_if_not_installed('INLA') - skip_on_cran() - - result <- disag_model(test_data, iterations = 1000, - iid = TRUE, - field = TRUE, - family = 'poisson', - link = 'log', - priors = list(priormean_intercept = 0, - priorsd_intercept = 0.1, - priormean_slope = 0.0, - priorsd_slope = 0.1, - prior_rho_min = 5, - prior_rho_prob = 0.01, - prior_sigma_max = 0.1, - prior_sigma_prob = 0.01, - prior_iideffect_sd_max = 0.0001, - prior_iideffect_sd_prob = 0.01)) - pred2 <- predict(result) expect_is(pred2, 'disag_prediction') @@ -67,9 +48,9 @@ test_that("Check predict.disag_model function works as expected", { # For a model with no field or iid - result <- disag_model(test_data, iterations = 100, field = FALSE, iid = FALSE) + result2 <- disag_model(test_data, iterations = 100, field = FALSE, iid = FALSE) - pred2 <- predict(result) + pred2 <- predict(result2) expect_is(pred2, 'disag_prediction') expect_equal(length(pred2), 2) @@ -94,27 +75,12 @@ test_that("Check predict.disag_model function works as expected", { -test_that("Check predict.disag_model function works with newdata", 
{ - - skip_if_not_installed('INLA') - skip_on_cran() +test_that("Check predict.disag_model function works with new data", { - result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100, - priors = list(priormean_intercept = 0, - priorsd_intercept = 1, - priormean_slope = 0.0, - priorsd_slope = 0.4, - prior_rho_min = 1, - prior_rho_prob = 0.01, - prior_sigma_max = 0.1, - prior_sigma_prob = 0.01, - prior_iideffect_sd_max = 0.0001, - prior_iideffect_sd_prob = 0.01)) - - newdata <- terra::crop(c(r, r2), c(0, 10, 0, 10)) - names(newdata) <- c('layer1', 'layer2') + new_data <- terra::crop(c(r, r2), c(0, 10, 0, 10)) + names(new_data) <- c('layer1', 'layer2') pred1 <- predict(result) - pred2 <- predict(result, newdata, predict_iid = TRUE, N = 5) + pred2 <- predict(result, new_data, predict_iid = TRUE, N = 5) expect_is(pred2, 'disag_prediction') expect_equal(length(pred2), 2) @@ -124,7 +90,7 @@ test_that("Check predict.disag_model function works with newdata", { expect_equal(length(pred2$mean_prediction), 4) expect_equal(names(pred2$mean_prediction), c('prediction', 'field', 'iid', 'covariates')) expect_is(pred2$mean_prediction$prediction, 'SpatRaster') - expect_true(is.null(pred2$mean_prediction$field)) + expect_true(!is.null(pred2$mean_prediction$field)) expect_is(pred2$mean_prediction$iid, 'SpatRaster') expect_is(pred2$mean_prediction$covariates, 'SpatRaster') @@ -141,52 +107,30 @@ test_that("Check predict.disag_model function works with newdata", { }) -test_that('Check that check_newdata works', { +test_that('Check that check_new_data works', { - skip_if_not_installed('INLA') - skip_on_cran() + new_data <- terra::crop(c(r, r2), c(0, 10, 0, 10)) + names(new_data) <- c('layer1', 'layer2') - result <- disag_model(test_data, field = FALSE, iterations = 100) - - newdata <- terra::crop(c(r, r2), c(0, 10, 0, 10)) - names(newdata) <- c('layer1', 'layer2') - - nd1 <- check_newdata(newdata, result) + nd1 <- check_new_data(new_data, result) expect_is(nd1, 
'SpatRaster') - nn <- newdata[[1]] + nn <- new_data[[1]] names(nn) <- 'extra_unneeded' - newdata2 <- c(newdata, nn) - expect_error(check_newdata(newdata2, result), NA) + new_data2 <- c(new_data, nn) + expect_error(check_new_data(new_data2, result), NA) - newdata3 <- newdata[[1]] - expect_error(check_newdata(newdata3, result), 'All covariates') + new_data3 <- new_data[[1]] + expect_error(check_new_data(new_data3, result), 'All covariates') - newdata4 <- result$data$covariate_data - expect_error(check_newdata(newdata4, result), 'newdata should be NULL or') + new_data4 <- result$data$covariate_data + expect_error(check_new_data(new_data4, result), 'new_data should be NULL or') }) test_that('Check that setup_objects works', { - skip_if_not_installed('INLA') - skip_on_cran() - - result <- disag_model(test_data, iterations = 100, - iid = TRUE, - field = TRUE, - priors = list(priormean_intercept = 0, - priorsd_intercept = 1, - priormean_slope = 0.0, - priorsd_slope = 0.4, - prior_rho_min = 1, - prior_rho_prob = 0.01, - prior_sigma_max = 0.1, - prior_sigma_prob = 0.01, - prior_iideffect_sd_max = 0.01, - prior_iideffect_sd_prob = 0.01)) - objects <- setup_objects(result) expect_is(objects, 'list') @@ -195,9 +139,9 @@ test_that('Check that setup_objects works', { expect_is(objects$field_objects, 'list') expect_true(is.null(objects$iid_objects)) - newdata <- terra::crop(c(r, r2), c(0, 180, -90, 90)) - names(newdata) <- c('layer1', 'layer2') - objects2 <- setup_objects(result, newdata) + new_data <- terra::crop(c(r, r2), c(0, 180, -90, 90)) + names(new_data) <- c('layer1', 'layer2') + objects2 <- setup_objects(result, new_data) expect_is(objects2, 'list') expect_equal(length(objects2), 3) @@ -217,23 +161,6 @@ test_that('Check that setup_objects works', { test_that('Check that predict_single_raster works', { - skip_if_not_installed('INLA') - skip_on_cran() - - result <- disag_model(test_data, iterations = 100, - iid = TRUE, - field = TRUE, - priors = list(priormean_intercept = 
0, - priorsd_intercept = 1, - priormean_slope = 0.0, - priorsd_slope = 0.4, - prior_rho_min = 1, - prior_rho_prob = 0.01, - prior_sigma_max = 0.1, - prior_sigma_prob = 0.01, - prior_iideffect_sd_max = 0.01, - prior_iideffect_sd_prob = 0.01)) - objects <- setup_objects(result) pars <- result$obj$env$last.par.best diff --git a/tests/testthat/test-prepare-data.R b/tests/testthat/test-prepare-data.R index f71c4dd..4e13cfc 100644 --- a/tests/testthat/test-prepare-data.R +++ b/tests/testthat/test-prepare-data.R @@ -2,106 +2,110 @@ context("Preparing data") test_that("Check prepare_data function works as expected", { - skip_on_cran() - result <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack) expect_is(result, 'disag_data') expect_equal(length(result), 10) expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', - 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh')) + 'aggregation_pixels', 'coords_for_fit', 'coords_for_prediction', 'start_end_index', 'mesh')) expect_is(result$polygon_shapefile, 'sf') expect_is(result$shapefile_names, 'list') expect_is(result$covariate_rasters, 'SpatRaster') expect_is(result$polygon_data, 'data.frame') expect_is(result$covariate_data, 'data.frame') expect_is(result$aggregation_pixels, 'numeric') - expect_is(result$coordsForFit, 'matrix') - expect_is(result$coordsForPrediction, 'matrix') - expect_is(result$startendindex, 'matrix') + expect_is(result$coords_for_fit, 'matrix') + expect_is(result$coords_for_prediction, 'matrix') + expect_is(result$start_end_index, 'matrix') expect_is(result$mesh, 'inla.mesh') expect_equal(sum(is.na(result$polygon_data$N)), length(result$polygon_data$N)) - expect_equal(nrow(result$polygon_data), nrow(result$startendindex)) - expect_equal(nrow(result$covariate_data), nrow(result$coordsForFit)) + expect_equal(nrow(result$polygon_data), nrow(result$start_end_index)) + 
expect_equal(nrow(result$covariate_data), nrow(result$coords_for_fit)) }) test_that("Check prepare_data function with sample size works as expected", { - skip_on_cran() - result <- prepare_data(polygon_shapefile = spdf_binom, covariate_rasters = cov_stack, sample_size_var = 'sample_size', - makeMesh = FALSE) + make_mesh = FALSE) expect_is(result, 'disag_data') expect_equal(length(result), 10) expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', - 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh')) + 'aggregation_pixels', 'coords_for_fit', 'coords_for_prediction', 'start_end_index', 'mesh')) expect_is(result$polygon_shapefile, 'sf') expect_is(result$shapefile_names, 'list') expect_is(result$covariate_rasters, 'SpatRaster') expect_is(result$polygon_data, 'data.frame') expect_is(result$covariate_data, 'data.frame') expect_is(result$aggregation_pixels, 'numeric') - expect_is(result$coordsForFit, 'matrix') - expect_is(result$coordsForPrediction, 'matrix') - expect_is(result$startendindex, 'matrix') + expect_is(result$coords_for_fit, 'matrix') + expect_is(result$coords_for_prediction, 'matrix') + expect_is(result$start_end_index, 'matrix') expect_true(is.null(result$mesh)) expect_equal(sum(is.na(result$polygon_data$N)), 0) - expect_equal(nrow(result$polygon_data), nrow(result$startendindex)) - expect_equal(nrow(result$covariate_data), nrow(result$coordsForFit)) + expect_equal(nrow(result$polygon_data), nrow(result$start_end_index)) + expect_equal(nrow(result$covariate_data), nrow(result$coords_for_fit)) }) test_that("Check prepare_data function deals with NAs as expected", { - skip_on_cran() - cov_stack_na <- cov_stack cov_stack_na[[1]][c(1:10)] <- NA aggregation_raster_na <- r aggregation_raster_na[c(1:10)] <- NA - expect_error(prepare_data(polygon_shapefile = spdf_na, covariate_rasters = cov_stack, makeMesh = FALSE)) - expect_error(prepare_data(polygon_shapefile = 
spdf, covariate_rasters = cov_stack_na, makeMesh = FALSE)) - expect_error(prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack, aggregation_raster = aggregation_raster_na, makeMesh = FALSE)) + aggregation_raster_zero <- r + aggregation_raster_zero[c(1:2, 21:22)] <- 0 + + expect_error(prepare_data(polygon_shapefile = spdf_na, covariate_rasters = cov_stack, make_mesh = FALSE)) + expect_error(prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack_na, make_mesh = FALSE)) + expect_error(prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack, aggregation_raster = aggregation_raster_na, make_mesh = FALSE)) + expect_error(prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack, aggregation_raster = aggregation_raster_zero, make_mesh = FALSE)) result <- prepare_data(polygon_shapefile = spdf_na, covariate_rasters = cov_stack_na, aggregation_raster = aggregation_raster_na, - na.action = TRUE, - makeMesh = FALSE) + na_action = TRUE, + make_mesh = FALSE) expect_is(result, 'disag_data') expect_equal(length(result), 10) expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', - 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh')) + 'aggregation_pixels', 'coords_for_fit', 'coords_for_prediction', 'start_end_index', 'mesh')) expect_is(result$polygon_shapefile, 'sf') expect_is(result$shapefile_names, 'list') expect_is(result$covariate_rasters, 'SpatRaster') expect_is(result$polygon_data, 'data.frame') expect_is(result$covariate_data, 'data.frame') expect_is(result$aggregation_pixels, 'numeric') - expect_is(result$coordsForFit, 'matrix') - expect_is(result$startendindex, 'matrix') + expect_is(result$coords_for_fit, 'matrix') + expect_is(result$start_end_index, 'matrix') expect_true(is.null(result$mesh)) - expect_equal(nrow(result$polygon_data), nrow(result$startendindex)) - expect_equal(nrow(result$covariate_data), 
nrow(result$coordsForFit)) + expect_equal(nrow(result$polygon_data), nrow(result$start_end_index)) + expect_equal(nrow(result$covariate_data), nrow(result$coords_for_fit)) expect_equal(sum(is.na(result$polygon_data$response)), 0) expect_equal(sum(is.na(result$covariate_data)), 0) expect_equal(sum(is.na(result$aggregation_pixels)), 0) expect_equal(nrow(result$polygon_shapefile), nrow(spdf_na) - 1) + + result <- prepare_data(polygon_shapefile = spdf_na, + covariate_rasters = cov_stack_na, + aggregation_raster = aggregation_raster_zero, + na_action = TRUE, + make_mesh = FALSE) + + expect_equal(nrow(result$polygon_shapefile), nrow(spdf_na) - 2) }) test_that("Check as.disag_data function works as expected", { - skip_on_cran() - polygon_data <- getPolygonData(spdf, id_var = 'area_id', response_var = 'response') cov_data <- terra::extract(cov_stack, spdf, cells=TRUE, na.rm=TRUE, ID=TRUE) @@ -109,11 +113,11 @@ test_that("Check as.disag_data function works as expected", { aggregation_data <- rep(1, nrow(cov_data)) - coordsForFit <- extractCoordsForMesh(cov_stack, cov_data$cellid) + coords_for_fit <- extractCoordsForMesh(cov_stack, cov_data$cellid) - coordsForPrediction <- extractCoordsForMesh(cov_stack) + coords_for_prediction <- extractCoordsForMesh(cov_stack) - startendindex <- getStartendindex(cov_data, polygon_data, 'area_id') + start_end_index <- getStartendindex(cov_data, polygon_data, 'area_id') result <- as.disag_data(spdf, list('area_id', 'response'), @@ -121,27 +125,39 @@ test_that("Check as.disag_data function works as expected", { polygon_data, cov_data, aggregation_data, - coordsForFit, - coordsForPrediction, - startendindex, + coords_for_fit, + coords_for_prediction, + start_end_index, mesh = NULL) expect_is(result, 'disag_data') expect_equal(length(result), 10) expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', - 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 
'startendindex', 'mesh')) + 'aggregation_pixels', 'coords_for_fit', 'coords_for_prediction', 'start_end_index', 'mesh')) expect_is(result$polygon_shapefile, 'sf') expect_is(result$shapefile_names, 'list') expect_is(result$covariate_rasters, 'SpatRaster') expect_is(result$polygon_data, 'data.frame') expect_is(result$covariate_data, 'data.frame') expect_is(result$aggregation_pixels, 'numeric') - expect_is(result$coordsForFit, 'matrix') - expect_is(result$coordsForPrediction, 'matrix') - expect_is(result$startendindex, 'matrix') + expect_is(result$coords_for_fit, 'matrix') + expect_is(result$coords_for_prediction, 'matrix') + expect_is(result$start_end_index, 'matrix') expect_true(is.null(result$mesh)) - expect_equal(nrow(result$polygon_data), nrow(result$startendindex)) - expect_equal(nrow(result$covariate_data), nrow(result$coordsForFit)) + expect_equal(nrow(result$polygon_data), nrow(result$start_end_index)) + expect_equal(nrow(result$covariate_data), nrow(result$coords_for_fit)) }) +test_that("Check prepare_data warns about non-numeric covariates", { + r3 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) + terra::ext(r3) <- terra::ext(spdf) + r3[] <- sapply(1:terra::ncell(r), function(x) as.integer(x)) + + cov_stack <- c(r, r2, r3) + names(cov_stack) <- c('layer1', 'layer2', 'layer3') + + expect_warning(prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack), + "The values of layer3 are not numeric") + +}) diff --git a/tests/testthat/test-summary.R b/tests/testthat/test-summary.R index 0538441..0839587 100644 --- a/tests/testthat/test-summary.R +++ b/tests/testthat/test-summary.R @@ -2,8 +2,6 @@ context("Summary functions") test_that("Check summary.disag_data function works as expected", { - skip_on_cran() - data_summary <- summary(test_data) expect_is(data_summary, 'list') @@ -18,8 +16,6 @@ test_that("Check summary.disag_data function works as expected", { test_that("Check print.disag_data function works as expected", { - skip_on_cran() 
- print_output <- print(test_data) expect_is(print_output, 'disag_data') @@ -29,16 +25,13 @@ test_that("Check print.disag_data function works as expected", { test_that("Check summary.disag_model function works as expected", { - skip_if_not_installed('INLA') - skip_on_cran() - - result <- disag_model(test_data, field = FALSE, iterations = 2) - model_summary <- summary(result) expect_is(model_summary, 'list') expect_equal(length(model_summary), 3) expect_equal(names(model_summary), c('model_params', 'nll', 'metrics')) + expect_true(c("layer1" %in% rownames(model_summary$model_params))) + expect_true(c("layer2" %in% rownames(model_summary$model_params))) expect_is(model_summary$model_params, 'matrix') expect_is(model_summary$nll, 'numeric') expect_is(model_summary$metrics, 'data.frame') @@ -48,11 +41,6 @@ test_that("Check summary.disag_model function works as expected", { test_that("Check print.disag_model function works as expected", { - skip_if_not_installed('INLA') - skip_on_cran() - - result <- disag_model(test_data, field = FALSE, iterations = 2) - print_output <- print(result) expect_is(print_output, 'disag_model') @@ -62,21 +50,6 @@ test_that("Check print.disag_model function works as expected", { test_that("Check summary.disag_predictions function works as expected", { - skip_if_not_installed('INLA') - skip_on_cran() - - result <- disag_model(test_data, iid = FALSE, iterations = 100, - list(priormean_intercept = 0, - priorsd_intercept = 0.1, - priormean_slope = 0.0, - priorsd_slope = 0.1, - prior_rho_min = 5, - prior_rho_prob = 0.01, - prior_sigma_max = 0.1, - prior_sigma_prob = 0.01, - prior_iideffect_sd_max = 0.00001, - prior_iideffect_sd_prob = 0.01)) - pred <- predict(result) model_summary <- summary(pred) @@ -92,21 +65,6 @@ test_that("Check summary.disag_predictions function works as expected", { test_that("Check print.disag_predictions function works as expected", { - skip_if_not_installed('INLA') - skip_on_cran() - - result <- disag_model(test_data, iid 
= FALSE, iterations = 100, - list(priormean_intercept = 0, - priorsd_intercept = 0.1, - priormean_slope = 0.0, - priorsd_slope = 0.1, - prior_rho_min = 5, - prior_rho_prob = 0.01, - prior_sigma_max = 0.1, - prior_sigma_prob = 0.01, - prior_iideffect_sd_max = 0.0001, - prior_iideffect_sd_prob = 0.01)) - pred <- predict(result) print_output <- print(pred) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 9892b98..efa58f9 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -19,10 +19,6 @@ knitr::opts_chunk$set( ) ``` -```{r, echo = FALSE, eval = TRUE} -isINLA <- requireNamespace('INLA', quietly = TRUE) -``` - The **disaggregation** package contains functions to run Bayesian disaggregation models. Aggregated response data over large heterogeneous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The github page for this package can be found [here](https://github.com/aknandi/disaggregation). Install **disaggregation** using: @@ -37,7 +33,7 @@ or from github using devtools::install_github("aknandi/disaggregation") ``` -The key functions are `prepare_data`, `fit_model` and `predict`. The `prepare_data` function takes the aggregated data and covariate data to be used in the model and produces an object to be use by `fit_model`. This functions runs the disaggregation model and the out can be passed to `predict` to produce fine-scale predicted maps of the response variable. +The key functions are `prepare_data`, `disag_model` and `predict`. The `prepare_data` function takes the aggregated data and covariate data to be used in the model and produces an object to be use by `disag_model`. This functions runs the disaggregation model and the out can be passed to `predict` to produce fine-scale predicted maps of the response variable. 
To use the disaggregation `prepare_data` function, you must have the aggregated data as a `sf` object and a `SpatRaster` of the covariate data to be used in the model. @@ -112,20 +108,20 @@ Now we have setup the data we can use the `prepare_data` function to create the The user can also control the parameters of the mesh that is used to create the spatial field. The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. The mesh parameters: `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [fm_nonconvex_hull_inla function](https://rdrr.io/cran/fmesher/man/fm_nonconvex_hull_inla.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [fm_mesh_2d function](https://rdrr.io/cran/fmesher/man/fm_mesh_2d.html). -```{r, fig.show='hold', eval= isINLA} +```{r, fig.show='hold'} data_for_model <- prepare_data(polygon_shapefile = df, covariate_rasters = cov_stack, aggregation_raster = pop_raster, response_var = 'cases', id_var = 'censustract.FIPS', - mesh.args = list(cut = 0.01, + mesh_args = list(cutoff = 0.01, offset = c(0.1, 0.5), max.edge = c(0.1, 0.2), resolution = 250), - na.action = TRUE) + na_action = TRUE) ``` -```{r, fig.show='hold', eval= isINLA} +```{r, fig.show='hold'} plot(data_for_model) ``` @@ -164,7 +160,7 @@ $dpois(y_j, cases_j)$ By default the model contains a spatial field and a polygon iid effect. These can be turned off in the `disag_model` function, using `field = FALSE` or `iid = FALSE`. 
-```{r, fig.show='hold', eval=isINLA} +```{r, fig.show='hold'} model_result <- disag_model(data_for_model, iterations = 1000, family = 'poisson', @@ -181,14 +177,14 @@ model_result <- disag_model(data_for_model, prior_iideffect_sd_prob = 0.01)) ``` -```{r, fig.show='hold', eval=isINLA} +```{r, fig.show='hold'} plot(model_result) ``` Now we have the results from the model of the fitted parameters, we can predict Leukemia incidence rate at fine-scale (the scale of the covariate data) across New York. The `predict` function takes the model result and predicts both the mean raster surface and predicts and summarises `N` parameter draws, where `N` is set by the user (default 100). The uncertainty is summarised via the confidence interval set by the user (default 95% CI). -```{r, fig.show='hold', eval=isINLA} +```{r, fig.show='hold'} preds <- predict(model_result, N = 100, CI = 0.95)