diff --git a/.Rbuildignore b/.Rbuildignore index af84591..3ad7674 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,7 +1,10 @@ -^.*\.Rproj$ -^\.Rproj\.user$ -^\.git* -README.md -.travis.yml -vignettes/disaggregation_cache/* -cran-comments.md +^.*\.Rproj$ +^\.Rproj\.user$ +^\.git* +README.md +.travis.yml +vignettes/disaggregation_cache/* +cran-comments.md +^\.github$ +.github/workflows/R-CMD-check-HTML5.archyaml +vignettes/spatio_temporal_disaggregation.Rmd \ No newline at end of file diff --git a/vignettes/.gitignore b/.github/.gitignore similarity index 63% rename from vignettes/.gitignore rename to .github/.gitignore index 097b241..2d19fc7 100644 --- a/vignettes/.gitignore +++ b/.github/.gitignore @@ -1,2 +1 @@ *.html -*.R diff --git a/.github/workflows/R-CMD-build.yaml b/.github/workflows/R-CMD-build.yaml new file mode 100644 index 0000000..01e9167 --- /dev/null +++ b/.github/workflows/R-CMD-build.yaml @@ -0,0 +1,75 @@ +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +on: + push: + branches: + 'build' + pull_request: + branches: + - master + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macOS-12, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + extra-repositories: "https://inla.r-inla-download.org/R/testing" + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@9 + brew install gdal + + - name: Install system dependencies on Linux (GL) + if: runner.os == 'Linux' + run: | + sudo apt-get update -y && sudo apt-get install -y libglu1-mesa-dev + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"all"' + extra-packages: | + rcmdcheck + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - uses: r-lib/actions/check-r-package@v2 + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + with: + args: 'c("--no-manual", "--as-cran")' diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml new file mode 100644 index 0000000..a4c492c --- /dev/null +++ b/.github/workflows/R-CMD-check-HTML5.yaml @@ -0,0 +1,58 @@ +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
+# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +on: + push: + branches: [html5] + pull_request: + branches: [html5] + +name: R-CMD-check-html5 + + +jobs: + HTML5-check: + runs-on: ubuntu-latest + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + extra-repositories: "https://inla.r-inla-download.org/R/stable" + + + - name: Install pdflatex + run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@8 + brew install gdal + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"all"' + extra-packages: | + rcmdcheck + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - uses: r-lib/actions/check-r-package@v2 + with: + args: '"--as-cran"' + build_args: 'character()' + #error-on: '"note"' diff --git a/.github/workflows/R-CMD-check-no-suggests.yaml b/.github/workflows/R-CMD-check-no-suggests.yaml new file mode 100644 index 0000000..cea02e9 --- /dev/null +++ b/.github/workflows/R-CMD-check-no-suggests.yaml @@ -0,0 +1,96 @@ +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +# +# Largely copied from: https://github.com/inlabru-org/inlabru/blob/devel/.github/workflows/R-CMD-check-no-suggests.yaml +# Want to test without suggests to ensure things don't fail on cran when INLA isn't there. + +on: + push: + branches: + '**' + pull_request: + branches: + - devel + - master + +name: R-CMD-check-no-suggests + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + # - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + extra-repositories: "https://inla.r-inla-download.org/R/testing" + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@9 + brew install gdal + + - name: Has inla? Check. + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + "INLA" %in% pkgs + shell: Rscript {0} + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"hard"' + extra-packages: | + rcmdcheck + testthat + + - name: Has inla? Check, and remove. 
+ run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + "INLA" %in% pkgs + if ("INLA" %in% pkgs) { + remove.packages("INLA") + } + shell: Rscript {0} + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - uses: r-lib/actions/check-r-package@v2 + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + _R_CHECK_FORCE_SUGGESTS_: false + with: + build_args: 'c("--no-manual", "--no-build-vignettes")' + args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")' + + + + \ No newline at end of file diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..41332d5 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,69 @@ +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +on: + push: + branches: + '**' + pull_request: + branches: + - devel + - master + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + # - {os: windows-latest, r: 'release'} + # - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + extra-repositories: "https://inla.r-inla-download.org/R/stable" + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@8 + brew install gdal + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"all"' + extra-packages: | + rcmdcheck + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - uses: r-lib/actions/check-r-package@v2 + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + with: + args: 'c("--no-manual", "--as-cran")' diff --git a/.gitignore b/.gitignore index d8a4b5a..eef5be0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,11 @@ -inst/doc -.Rproj.user -.Rhistory -.Rproj -.RData -*.o -*.so -vignettes/disaggregation_cache/* -vignettes/disaggregation_files/* +inst/doc +.Rproj.user +.Rhistory +.Rproj +.RData +*.o +*.so +vignettes/disaggregation_cache/* +vignettes/disaggregation_files/* +.github/workflows/R-CMD-check-HTML5.archyaml +vignettes/spatio_temporal_disaggregation.Rmd \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 35ed9ee..0000000 --- a/.travis.yml +++ /dev/null @@ -1,59 +0,0 @@ - -# Continuous integration with travis -language: r -sudo: required -warnings_are_errors: false - -# cache packages to speed up builds -cache: packages - -before_install: - - if [[ "$OSTYPE" != "linux-gnu" ]]; - then sudo tlmgr install index; - else tlmgr install 
index; - fi - -addons: - apt: - packages: - - libgdal-dev - - libproj-dev - - libudunits2-dev - -r_packages: - - covr - - testthat - - raster - - sp - - devtools - - rgeos - - splancs - - Matrix - - stats - - TMB - - RcppEigen - -coverage: - status: - project: - default: - threshold: 5% - target: 80% - -after_success: - - Rscript -e 'library(covr);codecov()' - -matrix: - include: - - r: release - install: R -e 'install.packages("devtools", dep = TRUE);devtools::install_deps(dep = c("Depends", "Imports"));install.packages("INLA",repos=c(getOption("rep> -' - os: linux - - r: devel - install: R -e 'install.packages("devtools", dep = TRUE);devtools::install_deps(dep = c("Depends", "Imports"));install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE) -' - os: linux - - r: devel - env: _R_CHECK_FORCE_SUGGESTS_=false - install: R -e 'install.packages("devtools", dep = TRUE);devtools::install_deps(dep = c("Depends", "Imports"))' - diff --git a/DESCRIPTION b/DESCRIPTION index 49ed5cb..c49bce4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,32 +1,25 @@ Package: disaggregation Type: Package Title: Disaggregation Modelling -Version: 0.1.3 +Version: 0.3.0 Authors@R: c( - person("Anita", "Nandi", email = "anita.k.nandi@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-5087-2494")), - person("Tim", "Lucas", email = "timcdlucas@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-4694-8107")), + person("Anita", "Nandi", email = "anita.k.nandi@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-5087-2494")), + person("Tim", "Lucas", email = "timcdlucas@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4694-8107")), person("Rohan", "Arambepola", email = "rarambepola@gmail.com", role = "aut"), - person("Andre", "Python", email = "python.andre@gmail.com", role = "aut", comment = c(ORCID = "0000-0001-8094-7226")) + person("Andre", "Python", email = "python.andre@gmail.com", role = "aut", comment = c(ORCID = "0000-0001-8094-7226")), + person("Simon", "Smart", email = "simon.smart@cantab.net", role = "ctb") ) Description: Fits disaggregation regression models using 'TMB' ('Template Model Builder'). When the response data are aggregated to polygon level but the predictor variables are at a higher resolution, these models can be - useful. Regression models with spatial random fields. A useful reference for disaggregation modelling is - Lucas et al. (2019) . + useful. Regression models with spatial random fields. The package is + described in detail in Nandi et al. (2023) . 
License: MIT + file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 7.0.2 +RoxygenNote: 7.2.3 Imports: - maptools, - raster, - foreach, - sp, - parallel, - doParallel, - rgeos, splancs, - rgdal, Matrix, stats, TMB, @@ -34,13 +27,18 @@ Imports: ggplot2, cowplot, sparseMVN, + fmesher, + tidyterra, + terra, + sf, utils Additional_repositories: https://inla.r-inla-download.org/R/stable Suggests: testthat, INLA, knitr, - rmarkdown + rmarkdown, + SpatialEpi LinkingTo: TMB, RcppEigen diff --git a/NAMESPACE b/NAMESPACE index d643336..47705eb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,15 +18,12 @@ export(getCovariateRasters) export(getPolygonData) export(getStartendindex) export(make_model_object) -export(parallelExtract) export(predict_model) export(predict_uncertainty) export(prepare_data) import(ggplot2) -importFrom(doParallel,registerDoParallel) -importFrom(foreach,"%dopar%") -importFrom(parallel,makeCluster) -importFrom(parallel,stopCluster) +import(splancs) +import(utils) importFrom(stats,cor) importFrom(stats,quantile) importFrom(stats,sd) diff --git a/R/build_mesh.R b/R/build_mesh.R index 6e7f48e..b005a55 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -1,79 +1,83 @@ #' Build mesh for disaggregaton model -#' -#' \emph{build_mesh} function takes a SpatialPolygons object and mesh arguments to build an appropriate mesh for the spatial field. #' -#' The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary -#' and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest +#' \emph{build_mesh} function takes a sf object and mesh arguments to build an appropriate mesh for the spatial field. +#' +#' The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary +#' and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest #' and having a small region outside with a coarser mesh to avoid edge effects. -#' +#' #' Six mesh parameters can be specified as arguments: \emph{convex}, \emph{concave} and \emph{resolution}, #' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself, -#' with the names meaing the same as used by INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. -#' +#' with the names meaning the same as used by INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. +#' #' Defaults are: #' pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, 8), cut = 0.4, offset = c(1, 15)). -#' -#' @param shapes shapefile covering the region under investigation. +#' +#' @param shapes sf covering the region under investigation. #' @param mesh.args list of parameters that control the mesh structure. \emph{convex}, \emph{concave} and \emph{resolution}, #' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself, #' with the parameters having the same meaning as in the INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}. 
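For example, a sketch of overriding a subset of these parameters, with spdf as built in the example below (the values here are purely illustrative):

```r
# Entries named in mesh.args override the corresponding defaults;
# any parameter not supplied keeps the value computed by build_mesh
my_mesh <- build_mesh(spdf,
                      mesh.args = list(max.edge = c(0.5, 1),
                                       cut = 0.05,
                                       offset = c(1, 2)))
```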
#' #' @return An inla.mesh object -#' +#' #' @name build_mesh #' -#' @examples +#' @examples #' \dontrun{ -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) -#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } -#' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' my_mesh <- build_mesh(spdf) +#' polygons <- list() +#' for(i in 1:100) { +#' row <- ceiling(i/10) +#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) +#' } +#' +#' polys <- lapply(polygons, sf::st_polygon) +#' response_df <- data.frame(area_id = 1:100, +#' response = runif(100, min = 0, max = 10)) +#' spdf <- sf::st_sf(polys, response_df) +#' +#' my_mesh <- build_mesh(spdf) #' } #' #' @export build_mesh <- function(shapes, mesh.args = NULL) { - stopifnot(inherits(shapes, 'SpatialPolygons')) + stopifnot(inherits(shapes, 'sf')) if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) - + + limits <- sf::st_bbox(shapes) + hypotenuse <- sqrt((limits$xmax - limits$xmin)^2 + (limits$ymax - limits$ymin)^2) + maxedge <- hypotenuse/10 + + pars <- list(convex = -0.01, concave = -0.5, resolution = 300, - max.edge = c(3.0, 8), - cut = 0.4, - offset = c(1, 15)) - + max.edge = c(maxedge, maxedge * 2), + cut = 0.1, + offset = c(hypotenuse / 10, hypotenuse / 10)) + + pars[names(mesh.args)] <- mesh.args - outline <- maptools::unionSpatialPolygons(shapes, IDs = rep(1, length(shapes))) + outline <- sf::st_sf(sf::st_union(sf::st_convex_hull(shapes))) - coords <- list() - for(i in seq_len(length(outline@polygons[[1]]@Polygons))){ - coords[[i]] <- outline@polygons[[1]]@Polygons[[i]]@coords - } - coords <- do.call(rbind, coords) + coords <- sf::st_coordinates(outline)[, c('X', 'Y')] - outline.hull <- INLA::inla.nonconvex.hull(coords, - convex = pars$convex, + outline.hull <- fmesher::fm_nonconvex_hull_inla(coords, + convex = pars$convex, concave = pars$concave, resolution = pars$resolution) - - mesh <- INLA::inla.mesh.2d( + + mesh <- fmesher::fm_mesh_2d( boundary = outline.hull, - max.edge = pars$max.edge, - cut = pars$cut, + max.edge = pars$max.edge, + cut = pars$cut, offset = pars$offset) - - + + return(mesh) } diff --git a/R/extract.R b/R/extract.R index 7a1d7d6..dd7fd75 100644 --- a/R/extract.R +++ b/R/extract.R @@ -1,206 +1,108 @@ -#' Parallel extraction of raster stack by shape file. -#' -#' Parallelisation is performed across rasters, not shapes. -#' So this function is only useful if you are extracting -#' data from many raster layers. -#' As the overhead for parallel computation in windows is high -#' it only makes sense to parallelise in this way. +#' Extract polygon id and response data into a data.frame from a sf object #' -#' -#' @param raster A RasterBrick or RasterStack object. -#' @param shape A SpatialPolygons object. -#' @param fun The function used to aggregate the pixel data. If NULL, raw pixel data is returned. -#' @param id Name of column in shape object to be used to bind an ID column to output. -#' @param ... Other arguments to raster::extract.
-#' -#' @return A data.frame with columns of polygon id, cell id (if fun = NULL) and a column for each raster in the stack -#' -#' @importFrom foreach %dopar% -#' @importFrom parallel stopCluster -#' @importFrom parallel makeCluster -#' @importFrom doParallel registerDoParallel -#' -#' @export -#' @examples -#' \dontrun{ -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) -#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } -#' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' r <- raster::raster(ncol=20, nrow=20) -#' r <- raster::setExtent(r, raster::extent(spdf)) -#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) -#' r2 <- raster::raster(ncol=20, nrow=20) -#' r2 <- raster::setExtent(r2, raster::extent(spdf)) -#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) -#' cov_rasters <- raster::stack(r, r2) -#' -#' cl <- parallel::makeCluster(2) -#' doParallel::registerDoParallel(cl) -#' result <- parallelExtract(cov_rasters, spdf, fun = NULL, id = 'area_id') -#' parallel::stopCluster(cl) -#' foreach::registerDoSEQ() -#' } - -parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ - - if (!requireNamespace("foreach", quietly = TRUE)) { - stop("foreach needed for this function to work. Please install it.", - call. = FALSE) - } - - shape@data[, id] <- as.character(shape@data[, id]) - - i <- NULL - # Run extract in parallel. - values <- foreach::foreach(i = seq_along(shape)) %dopar% { - raster::extract(raster, shape[i, ], fun = fun, na.rm = TRUE, cellnumbers = TRUE, ...) - } - if(!is.null(fun)){ - # If a summary function was given, just bind everything together and add ID column - df <- data.frame(do.call(rbind, values)) - if(class(shape) == 'SpatialPolygonsDataFrame'){ - df <- cbind(ID = as.data.frame(shape)[, id], df) - } else{ - df <- cbind(ID = names(shape), df) - id <- 'id' - } - - names(df) <- c(id, names(raster)) - - return(df) - } else { - # If no summary was given we get a list of length n.shapes - # each entry in the list is a dataframe with n.covariates columns - # Want to make covariates columns, rbind shapes, and add shape and cell id columns. - - # list of vectors, one for each covariate - values_id <- lapply(seq_along(values), function(x) data.frame(shape@data[, id][x], values[[x]][[1]])) - - - df <- do.call(rbind, values_id) - names(df) <- c(id, 'cellid', names(raster)) - - return(df) - } - -} - - -#' Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame -#' -#' Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the -#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does +#' Returns a data.frame with a row for each polygon in the sf object and columns: area_id, response and N, containing the id of the +#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does #' not exist), this column will contain NAs. -#' -#' @param shape A SpatialPolygons object containing response data. 
+#' +#' @param shape A sf object containing response data. #' @param id_var Name of column in shape object with the polygon id. Default 'area_id'. #' @param response_var Name of column in shape object with the response data. Default 'response'. -#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL. -#' -#' @return A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the -#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does +#' @param sample_size_var For survey data, name of column in sf object (if it exists) with the sample size data. Default NULL. +#' +#' @return A data.frame with a row for each polygon in the sf object and columns: area_id, response and N, containing the id of the +#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does #' not exist), this column will contain NAs. -#' +#' #' @export #' @examples { -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) -#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } -#' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' +#' polygons <- list() +#' for(i in 1:100) { +#' row <- ceiling(i/10) +#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) +#' } +#' +#' polys <- lapply(polygons,sf::st_polygon) +#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) +#' spdf <- sf::st_sf(response_df, geometry = polys) +#' #' getPolygonData(spdf, id_var = 'area_id', response_var = 'response') #' } -#' -#' +#' +#' getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', sample_size_var = NULL) { - + if(is.null(sample_size_var)) { - polygon_df <- shape@data[, c(id_var, response_var)] + polygon_df <- shape[, c(id_var, response_var), drop = TRUE] polygon_df$N <- rep(NA, nrow(polygon_df)) } else { - polygon_df <- shape@data[, c(id_var, response_var, sample_size_var)] + polygon_df <- shape[, c(id_var, response_var, sample_size_var), drop = TRUE] } - + names(polygon_df) <- c('area_id', 'response', 'N') - + return(polygon_df) } -#' Get a RasterStack of covariates from a folder containing .tif files -#' -#' Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter. -#' +#' Get a SpatRaster of covariates from a folder containing .tif files +#' +#' Looks in a specified folder for raster files. Returns a multi-layered SpatRaster of the rasters cropped to the extent specified by the shape parameter. +#' #' @param directory Filepath to the directory containing the rasters. #' @param file_pattern Pattern the filenames must match. Default is all files ending in .tif . #' @param shape An object with an extent that the rasters will be cropped to. 
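A runnable sketch of the intended usage, first writing two toy .tif files to a temporary directory (the directory and file names are arbitrary):

```r
raster_dir <- file.path(tempdir(), 'rasters')
dir.create(raster_dir, showWarnings = FALSE)

r <- terra::rast(ncol = 20, nrow = 20)
r[] <- rnorm(terra::ncell(r))
terra::writeRaster(r, file.path(raster_dir, 'cov1.tif'), overwrite = TRUE)
terra::writeRaster(r * 2, file.path(raster_dir, 'cov2.tif'), overwrite = TRUE)

# Two-layer SpatRaster, cropped to the extent of the 'shape' argument
cov_stack <- getCovariateRasters(raster_dir, file_pattern = '.tif$', shape = r)
```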
-#' -#' @return A RasterStack of the raster files in the directory -#' +#' +#' @return A multi-layered SpatRaster of the raster files in the directory +#' #' @export -#' @examples +#' @examples #' \dontrun{ #' getCovariateRasters('/home/rasters', '.tif$', shape) #' } -#' +#' getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) { - + stopifnot(dir.exists(directory)) - + covariate_files <- list.files(directory, pattern = file_pattern, full.names = TRUE) stopifnot(length(covariate_files) != 0) - - covariate_rasters <- lapply(covariate_files, function(x) raster::raster(x)) - covariate_stack <- raster::stack(covariate_rasters) - - covariate_stack <- raster::crop(covariate_stack, shape) - + + covariate_rasters <- lapply(covariate_files, function(x) terra::rast(x)) + covariate_stack <- terra::rast(covariate_rasters) + + covariate_stack <- terra::crop(covariate_stack, shape) + #covariate_stack <- terra::mask(covariate_stack, shape) + return(covariate_stack) } # Extract coordinates from raster to use constructing the INLA mesh -# -# @param cov_rasters RasterStack of the covariate rasters. +# +# @param cov_rasters SpatRaster of the covariate rasters. # @param selectIds numeric vector containing cell ids to retain. Default NULL retains all cell ids in the covariate rasters. -# +# # @return A matrix containing the coordinates used to make the mesh extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) { - - stopifnot(inherits(cov_rasters, c('RasterStack', 'RasterBrick'))) + + stopifnot(inherits(cov_rasters, 'SpatRaster')) if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric')) - + points_raster <- cov_rasters[[1]] - points_raster[is.na(raster::values(points_raster))] <- -9999 - raster_pts <- raster::rasterToPoints(points_raster, spatial = TRUE) - coords <- raster_pts@coords - + points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999 + raster_pts <- terra::as.points(points_raster) + coords <- terra::crds(raster_pts) + # If specified, only retain certain pixel ids if(!is.null(selectIds)) { coords <- coords[selectIds, ] } - - return(coords) - -} - + return(coords) +} diff --git a/R/fit_model.R b/R/fit_model.R index cf58815..e448890 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -1,45 +1,46 @@ #' Fit the disaggregation model -#' -#' \emph{fit_model} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} and performs a Bayesian disaggregation fit. -#' +#' +#' \emph{fit_model} function takes a \emph{disag_data} object created by +#' \code{\link{prepare_data}} and performs a Bayesian disaggregation fit. 
+#' #' \strong{The model definition} -#' -#' The disaggregation model make predictions at the pixel level: +#' +#' The disaggregation model makes predictions at the pixel level: #' \deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} -#' +#' #' And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): #' \deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} #' \deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} -#' -#' The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the repsonse count data): +#' +#' The different likelihoods correspond to slightly different models (\eqn{y_j}{yj} is the response count data): #' \itemize{ -#' \item Gaussian: -#' If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where +#' \item Gaussian: +#' If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where #' \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} #' \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. -#' \item Binomial: +#' \item Binomial: #' For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. #' \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. -#' \item Poisson: +#' \item Poisson: #' \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. #' } -#' -#' Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field -#' are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -#' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}$ and $\sigma_{prob}} for the variation of the field +#' +#' Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field +#' are given as penalised complexity priors: you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field +#' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field #' where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect. -#' -#' The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. -#' The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. +#' +#' The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. +#' The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. #' The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. #' These are specified as strings. -#' +#' #' The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. -#' +#' #' The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. -#' -#' The \emph{silent} argument can be used to publish/supress verbose output. Default TRUE.
-#' +#' +#' The \emph{silent} argument can be used to publish/suppress verbose output. +#' #' #' @param data disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting #' @param priors list of prior values @@ -48,188 +49,212 @@ #' @param iterations number of iterations to run the optimisation for #' @param field logical. Flag the spatial field on or off #' @param iid logical. Flag the iid effect on or off -#' @param hess_control_parscale Argument to scale parameters during the calculation of the Hessian. +#' @param hess_control_parscale Argument to scale parameters during the calculation of the Hessian. #' Must be the same length as the number of parameters. See \code{\link[stats]{optimHess}} for details. -#' @param hess_control_ndeps Argument to control step sizes during the calculation of the Hessian. -#' Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. -#' Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. +#' @param hess_control_ndeps Argument to control step sizes during the calculation of the Hessian. +#' Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. +#' Default is 1e-4, try setting a smaller value if you get NaNs in the standard error of the parameters. #' See \code{\link[stats]{optimHess}} for details. #' @param silent logical. Suppress verbose output. -#' -#' @return A list is returned of class \code{disag_model}. -#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. +#' +#' @return A list is returned of class \code{disag_model}. +#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. #' The list of class \code{disag_model} contains: -#' \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} -#' \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} +#' \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} +#' \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} #' \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.} #' \item{data }{The \emph{disag_data} object used as an input to the model.} #' \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} -#' +#' #' @name fit_model +#' @references Nandi et al. (2023) disaggregation: An R Package for Bayesian +#' Spatial Disaggregation Modeling.
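A toy illustration of the aggregation step described above, in plain R (not package code):

```r
# Five pixels inside polygon j: pixel-level predictions and the matching
# aggregation raster values (e.g. population)
pred <- c(0.2, 0.1, 0.4, 0.3, 0.25)
agg  <- c(100, 80, 120, 90, 110)

cases_j <- sum(pred * agg)            # cases_j = sum_i(pred_i * agg_i)
rate_j  <- sum(pred * agg) / sum(agg) # rate_j: aggregation-weighted mean
```

And a sketch of supplying priors, using the names documented for make_model_object below, with test_data as built in the example that follows; the values are illustrative and any subset can be given (omitted entries keep their defaults):

```r
priors <- list(priormean_intercept = 0,
               priorsd_intercept = 2,
               prior_rho_min = 3, prior_rho_prob = 0.01,
               prior_sigma_max = 1, prior_sigma_prob = 0.01)

fit <- disag_model(test_data, priors = priors,
                   family = 'poisson', link = 'log')
```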
#' -#' @examples +#' @examples #' \dontrun{ -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' polygons <- list() +#' n_polygon_per_side <- 10 +#' n_polygons <- n_polygon_per_side * n_polygon_per_side +#' n_pixels_per_side <- n_polygon_per_side * 2 +#' +#' for(i in 1:n_polygons) { +#' row <- ceiling(i/n_polygon_per_side) +#' col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } -#' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' r <- raster::raster(ncol=20, nrow=20) -#' r <- raster::setExtent(r, raster::extent(spdf)) -#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) -#' r2 <- raster::raster(ncol=20, nrow=20) -#' r2 <- raster::setExtent(r2, raster::extent(spdf)) -#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) -#' cov_rasters <- raster::stack(r, r2) -#' -#' cl <- parallel::makeCluster(2) -#' doParallel::registerDoParallel(cl) -#' test_data <- prepare_data(polygon_shapefile = spdf, -#' covariate_rasters = cov_rasters) -#' parallel::stopCluster(cl) -#' foreach::registerDoSEQ() -#' +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) +#' } +#' +#' polys <- lapply(polygons,sf::st_polygon) +#' N <- floor(runif(n_polygons, min = 1, max = 100)) +#' response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +#' +#' spdf <- sf::st_sf(response_df, geometry = polys) +#' +#' # Create raster stack +#' r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +#' terra::ext(r) <- terra::ext(spdf) +#' r[] <- sapply(1:terra::ncell(r), function(x){ +#' rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)}) +#' r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +#' terra::ext(r2) <- terra::ext(spdf) +#' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +#' cov_stack <- c(r, r2) +#' names(cov_stack) <- c('layer1', 'layer2') +#' +#' test_data <- prepare_data(polygon_shapefile = spdf, +#' covariate_rasters = cov_stack) +#' #' result <- fit_model(test_data, iterations = 2) #' } -#' +#' @export -fit_model <- function(data, - priors = NULL, - family = 'gaussian', - link = 'identity', - iterations = 100, - field = TRUE, +fit_model <- function(data, + priors = NULL, + family = 'gaussian', + link = 'identity', + iterations = 100, + field = TRUE, iid = TRUE, hess_control_parscale = NULL, hess_control_ndeps = 1e-4, silent = TRUE) { - + .Deprecated(new = 'disag_model', msg = "'fit_model' will be removed in the next version.
Please use 'disag_model' instead") - - model_output <- disag_model(data, - priors = priors, - family = family, - link = link, - iterations = iterations, - field = field, + + model_output <- disag_model(data, + priors = priors, + family = family, + link = link, + iterations = iterations, + field = field, iid = iid, hess_control_parscale = hess_control_parscale, hess_control_ndeps = hess_control_ndeps, silent = silent) - + return(model_output) - - + + } #' @export #' @rdname fit_model -disag_model <- function(data, - priors = NULL, - family = 'gaussian', - link = 'identity', - iterations = 100, - field = TRUE, +disag_model <- function(data, + priors = NULL, + family = 'gaussian', + link = 'identity', + iterations = 100, + field = TRUE, iid = TRUE, hess_control_parscale = NULL, hess_control_ndeps = 1e-4, silent = TRUE) { - - + + stopifnot(inherits(data, 'disag_data')) if(!is.null(priors)) stopifnot(inherits(priors, 'list')) stopifnot(inherits(iterations, 'numeric')) - - obj <- make_model_object(data = data, - priors = priors, - family = family, - link = link, - field = field, + + obj <- make_model_object(data = data, + priors = priors, + family = family, + link = link, + field = field, iid = iid, silent = silent) - + message('Fitting model. This may be slow.') opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = iterations, trace = 0)) - + + if(opt$convergence != 0) warning('The model did not converge. Try increasing the number of iterations') + # Get hess control parameters into a list. hess_control <- setup_hess_control(opt, hess_control_parscale, hess_control_ndeps) - + # Calculate the hessian hess <- stats::optimHess(opt$par, fn = obj$fn, gr = obj$gr, control = hess_control) - + # Calc uncertainty using the fixed hessian from above. sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE, hessian.fixed = hess) - message('Fitting model. This may be slow.') - opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = iterations, trace = 0)) - + sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE) - - if(opt$convergence != 0) warning('The model did not converge. Try increasing the number of iterations') - + + # Rename parameters to match layers + # Need to change in sd_out as well + # names(opt$par)[names(opt$par) == 'slope'] <- names(data$covariate_rasters) + model_output <- list(obj = obj, opt = opt, sd_out = sd_out, data = data, model_setup = list(family = family, link = link, field = field, iid = iid)) - + class(model_output) <- c('disag_model', 'list') - + return(model_output) } #' Create the TMB model object for the disaggregation model -#' -#' \emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} +#' +#' \emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} #' and creates a TMB model object to be used in fitting. 
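A short sketch of using the returned TMB object directly; the stats::nlminb call mirrors what disag_model itself does internally (test_data as in the examples):

```r
obj <- make_model_object(test_data, family = 'gaussian', link = 'identity')

obj$fn(obj$par)  # negative log-likelihood at the starting parameters

# The same optimisation call disag_model makes internally
opt <- stats::nlminb(obj$par, obj$fn, obj$gr,
                     control = list(iter.max = 100, trace = 0))
```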
-#' +#' #' \strong{The model definition} -#' +#' #' The disaggregation model makes predictions at the pixel level: #' \deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} -#' +#' #' And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): #' \deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} #' \deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} -#' -#' The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the repsonse count data): +#' +#' The different likelihoods correspond to slightly different models (\eqn{y_j}{yj} is the response count data): #' \itemize{ -#' \item Gaussian: -#' If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where +#' \item Gaussian: +#' If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where #' \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} #' \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. -#' \item Binomial: +#' \item Binomial: #' For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. #' \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. -#' \item Poisson: +#' \item Poisson: #' \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. #' } -#' -#' Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field -#' are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -#' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}$ and $\sigma_{prob}} for the variation of the field -#' where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect -#' -#' The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. -#' The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. +#' +#' Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field +#' are given as penalised complexity priors: you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field +#' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field +#' where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect. +#' +#' The precise names and default values for these priors are: +#' \itemize{ +#' \item priormean_intercept: 0 +#' \item priorsd_intercept: 10.0 +#' \item priormean_slope: 0.0 +#' \item priorsd_slope: 0.5 +#' \item prior_rho_min: A third the length of the diagonal of the bounding box. +#' \item prior_rho_prob: 0.1 +#' \item prior_sigma_max: sd(response/mean(response)) +#' \item prior_sigma_prob: 0.1 +#' \item prior_iideffect_sd_max: 0.1 +#' \item prior_iideffect_sd_prob: 0.01 +#' } +#' +#' The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively.
+#' The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. #' The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. #' These are specified as strings. -#' +#' #' The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. -#' +#' #' The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. -#' +#' #' The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE. -#' +#' #' #' @param data disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting #' @param priors list of prior values @@ -238,62 +263,68 @@ #' @param field logical. Flag the spatial field on or off #' @param iid logical. Flag the iid effect on or off #' @param silent logical. Suppress verbose output. -#' +#' #' @return The TMB model object returned by \code{\link[TMB]{MakeADFun}}. -#' +#' #' @name make_model_object #' -#' @examples +#' @examples #' \dontrun{ -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' polygons <- list() +#' n_polygon_per_side <- 10 +#' n_polygons <- n_polygon_per_side * n_polygon_per_side +#' n_pixels_per_side <- n_polygon_per_side * 2 +#' +#' for(i in 1:n_polygons) { +#' row <- ceiling(i/n_polygon_per_side) +#' col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } -#' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' r <- raster::raster(ncol=20, nrow=20) -#' r <- raster::setExtent(r, raster::extent(spdf)) -#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) -#' r2 <- raster::raster(ncol=20, nrow=20) -#' r2 <- raster::setExtent(r2, raster::extent(spdf)) -#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) -#' cov_rasters <- raster::stack(r, r2) -#' -#' cl <- parallel::makeCluster(2) -#' doParallel::registerDoParallel(cl) -#' test_data <- prepare_data(polygon_shapefile = spdf, -#' covariate_rasters = cov_rasters) -#' parallel::stopCluster(cl) -#' foreach::registerDoSEQ() -#' +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) +#' } +#' +#' polys <- lapply(polygons,sf::st_polygon) +#' N <- floor(runif(n_polygons, min = 1, max = 100)) +#' response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +#' +#' spdf <- sf::st_sf(response_df, geometry = polys) +#' +#' # Create raster stack +#' r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +#' terra::ext(r) <- terra::ext(spdf) +#' r[] <- sapply(1:terra::ncell(r), function(x){ +#' rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)}) +#' r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +#' terra::ext(r2) <- terra::ext(spdf) +#' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +#' cov_stack <- c(r, r2) +#' names(cov_stack) <- c('layer1', 'layer2') +#' +#' test_data <-
prepare_data(polygon_shapefile = spdf, +#' covariate_rasters = cov_stack) +#' #' result <- make_model_object(test_data) #' } -#' +#' #' @export -#' +#' -make_model_object <- function(data, - priors = NULL, - family = 'gaussian', - link = 'identity', - field = TRUE, +make_model_object <- function(data, + priors = NULL, + family = 'gaussian', + link = 'identity', + field = TRUE, iid = TRUE, silent = TRUE) { - - + + # Check that binomial model has sample_size values supplied if(family == 'binomial') { if(sum(is.na(data$polygon_data$N)) != 0) { stop("There are NAs in the sample sizes. These must be supplied for a binomial likelihood") } } - + if(family == 'gaussian') { family_id = 0 } else if(family == 'binomial') { @@ -303,7 +334,7 @@ make_model_object <- function(data, } else { stop(paste(family, "is not a valid likelihood")) } - + if(link == 'logit') { link_id = 0 } else if(link == 'log') { @@ -313,41 +344,41 @@ make_model_object <- function(data, } else { stop(paste(link, "is not a valid link function")) } - + if(family == 'gaussian' & iid) { - warning('You are using both a gaussian likeihood and an iid effect. Using both of these is redundant as they are + warning('You are using both a gaussian likelihood and an iid effect. Using both of these is redundant as they are having the same effect on the model. Consider setting iid = FALSE.') } - + if(is.null(data$mesh)) { stop('Your data object must contain an INLA mesh.') } - + nu = 1 # Sort out mesh bits - spde <- (INLA::inla.spde2.matern(data$mesh, alpha = nu + 1)$param.inla)[c("M0", "M1", "M2")] - Apix <- INLA::inla.mesh.project(data$mesh, loc = data$coordsForFit)$A + spde <- (INLA::inla.spde2.matern(data$mesh, alpha = nu + 1)$param.inla)[c("M0", "M1", "M2")] + Apix <- fmesher::fm_evaluator(data$mesh, loc = data$coordsForFit)$proj$A n_s <- nrow(spde$M0) - - cov_matrix <- as.matrix(data$covariate_data[, -c(1:2)]) - # If we have exactly one column we don't have to transpose. Sure this + + cov_matrix <- as.matrix(data$covariate_data[, (names(data$covariate_data) %in% names(data$covariate_rasters))]) + # If we have exactly one column we don't have to transpose. Sure this # this could be cleaner but I don't know how. 
if(ncol(cov_matrix) == 1){ cov_matrix <- as.matrix(apply(cov_matrix, 1, as.numeric)) } else { cov_matrix <- t(apply(cov_matrix, 1, as.numeric)) } - + # Construct sensible default field hyperpriors - limits <- sp::bbox(data$polygon_shapefile) - hypontenuse <- sqrt((limits[1,2] - limits[1,1])^2 + (limits[2,2] - limits[2,1])^2) - prior_rho <- hypontenuse/3 - + limits <- sf::st_bbox(data$polygon_shapefile) + hypotenuse <- sqrt((limits$xmax - limits$xmin)^2 + (limits$ymax - limits$ymin)^2) + prior_rho <- hypotenuse/3 + prior_sigma <- sd(data$polygon_data$response/mean(data$polygon_data$response)) - + # Default priors if they are not specified - default_priors <- list(priormean_intercept = -4.0, - priorsd_intercept = 2.0, + default_priors <- list(priormean_intercept = 0, + priorsd_intercept = 10.0, priormean_slope = 0.0, priorsd_slope = 0.5, prior_rho_min = prior_rho, @@ -356,7 +387,7 @@ prior_sigma_prob = 0.1, prior_iideffect_sd_max = 0.1, prior_iideffect_sd_prob = 0.01) - + # Replace with any specified priors if(!is.null(priors)) { final_priors <- default_priors @@ -378,7 +409,7 @@ } else { final_priors <- default_priors } - + parameters <- list(intercept = -5, slope = rep(0, ncol(cov_matrix)), log_tau_gaussian = 8, @@ -387,7 +418,7 @@ log_sigma = 0, log_rho = 4, nodemean = rep(0, n_s)) - + input_data <- list(x = cov_matrix, aggregation_values = data$aggregation_pixels, Apixel = Apix, @@ -400,9 +431,9 @@ nu = nu, field = as.integer(field), iid = as.integer(iid)) - + input_data <- c(input_data, final_priors) - + tmb_map <- list() if(!field) { tmb_map <- c(tmb_map, list(log_sigma = as.factor(NA), @@ -416,7 +447,7 @@ if(family_id != 0) { # if not gaussian do not need a dispersion in likelihood tmb_map <- c(tmb_map, list(log_tau_gaussian = as.factor(NA))) } - + random_effects <- c() if(field) { random_effects <- c(random_effects, 'nodemean') @@ -424,15 +455,15 @@ if(iid) { random_effects <- c(random_effects, 'iideffect') } - + obj <- TMB::MakeADFun( - data = input_data, + data = input_data, parameters = parameters, map = tmb_map, random = random_effects, silent = silent, DLL = "disaggregation") - + return(obj) } @@ -449,7 +480,7 @@ setup_hess_control <- function(opt,hess_control_parscale, hess_control_ndeps){ hess_control$parscale <- hess_control_parscale } # hess_control_ndeps can either be length 1 (default) or correct length vector. - if(length(hess_control_ndeps) == 1){ + if(length(hess_control_ndeps) == 1){ hess_control$ndeps <- rep(hess_control_ndeps, length(opt$par)) } else { if(length(hess_control_ndeps) != length(opt$par)){ diff --git a/R/matching.R b/R/matching.R index f98c967..615b5fc 100644 --- a/R/matching.R +++ b/R/matching.R @@ -1,12 +1,12 @@ #' Function to match pixels to their corresponding polygon -#' -#' From the covaraite data and polygon data, the function matches the polygon id between the two to find +#' +#' From the covariate data and polygon data, the function matches the polygon id between the two to find #' which pixels from the covariate data are contained in each of the polygons.
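A toy illustration of the indexing getStartendindex produces, assuming three pixels fall in polygon 1 and two in polygon 2 (the commented matrix is the expected result; note the zero-indexing for the C++ side):

```r
covariates <- data.frame(area_id = c(1, 1, 1, 2, 2), cov1 = rnorm(5))
polygon_data <- data.frame(area_id = c(1, 2), response = c(5, 3))

getStartendindex(covariates, polygon_data, id_var = 'area_id')
# Expected: one row per polygon, giving the first and last covariate-row
# index for that polygon, zero-indexed:
#      [,1] [,2]
# [1,]    0    2
# [2,]    3    4
```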
-#' -#' Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, -#' and another data.frame containing polygon data with a polygon id, response and sample size column (as returned +#' +#' Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, +#' and another data.frame containing polygon data with a polygon id, response and sample size column (as returned #' by \code{getPolygonData} function). -#' +#' #' Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in #' covariate data that corresponds to that polygon, the second column is the index of the last row in #' covariate data that corresponds to that polygon. @@ -14,7 +14,7 @@ #' @param covariates data.frame with each covariate as a column an and id column. #' @param polygon_data data.frame with polygon id and response data. #' @param id_var string with the name of the column in the covariate data.frame containing the polygon id. -#' +#' #' @return A matrix with two columns and one row for each polygon. The first column is the index of the first row in #' covariate data that corresponds to that polygon, the second column is the index of the last row in #' covariate data that corresponds to that polygon. @@ -43,7 +43,7 @@ getStartendindex <- function(covariates, polygon_data, id_var = 'area_id') { startendindex <- do.call(rbind, startendindex) - whichindices <- match(polygon_data$area_id, unique(covariates[, id_var])) + whichindices <- terra::match(polygon_data$area_id, unique(covariates[, id_var])) # c++ is zero indexed. startendindex <- startendindex[whichindices, ] - 1L diff --git a/R/plotting.R b/R/plotting.R index d2f3c40..f9b9a2b 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -1,78 +1,90 @@ #' Plot input data for disaggregation #' -#' Plotting function for class \emph{disag_data} (the input data for disaggragation). -#' +#' Plotting function for class \emph{disag_data} (the input data for disaggregation). +#' #' Produces three plots: polygon response data, covariate rasters and INLA mesh. #' #' @param x Object of class \emph{disag_data} to be plotted. -#' @param which If a subset of plots is requied, specify a subset of the numbers 1:3 +#' @param which If a subset of plots is required, specify a subset of the numbers 1:3 #' @param ... Further arguments to \emph{plot} function. -#' +#' #' @return A list of three plots: the polygon plot (ggplot), covariate plot (spplot) and INLA mesh plot (ggplot) -#' +#' #' @import ggplot2 #' @method plot disag_data -#' +#' #' @export plot.disag_data <- function(x, which = c(1,2,3), ...) 
{ - + plots <- list() titles <- c() - + if(1 %in% which) { plots$polygon <- plot_polygon_data(x$polygon_shapefile, x$shapefile_names) titles <- c(titles, 'Polygon response data') } - + if(2 %in% which) { - stopifnot(inherits(x$covariate_rasters, c('RasterStack', 'RasterBrick'))) - plots$covariates <- sp::spplot(x$covariate_rasters) + stopifnot(inherits(x$covariate_rasters, c('SpatRaster'))) + plots$covariates <- ggplot2::ggplot() + tidyterra::geom_spatraster(data=x$covariate_rasters) + ggplot2::facet_wrap(~lyr) + tidyterra::scale_fill_terrain_c() titles <- c(titles, 'Covariate rasters') } - + if(3 %in% which & !is.null(x$mesh)) { stopifnot(inherits(x$mesh, 'inla.mesh')) plots$mesh <- plot_mesh(x$mesh) titles <- c(titles, 'INLA mesh for spatial field') } - + print(cowplot::plot_grid(plotlist = plots, labels = titles, label_size = 10)) - + return(invisible(plots)) } #' Plot results of fitted model #' -#' Plotting function for class \emph{disag_model} (the result of the disaggragation fitting). -#' +#' Plotting function for class \emph{disag_model} (the result of the disaggregation fitting). +#' #' Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. -#' +#' #' @param x Object of class \emph{disag_model} to be plotted. #' @param ... Further arguments to \emph{plot} function. -#' -#' @return A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot -#' +#' +#' @return A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot +#' #' @import ggplot2 #' @method plot disag_model -#' +#' #' @export plot.disag_model <- function(x, ...){ - + parameter <- sd <- obs <- pred <- NULL posteriors <- as.data.frame(summary(x$sd_out, select = 'fixed')) posteriors <- dplyr::mutate(posteriors, name = rownames(posteriors)) names(posteriors) <- c('mean', 'sd', 'parameter') + posteriors$fixed <- grepl('slope', posteriors$parameter) + posteriors$type <- ifelse(posteriors$fixed, 'Slope', 'Other') + + # Check name lengths match before substituting. 
+ lengths_match <- terra::nlyr(x$data$covariate_rasters) == sum(posteriors$fixed) + if(lengths_match){ + posteriors$parameter[grepl('slope', posteriors$parameter)] <- names(x$data$covariate_rasters) + } + + fixedeffects <- ggplot() + + geom_errorbar(posteriors, mapping = aes(x = parameter, ymin = mean - sd, + ymax = mean + sd), + width = 0.2, color = "blue") + + geom_point(posteriors, mapping = aes(x = parameter, y = mean)) + + facet_wrap( ~ type , scales = 'free') + + coord_flip() + + ggtitle("Parameters (excluding random effects)") - fixedeffects <- ggplot() + - geom_errorbar(posteriors, mapping = aes(x = parameter, ymin = mean - sd, ymax = mean + sd), width = 0.2, color = "blue") + - geom_point(posteriors, mapping = aes(x = parameter, y = mean)) + - ggtitle("Fixed effects") - report <- x$obj$report() - + # Form of the observed and predicted results depends on the likelihood function used if(x$model_setup$family == 'gaussian') { observed_data = report$polygon_response_data/report$reportnormalisation @@ -87,79 +99,76 @@ plot.disag_model <- function(x, ...){ predicted_data = report$reportprediction_rate title <- 'In sample performance: incidence rate' } - + data <- data.frame(obs = observed_data, pred = predicted_data) - - obspred <- ggplot(data, aes(x = obs, y = pred)) + - geom_point() + - geom_abline(intercept = 0, slope = 1, color = 'blue') + + + obspred <- ggplot(data, aes(x = obs, y = pred)) + + geom_point() + + geom_abline(intercept = 0, slope = 1, color = 'blue') + ggtitle(title) - + plots <- list(fixedeffects, obspred) print(cowplot::plot_grid(plotlist = plots)) - + return(invisible(plots)) } #' Plot mean and uncertainty predictions from the disaggregation model results #' -#' Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggragation fitting). -#' +#' Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggregation fitting). +#' +#' #' Produces raster plots of the mean prediction, and the lower and upper confidence intervals. #' #' @param x Object of class \emph{disag_prediction} to be plotted. #' @param ... Further arguments to \emph{plot} function. -#' +#' #' @return A list of plots of rasters from the prediction: mean prediction, lower CI and upper CI. -#' +#' #' @method plot disag_prediction -#' +#' #' @export plot.disag_prediction <- function(x, ...) 
{ - rasters_to_plot <- raster::stack(x$mean_prediction$prediction, x$uncertainty_prediction$predictions_ci) + rasters_to_plot <- terra::rast(list(x$mean_prediction$prediction, x$uncertainty_prediction$predictions_ci)) names(rasters_to_plot) <- c('mean prediction', 'lower CI', 'upper CI') - - plots <- sp::spplot(rasters_to_plot) - + + plots <- ggplot2::ggplot() + tidyterra::geom_spatraster(data=rasters_to_plot) + ggplot2::facet_wrap(~lyr) + tidyterra::scale_fill_terrain_c() + print(plots) - + return(invisible(plots)) } -# Plot polygon data from SpatialPolygonDataFrame +# Plot polygon data from sf object # # @param x Object to be plotted # @param names list of 2 names: polygon id variable and response variable names -# +# # @return A ggplot of the polygon data -# +# # @name plot_polygon_data -plot_polygon_data <- function(x, names) { +plot_polygon_data <- function(polygon_shapefile, names) { # Rename the response variable for plotting - shp <- x - shp@data <- dplyr::rename(shp@data, 'response' = names$response_var) - shp@data <- dplyr::rename(shp@data, 'area_id' = names$id_var) - + shp <- polygon_shapefile + shp <- dplyr::rename(shp, 'response' = names$response_var) + shp <- dplyr::rename(shp, 'area_id' = names$id_var) + area_id <- long <- lat <- group <- response <- NULL - stopifnot(inherits(shp, 'SpatialPolygonsDataFrame')) - - df_fortify <- fortify(shp, region = 'area_id') - - df <- shp@data - df <- dplyr::mutate(df, area_id = as.character(area_id)) - df <- dplyr::left_join(df_fortify, df, by = c('id' = 'area_id')) - - p <- ggplot(df, aes(long, lat, group = group, fill = response)) + - geom_polygon() + - coord_equal() + + stopifnot(inherits(shp, 'sf')) + + shp <- dplyr::mutate(shp, area_id = as.character(area_id)) + + p <- ggplot(shp, aes(fill = response)) + + geom_sf() + + #coord_equal() + scale_fill_viridis_c(trans = 'identity') - + return(invisible(p)) } @@ -170,24 +179,24 @@ plot_polygon_data <- function(x, names) { # @param lwd Line width # @param linecol The colour for the mesh edges # @param size size Size of data points -# +# # @name plot_mesh plot_mesh <- function(x, main = '', col = 'blue', lwd = 0.5, linecol = 'darkgrey', size = 1.2) { - + mesh <- x # extract point data d <- data.frame(x = mesh$loc[, 1], y = mesh$loc[, 2], type = 'evertices') levels(d$type) <- c('evertices', 'adata') d[mesh$idx$loc, 'type'] <- 'adata' - # extract lines data. + # extract lines data. # mesh$graph$tv column 1, 2, 3 are points in triangles. # Therefore need 1 to 2, 2 to 3 and 3 to 1. 
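# For example, with a hypothetical two-triangle mesh where tv = rbind(c(1, 2, 3), c(2, 4, 3)),
# the rbind below stacks tv[, 1:2], tv[, 2:3] and tv[, c(3, 1)] into the six directed
# edges 1-2, 2-4, 2-3, 4-3, 3-1 and 3-2, one row per edge.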
- idx = rbind(mesh$graph$tv[, 1:2, drop = FALSE], - mesh$graph$tv[, 2:3, drop = FALSE], + idx = rbind(mesh$graph$tv[, 1:2, drop = FALSE], + mesh$graph$tv[, 2:3, drop = FALSE], mesh$graph$tv[, c(3, 1), drop = FALSE]) segments <- data.frame(mesh$loc[idx[, 1], 1:2], mesh$loc[idx[, 2], 1:2], type = 'bsegments') - + innerouter <- data.frame(mesh$loc[mesh$segm$bnd$idx[, 1], 1:2], mesh$loc[mesh$segm$bnd$idx[, 2], 1:2], type = 'cbinding', stringsAsFactors = FALSE) @@ -202,28 +211,30 @@ plot_mesh <- function(x, main = '', col = 'blue', lwd = 0.5, linecol = 'darkgrey #innerouter[nrow(innerouter), 5] <- 'dinternal' innerouter$type = factor(innerouter$type, levels = c('dinternal', 'cbinding')) } - - + + names(segments) <- c('x1', 'y1', 'x2', 'y2', 'type') names(innerouter) <- c('x1', 'y1', 'x2', 'y2', 'type') - + segments <- rbind(segments, innerouter) - - - p <- ggplot2::ggplot(data = d, - ggplot2::aes_string('x', 'y', - colour = 'type', - size = 'type')) + - ggplot2::geom_segment(data = segments, - ggplot2::aes_string(x = 'x1', y = 'y1', xend = 'x2', yend = 'y2')) + - ggplot2::geom_point() + + + #size = .data$type + p <- ggplot2::ggplot(data = d, + ggplot2::aes(.data$x, .data$y, + colour = .data$type)) + + ggplot2::geom_segment(data = segments, + ggplot2::aes(x = .data$x1, y = .data$y1, + xend = .data$x2, yend = .data$y2, + linewidth = .data$type)) + + ggplot2::geom_point(aes(size = .data$type)) + ggplot2::theme_minimal() + ggplot2::theme(legend.position = 'none') #stroke p <- p + ggplot2::scale_colour_manual(values = c(col, linecol, 'black', 'black', 'black'), drop = FALSE) + ggplot2::scale_size_manual(values = c(size, lwd, 1.3, 1.3, 0), drop = FALSE) + + ggplot2::scale_linewidth_manual(values = c(size, lwd, 1.3, 1.3, 0), drop = FALSE) + ggtitle(main) - + return(invisible(p)) } diff --git a/R/predict.R b/R/predict.R index 47a04b0..6cbfc67 100644 --- a/R/predict.R +++ b/R/predict.R @@ -1,80 +1,80 @@ #' Predict mean and uncertainty from the disaggregation model result -#' -#' \emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and +#' +#' \emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and #' predicts mean and uncertainty maps. -#' -#' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' +#' To predict over a different spatial extent to that used in the model, +#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. -#' -#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. -#' -#' For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated -#' are given by the arguments \emph{N} and \emph{CI} respectively. -#' +#' +#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. +#' +#' For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated +#' are given by the arguments \emph{N} and \emph{CI} respectively. +#' #' @param object disag_model object returned by disag_model function. 
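In practice the method documented here is called on a fitted model; a minimal sketch (`fit_result` is a disag_model object as in the example below, and `new_covs` a hypothetical SpatRaster carrying the same covariates):

```R
# Mean and uncertainty maps, 200 realisations, 95% credible interval
pred <- predict(fit_result, N = 200, CI = 0.95)

# Predict over a new extent, keeping any polygon iid effect
pred_new <- predict(fit_result, newdata = new_covs, predict_iid = TRUE)

pred$mean_prediction$prediction             # mean prediction raster
pred$uncertainty_prediction$predictions_ci  # lower and upper CI rasters
```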
-#' @param newdata If NULL, predictions are made using the data in model_output. -#' If this is a raster stack or brick, predictions will be made over this data. +#' @param newdata If NULL, predictions are made using the data in model_output. +#' If this is a SpatRaster, predictions will be made over this data. #' @param predict_iid logical. If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE. #' @param N Number of realisations. Default: 100. #' @param CI Confidence interval to be calculated from the realisations. Default: 0.95. #' @param ... Further arguments passed to or from other methods. #' -#' @return An object of class \emph{disag_prediction} which consists of a list of two objects: +#' @return An object of class \emph{disag_prediction} which consists of a list of two objects: #' \item{mean_prediction }{List of: #' \itemize{ #' \item \emph{prediction} Raster of mean predictions. #' \item \emph{field} Raster of the field component of the linear predictor. #' \item \emph{iid} Raster of the iid component of the linear predictor. #' \item \emph{covariates} Raster of the covariate component of the linear predictor. -#' }} +#' }} #' \item{uncertainty_prediction: }{List of: #' \itemize{ -#' \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. -#' \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. -#' }} +#' \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}. +#' \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}. +#' }} #' #' #' @method predict disag_model #' -#' @examples +#' @examples #' \dontrun{ #' predict(fit_result) #' } -#' +#' #' @export predict.disag_model <- function(object, newdata = NULL, predict_iid = FALSE, N = 100, CI = 0.95, ...) { - + mean_prediction <- predict_model(object, newdata = newdata, predict_iid) - + uncertainty_prediction <- predict_uncertainty(object, newdata = newdata, predict_iid, N, CI) - + prediction <- list(mean_prediction = mean_prediction, uncertainty_prediction = uncertainty_prediction) - + class(prediction) <- c('disag_prediction', 'list') - + return(prediction) } #' Function to predict mean from the model result -#' -#' \emph{predict_model} function takes a \emph{disag_model} object created by -#' \emph{disaggregation::disag_model} and predicts mean maps. -#' +#' +#' \emph{predict_model} function takes a \emph{disag_model} object created by +#' \emph{disaggregation::disag_model} and predicts mean maps. +#' #' Function returns rasters of the mean predictions as well as the covariate and field contributions #' to the linear predictor. -#' -#' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' +#' To predict over a different spatial extent to that used in the model, +#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. -#' -#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction.
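The uncertainty counterpart below draws parameter vectors from a sparse multivariate normal. A self-contained sketch of that sampling step, with a toy precision matrix standing in for the model's joint precision:

```R
library(Matrix)

# Toy sparse precision matrix (dsCMatrix) and mean vector
Q <- Matrix::sparseMatrix(i = 1:3, j = 1:3, x = c(2, 4, 8), symmetric = TRUE)
mu <- c(0, 1, 2)

# Cholesky factor of the precision, then N draws (one parameter vector per row)
ch <- Matrix::Cholesky(Q)
draws <- sparseMVN::rmvn.sparse(10, mu, ch, prec = TRUE)
dim(draws)  # 10 x 3
```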
-#' +#' +#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. +#' #' @param model_output disag_model object returned by disag_model function -#' @param newdata If NULL, predictions are made using the data in model_output. +#' @param newdata If NULL, predictions are made using the data in model_output. #' If this is a raster stack or brick, predictions will be made over this data. Default NULL. #' @param predict_iid If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE. #' @@ -85,78 +85,84 @@ predict.disag_model <- function(object, newdata = NULL, predict_iid = FALSE, N = #' \item \emph{iid} Raster of the iid component of the linear predictor. #' \item \emph{covariates} Raster of the covariate component of the linear predictor. #' } -#' +#' #' @name predict_model #' -#' @examples +#' @examples #' \dontrun{ #' predict_model(result) #' } -#' +#' #' @export predict_model <- function(model_output, newdata = NULL, predict_iid = FALSE) { - + objects_for_prediction <- setup_objects(model_output, newdata = newdata, predict_iid) - + pars <- model_output$obj$env$last.par.best pars <- split(pars, names(pars)) - - prediction <- predict_single_raster(pars, + + prediction <- predict_single_raster(pars, objects_for_prediction, - link_function = model_output$model_setup$link) - + link_function = model_output$model_setup$link) + return(prediction) - + } #' Function to predict uncertainty from the model result -#' -#' \emph{predict_uncertainty} function takes a \emph{disag_model} object created by -#' \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps. -#' -#' Function returns a RasterStack of the realisations as well as the upper and lower credible interval rasters. -#' -#' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' +#' \emph{predict_uncertainty} function takes a \emph{disag_model} object created by +#' \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps. +#' +#' Function returns a SpatRaster of the realisations as well as the upper and lower credible interval rasters. +#' +#' To predict over a different spatial extent to that used in the model, +#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. -#' -#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. -#' +#' +#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. +#' #' The number of the realisations and the size of the confidence interval to be calculated. -#' are given by the arguments \emph{N} and \emph{CI} respectively. -#' +#' are given by the arguments \emph{N} and \emph{CI} respectively. +#' #' @param model_output disag_model object returned by disag_model function. -#' @param newdata If NULL, predictions are made using the data in model_output. +#' @param newdata If NULL, predictions are made using the data in model_output. #' If this is a raster stack or brick, predictions will be made over this data. Default NULL. #' @param predict_iid If TRUE, any polygon iid effect from the model will be used in the prediction. 
Default FALSE. #' @param N number of realisations. Default: 100. #' @param CI confidence interval. Default: 0.95. -#' +#' #' @return The uncertainty prediction, which is a list of: #' \itemize{ -#' \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. -#' \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. +#' \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}. +#' \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}. #' } -#' +#' #' @name predict_uncertainty #' -#' @examples +#' @examples #' \dontrun{ #' predict_uncertainty(result) #' } -#' +#' #' @export predict_uncertainty <- function(model_output, newdata = NULL, predict_iid = FALSE, N = 100, CI = 0.95) { - + objects_for_prediction <- setup_objects(model_output, newdata = newdata, predict_iid) - + parameters <- model_output$obj$env$last.par.best - + # If we have either of the random effects, we have the jointPrecision matrix. # but if we have neither, we don't get that matrix and should use the # covariance matrix instead + + #CH <- Matrix::Cholesky(as(S, 'dsCMatrix')) + #x <- rmvn.sparse(10, mu, CH, prec=FALSE) ## 10 random draws of x + #d <- dmvn.sparse(x, mu, CH, prec=FALSE) ## densities of the 10 draws + + if(model_output$model_setup$iid | model_output$model_setup$field){ ch <- Matrix::Cholesky(model_output$sd_out$jointPrecision) par_draws <- sparseMVN::rmvn.sparse(N, parameters, ch, prec = TRUE) @@ -165,78 +171,59 @@ predict_uncertainty <- function(model_output, newdata = NULL, predict_iid = FALS ch <- Matrix::Cholesky(covariance_matrix) par_draws <- sparseMVN::rmvn.sparse(N, parameters, ch, prec = FALSE) } - + predictions <- list() - + for(r in seq_len(N)) { - + p <- split(par_draws[r, ], names(parameters)) - - prediction_result <- predict_single_raster(p, + + prediction_result <- predict_single_raster(p, objects_for_prediction, - link_function = model_output$model_setup$link) - + link_function = model_output$model_setup$link) + predictions[[r]] <- prediction_result$prediction } - predictions <- raster::stack(predictions) - + predictions <- terra::rast(predictions) + probs <- c((1 - CI) / 2, 1 - (1 - CI) / 2) - predictions_ci <- raster::calc(predictions, function(x) stats::quantile(x, probs = probs, na.rm = TRUE)) + predictions_ci <- terra::app(predictions, function(x) stats::quantile(x, probs = probs, na.rm = TRUE)) + + names(predictions_ci) <- c('lower CI', 'upper CI') - + uncertainty <- list(realisations = predictions, predictions_ci = predictions_ci) - + return(uncertainty) } # Get coordinates from raster # -# @param data disag_data object -# +# @param data disag_data object +# # @return A matrix of the coordinates of the raster -# +# # @name getCoords getCoords <- function(data) { - + points_raster <- data$covariate_rasters[[1]] - points_raster[is.na(points_raster)] <- -9999 - raster_pts <- raster::rasterToPoints(points_raster, spatial = TRUE) - coords <- raster_pts@coords - - return(coords) -} + points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999 + raster_pts <- terra::as.points(points_raster) + coords <- terra::crds(raster_pts) -# Get Amatrix for field -# -# @param mesh mesh used in the model fitting -# @param coords coordinates extracted from raster -# -# @return An Amatrix object for the field -# -# @name getAmatrix - -getAmatrix <- 
function(mesh, coords) { - - spde <- (INLA::inla.spde2.matern(mesh, alpha = 2)$param.inla)[c("M0", "M1", "M2")] - n_s <- nrow(spde$M0) - - Amatrix <- INLA::inla.mesh.project(mesh, loc = as.matrix(coords))$A - - return(Amatrix) + return(coords) } - - # Helper to check and sort out new raster data. check_newdata <- function(newdata, model_output){ if(is.null(newdata)) return(NULL) if(!is.null(newdata)){ - if(!(inherits(newdata, c('RasterStack', 'RasterBrick', 'RasterLayer')))){ - stop('newdata should be NULL or a RasterStack or a RasterBrick') - } + if(!(inherits(newdata, c('SpatRaster')))){ + stop('newdata should be NULL or a SpatRaster') + } if(!all(names(model_output$data$covariate_rasters) %in% names(newdata))){ stop('All covariates used to fit the model must be in newdata') } @@ -248,50 +235,54 @@ check_newdata <- function(newdata, model_output){ # Function to setup covariates, field and iid objects for prediction setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) { - + newdata <- check_newdata(newdata, model_output) - + # Pull out original data data <- model_output$data - + # Decide which covariates to predict over if(is.null(newdata)){ covariates <- data$covariate_rasters } else { covariates <- newdata } - + data$covariate_rasters <- covariates - + # If there is no iid effect in the model, it cannot be predicted if(!model_output$model_setup$iid) { predict_iid <- FALSE } - + if(model_output$model_setup$field) { if(is.null(newdata)) { coords <- data$coordsForPrediction } else { coords <- getCoords(data) } - Amatrix <- getAmatrix(data$mesh, coords) + Amatrix <- fmesher::fm_evaluator(data$mesh, loc = as.matrix(coords))$proj$A field_objects <- list(coords = coords, Amatrix = Amatrix) } else { field_objects <- NULL } - + if(predict_iid) { tmp_shp <- model_output$data$polygon_shapefile - tmp_shp@data <- data.frame(area_id = factor(model_output$data$polygon_data$area_id)) - shapefile_raster <- raster::rasterize(tmp_shp, - model_output$data$covariate_rasters, + #needed to avoid errors in testing + if (!("area_id" %in% names(model_output$data$polygon_shapefile))){ + tmp_shp <- dplyr::bind_cols(tmp_shp, + area_id = + factor(model_output$data$polygon_data$area_id)) + } + shapefile_raster <- terra::rasterize(tmp_shp, + model_output$data$covariate_rasters, field = 'area_id') - shapefile_ids <- raster::unique(shapefile_raster) + shapefile_ids <- terra::unique(shapefile_raster) iid_objects <- list(shapefile_raster = shapefile_raster, shapefile_ids = shapefile_ids) } else { iid_objects <- NULL } - return(list(covariates = covariates, field_objects = field_objects, iid_objects = iid_objects)) @@ -299,69 +290,79 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) { # Function to take model parameters and predict a single raster predict_single_raster <- function(model_parameters, objects, link_function) { - + # Create linear predictor covs_by_betas <- list() - for(i in seq_len(raster::nlayers(objects$covariates))){ + for(i in seq_len(terra::nlyr(objects$covariates))){ covs_by_betas[[i]] <- model_parameters$slope[i] * objects$covariates[[i]] } - - cov_by_betas <- raster::stack(covs_by_betas) - if(raster::nlayers(cov_by_betas) > 1){ + + cov_by_betas <- terra::rast(covs_by_betas) + if(terra::nlyr(cov_by_betas) > 1){ sum_cov_by_betas <- sum(cov_by_betas) - } else { + } else { # With only 1 covariate, there's nothing to sum. Do this to avoid warnings. 
sum_cov_by_betas <- cov_by_betas } cov_contribution <- sum_cov_by_betas + model_parameters$intercept - - linear_pred <- cov_contribution - + + linear_pred <- cov_contribution + if(!is.null(objects$field_objects)){ # Extract field values field <- (objects$field_objects$Amatrix %*% model_parameters$nodemean)[, 1] - field_ras <- raster::rasterFromXYZ(cbind(objects$field_objects$coords, field)) + field_ras <- terra::rast(cbind(objects$field_objects$coords, field), + type = 'xyz', + crs = terra::crs(linear_pred)) linear_pred <- linear_pred + field_ras } else { field_ras <- NULL } - - if(!is.null(objects$iid_objects)) { + + if(!is.null(objects$iid_objects)) { iid_ras <- objects$iid_objects$shapefile_raster iideffect_sd <- 1/sqrt(exp(model_parameters$iideffect_log_tau)) + # todo for(i in seq_along(model_parameters$iideffect)) { - iid_ras@data@values[which(objects$iid_objects$shapefile_raster@data@values == objects$iid_objects$shapefile_ids[i])] <- + targetvals <- terra::values(objects$iid_objects$shapefile_raster, + dataframe = FALSE, mat = FALSE) + whichvals <- which(targetvals == objects$iid_objects$shapefile_ids[1, i]) + terra::values(iid_ras)[whichvals] <- model_parameters$iideffect[i] - na_pixels <- which(is.na(iid_ras@data@values)) + na_pixels <- which(is.na(terra::values(iid_ras, dataframe = FALSE, mat = FALSE))) na_iid_values <- stats::rnorm(length(na_pixels), 0, iideffect_sd) - iid_ras@data@values[na_pixels] <- na_iid_values + terra::values(iid_ras)[na_pixels] <- na_iid_values } - if(raster::extent(iid_ras) != raster::extent(linear_pred)) { + if(terra::ext(iid_ras) != terra::ext(linear_pred)) { # Extent of prediction space is different to the original model. Keep any overlapping iid values but predict to the new extent raster_new_extent <- linear_pred - raster_new_extent@data@values <- NA - iid_ras <- raster::merge(iid_ras, raster_new_extent, ext = raster::extent(raster_new_extent)) - missing_pixels <- which(is.na(iid_ras@data@values)) + terra::values(raster_new_extent) <- NA + #iid_ras <- terra::merge(iid_ras, raster_new_extent, ext = terra::ext(raster_new_extent)) + # NOt sure why we no longer need the ext argument + # SS - added a crop which I think does the same thing + iid_ras <- terra::merge(iid_ras, raster_new_extent) + iid_ras <- terra::crop(iid_ras, raster_new_extent) + missing_pixels <- which(is.na(terra::values(iid_ras, dataframe = FALSE, mat = FALSE))) missing_iid_values <- stats::rnorm(length(missing_pixels), 0, iideffect_sd) - iid_ras@data@values[missing_pixels] <- missing_iid_values + terra::values(iid_ras)[missing_pixels] <- missing_iid_values } linear_pred <- linear_pred + iid_ras } else { iid_ras <- NULL } - - if(link_function == 'logit') { + + if(link_function == 'logit') { prediction_ras <- 1 / (1 + exp(-1 * linear_pred)) } else if(link_function == 'log') { prediction_ras <- exp(linear_pred) } else if(link_function == 'identity') { prediction_ras <- linear_pred } - - predictions <- list(prediction = prediction_ras, + + predictions <- list(prediction = prediction_ras, field = field_ras, iid = iid_ras, covariates = cov_contribution) - + return(predictions) } diff --git a/R/prepare_data.R b/R/prepare_data.R index 4482048..f95cd31 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -1,47 +1,47 @@ #' Prepare data for disaggregation modelling -#' -#' \emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model. +#' +#' \emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model. 
#' Designed to be used in the \emph{disaggregation::disag_model} function. -#' -#' Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates. -#' -#' Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons -#' (\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores -#' to use for covariate extraction. This can be the number of covariates used in the model. +#' +#' Takes an sf object with the response data and a SpatRaster of covariates. +#' +#' Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons +#' (\emph{terra::extract} function). Extraction is no longer done in parallel, so the \emph{ncores} argument is deprecated. -#' -#' The aggregation raster defines how the pixels within each polygon are aggregated. +#' +#' The aggregation raster defines how the pixels within each polygon are aggregated. #' The disaggregation model performs a weighted sum of the pixel prediction, weighted by the pixel values in the aggregation raster. -#' For disease incidence rate you use the population raster to aggregate pixel incidence rate by summing the number of cases -#' (rate weighted by population). If no aggregation raster is provided a uniform distribution is assumed, i.e. the pixel predictions +#' For disease incidence rate you use the population raster to aggregate pixel incidence rate by summing the number of cases +#' (rate weighted by population). If no aggregation raster is provided a uniform distribution is assumed, i.e. the pixel predictions #' are aggregated to polygon level by summing the pixel values. -#' -#' Makes a matrix that contains the start and end pixel index for each polygon. Builds an INLA mesh to use for the spatial field +#' +#' Makes a matrix that contains the start and end pixel index for each polygon. Builds an INLA mesh to use for the spatial field #' (\emph{getStartendindex} function). -#' -#' The \emph{mesh.args} argument allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field +#' +#' The \emph{mesh.args} argument allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field #' (\emph{build_mesh} function). -#' -#' The \emph{na.action} flag is automatically off. If there are any NAs in the response or covariate data within the polygons the -#' \emph{prepare_data} method will error. Ideally the NAs in the data would be dealt with beforehand, however, setting na.action = TRUE -#' will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero +#' +#' The \emph{na.action} flag is off by default. If there are any NAs in the response or covariate data within the polygons the +#' \emph{prepare_data} method will error. Ideally the NAs in the data would be dealt with beforehand, however, setting na.action = TRUE +#' will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero #' and sets covariate NA pixels to the median value for that covariate.
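To make the weighted aggregation described above concrete (numbers invented for illustration): for a polygon covering three pixels, pixel-level rates are combined into a polygon-level count by weighting with the aggregation (population) raster.

```R
rate <- c(0.02, 0.05, 0.01)  # pixel-level incidence rate predictions
pop  <- c(1000, 500, 2000)   # aggregation raster values (population)

cases <- sum(rate * pop)          # polygon cases: 20 + 25 + 20 = 65
rate_polygon <- cases / sum(pop)  # polygon rate: 65 / 3500, about 0.019
```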
-#' -#' @param polygon_shapefile SpatialPolygonDataFrame containing at least two columns: one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}). -#' @param covariate_rasters RasterStack of covariate rasters to be used in the model. -#' @param aggregation_raster Raster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used. -#' @param id_var Name of column in SpatialPolygonDataFrame object with the polygon id. -#' @param response_var Name of column in SpatialPolygonDataFrame object with the response data. -#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. +#' +#' @param polygon_shapefile sf object containing at least three columns: one with the geometry, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}). +#' @param covariate_rasters SpatRaster of covariate rasters to be used in the model. +#' @param aggregation_raster SpatRaster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used. +#' @param id_var Name of column in sf object with the polygon id. +#' @param response_var Name of column in sf object with the response data. +#' @param sample_size_var For survey data, name of column in sf object (if it exists) with the sample size data. #' @param mesh.args list of parameters that control the mesh structure with the same names as used by INLA. #' @param na.action logical. If TRUE, NAs in response will be removed, covariate NAs will be given the median value, aggregation NAs will be set to zero. Default FALSE (NAs in response or covariate data within the polygons will give errors). #' @param makeMesh logical. If TRUE, build INLA mesh, takes some time. Default TRUE. -#' @param ncores Number of cores used to perform covariate extraction. +#' @param ncores Deprecated. #' -#' @return A list is returned of class \code{disag_data}. -#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. +#' @return A list is returned of class \code{disag_data}. +#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. #' The list of class \code{disag_data} contains: -#' \item{polygon_shapefile }{The SpatialPolygonDataFrame used as an input.} -#' \item{covariate_rasters }{The RasterStack used as an input.} +#' \item{polygon_shapefile }{The sf object used as an input.} +#' \item{covariate_rasters }{The SpatRaster used as an input.} #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} #' \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} @@ -49,59 +49,65 @@ #' \item{coordsForPrediction }{A matrix with two columns of x, y coordinates of pixels in the whole Raster.
Used to make predictions.} #' \item{startendindex }{A matrix with two columns containing the start and end index of the pixels within each polygon.} #' \item{mesh }{A INLA mesh to be used for the spatial field of the disaggregation model.} -#' +#' @import splancs +#' @import utils #' @name prepare_data #' -#' @examples +#' @examples #' \donttest{ -#' polygons <- list() -#' for(i in 1:100) { +#' polygons <- list() +#' for(i in 1:100) { #' row <- ceiling(i/10) #' col <- ifelse(i %% 10 != 0, i %% 10, 10) #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } -#' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' r <- raster::raster(ncol=20, nrow=20) -#' r <- raster::setExtent(r, raster::extent(spdf)) -#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) -#' r2 <- raster::raster(ncol=20, nrow=20) -#' r2 <- raster::setExtent(r2, raster::extent(spdf)) -#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) -#' cov_rasters <- raster::stack(r, r2) -#' -#' test_data <- prepare_data(polygon_shapefile = spdf, -#' covariate_rasters = cov_rasters) -#' } -#' +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) +#' } +#' +#' polys <- lapply(polygons,sf::st_polygon) +#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) +#' spdf <- sf::st_sf(response_df,geometry=polys) +#' +#' r <- terra::rast(nrow=20,ncol=20) +#' terra::ext(r) <- terra::ext(spdf) +#' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) +#' +#' r2 <- terra::rast(nrow=20,ncol=20) +#' terra::ext(r2) <- terra::ext(spdf) +#' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) +#' cov_rasters <- c(r, r2) +#' +#' test_data <- prepare_data(polygon_shapefile = spdf, +#' covariate_rasters = cov_rasters) +#' } +#' #' @export -#' -#' +#' +#' -prepare_data <- function(polygon_shapefile, +prepare_data <- function(polygon_shapefile, covariate_rasters, aggregation_raster = NULL, - id_var = 'area_id', - response_var = 'response', + id_var = 'area_id', + response_var = 'response', sample_size_var = NULL, - mesh.args = NULL, + mesh.args = NULL, na.action = FALSE, makeMesh = TRUE, - ncores = 2) { + ncores = NULL) { + + if (!missing("ncores")) + warning("The ncores argument has been deprecated") - stopifnot(inherits(polygon_shapefile, 'SpatialPolygonsDataFrame')) - stopifnot(inherits(covariate_rasters, 'Raster')) - if(!is.null(aggregation_raster)) stopifnot(inherits(aggregation_raster, 'Raster')) + stopifnot(inherits(polygon_shapefile, 'sf')) + stopifnot(inherits(covariate_rasters, 'SpatRaster')) + if(!is.null(aggregation_raster)) stopifnot(inherits(aggregation_raster, 'SpatRaster')) stopifnot(inherits(id_var, 'character')) stopifnot(inherits(response_var, 'character')) if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) - + # Check for NAs in response data - na_rows <- is.na(polygon_shapefile@data[, response_var]) + na_rows <- is.na(polygon_shapefile[, response_var, drop = TRUE]) if(sum(na_rows) != 0) { if(na.action) { polygon_shapefile <- polygon_shapefile[!na_rows, ] @@ -109,9 +115,9 @@ prepare_data <- function(polygon_shapefile, stop('There are NAs in the response data. 
Please deal with these, or set na.action = TRUE') } } - + polygon_data <- getPolygonData(polygon_shapefile, id_var, response_var, sample_size_var) - + # Save raster layer names so we can reassign it to make sure names don't change. cov_names <- names(covariate_rasters) @@ -119,24 +125,29 @@ prepare_data <- function(polygon_shapefile, # If no aggregation raster is given, use a 'unity' raster if(is.null(aggregation_raster)) { aggregation_raster <- covariate_rasters[[1]] - aggregation_raster <- raster::setValues(aggregation_raster, rep(1, raster::ncell(aggregation_raster))) + terra::values(aggregation_raster) <- rep(1, terra::ncell(aggregation_raster)) } names(aggregation_raster) <- 'aggregation_raster' - - covariate_rasters <- raster::addLayer(covariate_rasters, aggregation_raster) - cl <- parallel::makeCluster(ncores) - doParallel::registerDoParallel(cl) - covariate_data <- parallelExtract(covariate_rasters, polygon_shapefile, fun = NULL, id = id_var) - parallel::stopCluster(cl) - foreach::registerDoSEQ() - - covariate_rasters <- raster::dropLayer(covariate_rasters, raster::nlayers(covariate_rasters)) + + covariate_rasters <- c(covariate_rasters, aggregation_raster) + covariate_data <- terra::extract(covariate_rasters, terra::vect(polygon_shapefile), cells=TRUE, na.rm=TRUE, ID=TRUE) + #merge to transfer area_id and then tidy up + polygon_data$area_n <- 1:nrow(polygon_data) + covariate_data <- merge(covariate_data, polygon_data, by.x = "ID", by.y = "area_n") + covariate_data <- covariate_data[ , !(names(covariate_data) %in% c("ID", "response", "N"))] + colnames(covariate_data )[colnames(covariate_data ) == "area_id"] <- id_var + polygon_data <- polygon_data[ , !(names(polygon_data) %in% c("area_n"))] + + # Remove the aggregation raster + cov_filter <- !(names(covariate_data) %in% c('aggregation_raster')) + covariate_rasters <- covariate_rasters[[cov_filter]] names(covariate_rasters) <- cov_names - - aggregation_pixels <- as.numeric(covariate_data[ , ncol(covariate_data)]) - covariate_data <- covariate_data[, -ncol(covariate_data)] - + + agg_filter <- names(covariate_data) %in% c('aggregation_raster') + aggregation_pixels <- as.numeric(covariate_data[ , agg_filter]) + covariate_data <- covariate_data[, !agg_filter] + # Check for NAs in population data if(sum(is.na(aggregation_pixels)) != 0) { if(na.action) { @@ -145,34 +156,30 @@ prepare_data <- function(polygon_shapefile, stop('There are NAs in the aggregation rasters within polygons. Please deal with these, or set na.action = TRUE') } } - + # Check for NAs in covariate data if(sum(is.na(covariate_data)) != 0) { if(na.action) { - covariate_data[-c(1:2)] <- sapply(covariate_data[-c(1:2)], function(x) { x[is.na(x)] <- stats::median(x, na.rm = T); return(x) }) + cov_filter <- !(names(covariate_data) %in% c(id_var,'cell')) + covariate_data[ , cov_filter] <- sapply(covariate_data[ , cov_filter], function(x) { x[is.na(x)] <- stats::median(x, na.rm = T); return(x) }) } else { stop('There are NAs in the covariate rasters within polygons. 
Please deal with these, or set na.action = TRUE') } } - - coordsForFit <- extractCoordsForMesh(covariate_rasters, selectIds = covariate_data$cellid) - + + coordsForFit <- extractCoordsForMesh(covariate_rasters, selectIds = covariate_data$cell) + coordsForPrediction <- extractCoordsForMesh(covariate_rasters) - + startendindex <- getStartendindex(covariate_data, polygon_data, id_var = id_var) - + if(makeMesh) { - if(!requireNamespace('INLA', quietly = TRUE)) { - mesh <- NULL - message("Cannot build mesh as INLA is not installed. If you need a spatial field in your model, you must install INLA.") - } else { mesh <- build_mesh(polygon_shapefile, mesh.args) - } - } else { + } else { mesh <- NULL - message("A mesh is not being built. You will not be able to run a model without a mesh.") + message("A mesh is not being built. You will not be able to run a spatial model without a mesh.") } - + disag_data <- list(polygon_shapefile = polygon_shapefile, shapefile_names = list(id_var = id_var, response_var = response_var), covariate_rasters = covariate_rasters, @@ -183,31 +190,31 @@ prepare_data <- function(polygon_shapefile, coordsForPrediction = coordsForPrediction, startendindex = startendindex, mesh = mesh) - + class(disag_data) <- c('disag_data', 'list') - + return(disag_data) - + } #' Function to create a disag_data object #' -#' @param polygon_shapefile SpatialPolygonDataFrame containing the response data +#' @param polygon_shapefile sf object containing the response data #' @param shapefile_names List of 2: polygon id variable name and response variable name from polygon_shapefile -#' @param covariate_rasters RasterStack of covariates +#' @param covariate_rasters SpatRaster of covariates #' @param polygon_data data.frame with two columns: polygon id and response #' @param covariate_data data.frame with cell id, polygon id and covariate columns #' @param aggregation_pixels vector with value of aggregation raster at each pixel #' @param coordsForFit coordinates of the covariate data points within the polygons in polygon_shapefile #' @param coordsForPrediction coordinates of the covariate data points in the whole raster extent #' @param startendindex matrix containing the start and end index for each polygon #' @param mesh inla.mesh object to use in the fit -#' -#' @return A list is returned of class \code{disag_data}. -#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. +#' +#' @return A list is returned of class \code{disag_data}. +#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. #' The list of class \code{disag_data} contains: -#' \item{polygon_shapefile }{The SpatialPolygonDataFrame used as an input.} -#' \item{covariate_rasters }{The RasterStack used as an input.} +#' \item{polygon_shapefile }{The sf object used as an input.} +#' \item{covariate_rasters }{The SpatRaster used as an input.} #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}.
Each row represents a pixel in a polygon.} #' \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} @@ -217,24 +224,24 @@ prepare_data <- function(polygon_shapefile, #' \item{mesh }{A INLA mesh to be used for the spatial field of the disaggregation model.} #' #' @name as.disag_data -#' +#' #' @export -as.disag_data <- function(polygon_shapefile, +as.disag_data <- function(polygon_shapefile, shapefile_names, - covariate_rasters, - polygon_data, - covariate_data, + covariate_rasters, + polygon_data, + covariate_data, aggregation_pixels, - coordsForFit, + coordsForFit, coordsForPrediction, - startendindex, + startendindex, mesh = NULL) { - - stopifnot(inherits(polygon_shapefile, 'SpatialPolygonsDataFrame')) + + stopifnot(inherits(polygon_shapefile, 'sf')) stopifnot(inherits(shapefile_names, 'list')) - stopifnot(inherits(covariate_rasters, c('RasterBrick', 'RasterStack'))) + stopifnot(inherits(covariate_rasters, 'SpatRaster')) stopifnot(inherits(polygon_data, 'data.frame')) stopifnot(inherits(covariate_data, 'data.frame')) stopifnot(inherits(aggregation_pixels, 'numeric')) @@ -244,7 +251,7 @@ as.disag_data <- function(polygon_shapefile, if(!is.null(mesh)) { stopifnot(inherits(mesh, 'inla.mesh')) } - + disag_data <- list(polygon_shapefile = polygon_shapefile, shapefile_names = shapefile_names, covariate_rasters = covariate_rasters, @@ -255,8 +262,8 @@ as.disag_data <- function(polygon_shapefile, coordsForPrediction = coordsForPrediction, startendindex = startendindex, mesh = mesh) - + class(disag_data) <- c('disag_data', 'list') - + return(disag_data) } diff --git a/R/summary.R b/R/summary.R index 2a744e1..2c34c07 100644 --- a/R/summary.R +++ b/R/summary.R @@ -1,28 +1,28 @@ #' Summary function for disaggregation fit result -#' +#' #' Function that summarises the result of the fit from the disaggregation model. -#' +#' #' Prints the negative log likelihood, model parameters and calculates metrics from in-sample performance. #' #' @param object Object returned from disag_model. #' @param ... Further arguments to \emph{summary} function. -#' +#' #' @return A list of the model parameters, negative log likelihood and metrics from in-sample performance. -#' +#' #' @method summary disag_model -#' +#' #' @export #' @importFrom stats cor quantile sd summary.disag_model <- function(object, ...) { - + pred <- obs <- NULL - + model_params <- summary(object$sd_out, select = 'fixed') - + report <- object$obj$report() nll <- report$nll - + # Form of the observed and predicted results depends on the likelihood function used if(object$model_setup$family == 'gaussian') { observed_data = report$polygon_response_data/report$reportnormalisation @@ -34,165 +34,165 @@ summary.disag_model <- function(object, ...) 
{ observed_data = report$polygon_response_data predicted_data = report$reportprediction_cases } - + in_sample <- data.frame(obs = observed_data, pred = predicted_data) in_sample_reduced <- in_sample[!is.na(in_sample$pred), ] - metrics <- dplyr::summarise(in_sample_reduced, + metrics <- dplyr::summarise(in_sample_reduced, RMSE = sqrt(mean((pred - obs) ^ 2)), MAE = mean(abs(pred - obs)), pearson = cor(pred, obs, method = 'pearson'), spearman = cor(pred, obs, method = 'spearman'), log_pearson = cor(log1p(pred), log1p(obs), method = 'pearson')) - + cat(paste('Likelihood function:', object$model_setup$family, '\n')) cat(paste('Link function:', object$model_setup$link, '\n')) - + cat('Model parameters:\n') print(model_params) - + cat(paste0('\nModel convergence: ', object$opt$convergence, ' (', object$opt$message, ')')) - + cat(paste('\nNegative log likelihood: ', nll, '\n')) - + cat('\nIn sample performance:\n') print(metrics) - + summary <- list(model_params = model_params, nll = nll, metrics = metrics) - + return(invisible(summary)) - + } #' Print function for disaggregation fit result. -#' +#' #' Function that prints the result of the fit from the disaggregation model. -#' +#' #' Prints the negative log likelihood, model parameters and calculates metrics from in-sample performance. #' #' @param x Object returned from disag_model. #' @param ... Further arguments to \emph{print} function. -#' +#' #' @return NULL -#' +#' #' @method print disag_model -#' +#' #' @export #' @importFrom stats cor quantile sd print.disag_model <- function(x, ...){ - + model_params <- summary(x$sd_out, select = 'fixed') - + cat('Bayesian disaggregation model result\n') cat('\n') cat(paste('Likelihood function:', x$model_setup$family, '\n')) cat(paste('Link function:', x$model_setup$link, '\n')) - + cat('\nParameter values:\n') print(model_params[ , 1]) - + return(invisible(x)) } #' Summary function for disaggregation input data -#' +#' #' Function that summarizes the input data from the disaggregation model. -#' +#' #' Prints the number of polyons and pixels, the number of pixels in the largest and smallest polygons and summaries of the covariates. #' #' @param object Object returned from prepare_data. #' @param ... Further arguments to \emph{summary} function. -#' +#' #' @return A list of the number of polyons, the number of covariates and summaries of the covariates. -#' +#' #' @method summary disag_data -#' +#' #' @export summary.disag_data <- function(object, ...) 
{ n_polygons <- nrow(object$polygon_shapefile) - n_covariates <- raster::nlayers(object$covariate_rasters) - + n_covariates <- as.integer(terra::nlyr(object$covariate_rasters)) + cat(paste("The data contains", n_polygons, "polygons and", nrow(object$covariate_data), "pixels\n")) - - cat(paste("The largest polygon contains", max(table(object$covariate_data[ , object$shapefile_names$id_var])), "pixels", + + cat(paste("The largest polygon contains", max(table(object$covariate_data[ , object$shapefile_names$id_var])), "pixels", "and the smallest polygon contains", min(table(object$covariate_data[ , object$shapefile_names$id_var])), "pixels\n")) - + cat(paste("There are", n_covariates, "covariates\n")) - + covariate_summary <- summary(object$covariate_data[ , names(object$covariate_rasters)]) - + cat("\nCovariate summary:\n") print(covariate_summary) - + summary <- list(number_polygons = n_polygons, number_covariates = n_covariates, covariate_summary = covariate_summary) - + return(invisible(summary)) - + } #' Print function for disaggregation input data -#' +#' #' Function that prints the input data from the disaggregation model. -#' +#' #' Prints the number of polygons and pixels, the number of pixels in the largest and smallest polygons and summaries of the covariates. #' #' @param x Object returned from prepare_data. #' @param ... Further arguments to \emph{print} function. -#' +#' #' @return NULL -#' +#' #' @method print disag_data -#' +#' #' @export print.disag_data <- function(x, ...){ - + n_polygons <- nrow(x$polygon_shapefile) - n_covariates <- raster::nlayers(x$covariate_rasters) - + n_covariates <- terra::nlyr(x$covariate_rasters) + cat(paste("The data contains", n_polygons, "polygons and", nrow(x$covariate_data), "pixels\n")) - - cat(paste("The largest polygon contains", max(table(x$covariate_data[ , x$shapefile_names$id_var])), "pixels", + + cat(paste("The largest polygon contains", max(table(x$covariate_data[ , x$shapefile_names$id_var])), "pixels", "and the smallest polygon contains", min(table(x$covariate_data[ , x$shapefile_names$id_var])), "pixels\n")) - + cat(paste("There are", n_covariates, "covariates\n")) - + return(invisible(x)) }
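All of the classes returned along the workflow share summary and print methods like those above; a usage sketch (object names illustrative):

```R
summary(data_for_model)  # disag_data: polygon and pixel counts, covariate summary
summary(fit_result)      # disag_model: parameters, nll and in-sample metrics
summary(predictions)     # disag_prediction: realisations and prediction ranges
```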
#' Summary function for disaggregation prediction -#' +#' #' Function that summarizes the prediction from the disaggregation model. -#' +#' #' Prints the model components, the number of uncertainty realisations and the ranges of the mean and uncertainty predictions. #' #' @param object Object returned from predict.disag_model #' @param ... Further arguments to \emph{summary} function. -#' +#' #' @return A list of the number of realisations and the ranges of the mean and IQR predicted values. -#' +#' #' @method summary disag_prediction -#' +#' #' @export summary.disag_prediction <- function(object, ...) { - - number_realisations <- raster::nlayers(object$uncertainty_prediction$realisations) - max_mean <- max(object$mean_prediction$prediction@data@values) - min_mean <- min(object$mean_prediction$prediction@data@values) - max_iqr <- max((object$uncertainty_prediction$predictions_ci[[2]] - object$uncertainty_prediction$predictions_ci[[1]])@data@values) - min_iqr <- min((object$uncertainty_prediction$predictions_ci[[2]] - object$uncertainty_prediction$predictions_ci[[1]])@data@values) - + + number_realisations <- as.integer(terra::nlyr(object$uncertainty_prediction$realisations)) + max_mean <- max(terra::values(object$mean_prediction$prediction)) + min_mean <- min(terra::values(object$mean_prediction$prediction)) + max_iqr <- max((terra::values(object$uncertainty_prediction$predictions_ci[[2]]) - terra::values(object$uncertainty_prediction$predictions_ci[[1]]))) + min_iqr <- min((terra::values(object$uncertainty_prediction$predictions_ci[[2]]) - terra::values(object$uncertainty_prediction$predictions_ci[[1]]))) + cat('Prediction from disaggregation model\n') cat('\n') cat('Components of the model: ') @@ -204,33 +204,33 @@ summary.disag_prediction <- function(object, ...) { cat('\n') cat(paste('The mean predicted values range from', signif(min_mean, 3), 'to', signif(max_mean, 3), '\n')) cat(paste('The predicted IQR takes values from', signif(min_iqr, 3), 'to', signif(max_iqr, 3), '\n')) - + summary <- list(number_realisations = number_realisations, range_mean_values = c(min_mean, max_mean), range_iqr_values = c(min_iqr, max_iqr)) - + return(invisible(summary)) - + } #' Print function for disaggregation prediction -#' +#' #' Function that prints the prediction from the disaggregation model. -#' +#' #' Prints the components of the model and the number of uncertainty realisations. #' #' @param x Object returned from predict.disag_model. #' @param ... Further arguments to \emph{print} function. -#' +#' #' @return NULL -#' +#' #' @method print disag_prediction -#' +#' #' @export print.disag_prediction <- function(x, ...){ - + cat('Prediction from disaggregation model\n') cat('\n') cat('Components of the model: ') @@ -238,7 +238,7 @@ print.disag_prediction <- function(x, ...){ if(!is.null(x$mean_prediction$field)) cat('field ') if(!is.null(x$mean_prediction$iid)) cat('iid ') cat('\n\n') - cat(paste0('There are ', raster::nlayers(x$uncertainty_prediction$realisations), ' uncertainty realisations')) + cat(paste0('There are ', terra::nlyr(x$uncertainty_prediction$realisations), ' uncertainty realisations')) return(invisible(x)) -} \ No newline at end of file +} diff --git a/README.md b/README.md index a6f914e..c0dd5ac 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,13 @@ Disaggregation ============== -[![Build Status](https://travis-ci.org/aknandi/disaggregation.svg?branch=master)](https://travis-ci.org/aknandi/disaggregation?branch=master) -[![codecov.io](https://codecov.io/github/aknandi/disaggregation/coverage.svg?branch=master)](https://codecov.io/github/aknandi/disaggregation?branch=master) -A package containing useful functions for disaggregation modelling +[![CRANstatus](https://www.r-pkg.org/badges/version/disaggregation)](https://cran.r-project.org/package=disaggregation) +[![R-CMD-check](https://github.com/aknandi/disaggregation/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/aknandi/disaggregation/actions/workflows/R-CMD-check.yaml) + + +A package containing useful functions for disaggregation modelling.
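A typical end-to-end workflow looks like this (a sketch with placeholder objects: shps is an sf object of response polygons, covs a SpatRaster of covariates):

```R
library(disaggregation)

data_for_model <- prepare_data(polygon_shapefile = shps,
                               covariate_rasters = covs)

model_result <- disag_model(data_for_model)

predictions <- predict(model_result)

plot(predictions)
```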
+An overview of the package is given in [our paper](https://www.jstatsoft.org/article/view/v106i11). Installation ------------ @@ -18,7 +21,7 @@ Overview ## Data preparation -Function prepare_data takes in SpatialPolygonDataFrame (response) and RasterStack (covariates) to produce a data structure required for the disaggregation modelling. Calls functions to extract covariate data, polygon data, aggregation (population data), match points to polygons and build an INLA mesh for the spatial field (build_mesh) +Function prepare_data takes in an sf object (response) and a SpatRaster (covariates) to produce a data structure required for the disaggregation modelling. Calls functions to extract covariate data, polygon data, aggregation (population data), match points to polygons and build an INLA mesh for the spatial field (build_mesh) ```R data_for_model <- prepare_data(polygon_shapefile = shps, @@ -30,9 +33,9 @@ data_for_model <- prepare_data(polygon_shapefile = shps, ### Input data -* A RasterStack of covariate rasters to be used in the model (covariate_rasters) -* A SpatialPolygonsDataFrame (polygon_shapefile) containing at least two columns: one with the id for the polygons (id_var) and one with the response count data (response_var); for binomial data, i.e survey data, it can also contain a sample size column (sample_size_var). -* (Optional) Raster used to aggregate the pixel level predictions (aggregation_raster) to polygon level (usually population). If this is not supplied a uniform raster will be used +* A SpatRaster of covariate rasters to be used in the model (covariate_rasters) +* An sf object (polygon_shapefile) containing at least two columns: one with the id for the polygons (id_var) and one with the response count data (response_var); for binomial data, i.e. survey data, it can also contain a sample size column (sample_size_var). +* (Optional) SpatRaster used to aggregate the pixel level predictions (aggregation_raster) to polygon level (usually population). If this is not supplied a uniform raster will be used ### Controlling the mesh @@ -106,4 +109,3 @@ Summary functions for input data and model results summary(data_for_model) summary(model_result) ``` - diff --git a/cran-comments.md b/cran-comments.md index 2a601ad..1cdb171 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,56 +1,71 @@ -## Update -This is a package update (version 0.1.3). The changes in this version are: - -* Renamed fit_model function to disag_model. Deprecated fit_model, will be removed in the next version - - -* Renamed classes disag.data and fit.result to disag_data and disag_model - -* Created a disag_predictions class which is returned by the predict function and contains the - mean and uncertainty predictions. This has replaced the predictions and uncertainty classes. - Plot, summary and print methods have been implemented for the disag_predictions class - -* Extracted the function make_model_object to allow the user to make a TMB model object on its own, - so it can be used in different optimiser or a for MCMC - -* Neatened up plot.disag_data function to produce 3 plots on the same canvas, with an optional which - argument for the user to choose which plots to display - -* Made the summary and print function return different outputs. Print functions show minmial output, - summary function are more deatiled - -## Test environments -* local Windows 10, R 3.6.1 -* Ubuntu 16.04.6 LTS (on travis-ci, devel and release) -* win-builder (devel and release) - -## R CMD check results -There were no ERRORs or WARNINGs.
-
-There were 3 NOTEs:
-
-* checking CRAN incoming feasibility ... NOTE
-  Maintainer: 'Anita Nandi '
-
-  Suggests or Enhances not in mainstream repositories:
-    INLA
-  Availability using Additional_repositories specification:
-    INLA yes https://inla.r-inla-download.org/R/stable
-
-  The package uses INLA, my understanding of this NOTE is that it is fine.
-
-* checking installed package size ... NOTE
-  installed size is 12.8Mb
-  sub-directories of 1Mb or more:
-    libs 12.5Mb
-
-  Packages based on C++ can have large compiled libraries. This is as small as it can be, hope that is ok. I got a similar, but slightly different note when using R CMD check compared to devtools::check(). The gist was the same though.
-
-* checking compilation flags used ... NOTE
-  Compilation used the following non-portable flag(s):
-    '-Wa,-mbig-obj'
-
-  To compile large C++ source files on Windows a compilation flag is needed
-
-## Downstream dependencies
-There are currently no downstream dependencies for this package
+## Update
+This is a package update (version 0.2.0). The only real change in this version
+is updating references to our Journal of Statistical Software paper that is in
+press.
+
+
+
+
+## Test environments
+Windows, R release
+Ubuntu 20, R release
+Ubuntu 20, R oldrel
+Ubuntu 20, R devel
+
+
+## R CMD check results
+There were no ERRORs or WARNINGs.
+
+There were 3 NOTEs.
+
+
+* checking CRAN incoming feasibility ... [14s] NOTE
+Maintainer: 'Tim Lucas '
+
+Possibly misspelled words in DESCRIPTION:
+  Nandi (15:28)
+
+Suggests or Enhances not in mainstream repositories:
+  INLA
+Availability using Additional_repositories specification:
+  INLA yes https://inla.r-inla-download.org/R/stable
+
+Found the following (possibly) invalid DOIs:
+  DOI: 10.18637/jss.v106.i11
+    From: DESCRIPTION
+          inst/CITATION
+    Status: 404
+    Message: Not Found
+
+
+Response: Anita Nandi's name is spelled correctly. The INLA availability
+issue is the same as previous submissions. The DOI is for our new Journal
+of Statistical Software paper and has been reserved but not registered yet.
+
+
+
+* checking package dependencies ... NOTE
+Package suggested but not available for checking: 'INLA'
+
+Response: Same as above.
+
+
+* checking examples ... [16s] NOTE
+Examples with CPU (user + system) or elapsed time > 10s
+               user system elapsed
+getPolygonData 9.89   0.17   10.08
+
+
+
+Response: As this is only just over the 10 second limit we hope it is ok. We
+have done our best to make the examples small throughout.
+
+
+## Downstream dependencies
+There are currently no downstream dependencies for this package
diff --git a/inst/CITATION b/inst/CITATION
index 0650da5..ee20286 100644
--- a/inst/CITATION
+++ b/inst/CITATION
@@ -1,16 +1,16 @@
-citHeader("To cite the disaggregation package in publications, please use:")
-
-citEntry(entry = "Article",
-  author = as.person(c(
-    "Anita K Nandi [aut, cre]",
-    "Tim C D Lucas [aut]",
-    "Rohan Arambepola [aut]",
-    "Peter Gething",
-    "Dan Weiss"
-  )),
-  title = "disaggregation: An R Package for Bayesian Spatial Disaggregation Modelling",
-  journal = "arxiv",
-  year = "2020",
-  url = "https://arxiv.org/abs/2001.04847",
-  textVersion = "Nandi, A. K., Lucas, T. C., Arambepola, R., Gething, P., & Weiss, D. J. (2020). disaggregation: An R Package for Bayesian Spatial Disaggregation Modelling. arXiv preprint arXiv:2001.04847."
+bibentry(bibtype = "Article", + title = "{disaggregation}: An {R} Package for {B}ayesian Spatial Disaggregation Modeling", + author = c(person(given = c("Anita", "K."), family = "Nandi"), + person(given = c("Tim", "C.", "D."), family = "Lucas"), + person(given = "Rohan", family = "Arambepola"), + person(given = "Peter", family = "Gething"), + person(given = c("Daniel", "J."), family = "Weiss")), + journal = "Journal of Statistical Software", + year = "2023", + volume = "106", + number = "11", + pages = "1--19", + doi = "10.18637/jss.v106.i11", + header = "To cite disaggregation in publications use:" ) + diff --git a/man/as.disag_data.Rd b/man/as.disag_data.Rd index 01903e3..803d050 100644 --- a/man/as.disag_data.Rd +++ b/man/as.disag_data.Rd @@ -18,11 +18,11 @@ as.disag_data( ) } \arguments{ -\item{polygon_shapefile}{SpatialPolygonDataFrame containing the response data} +\item{polygon_shapefile}{sf object containing the response data} -\item{shapefile_names}{List of 2: polygon id variable name and response variable name from polygon_shapefile} +\item{shapefile_names}{List of 2: polygon id variable name and response variable name from x} -\item{covariate_rasters}{RasterStack of covariates} +\item{covariate_rasters}{SpatRaster of covariates} \item{polygon_data}{data.frame with two columns: polygon id and response} @@ -30,7 +30,7 @@ as.disag_data( \item{aggregation_pixels}{vector with value of aggregation raster at each pixel} -\item{coordsForFit}{coordinates of the covariate data points within the polygons in polygon_shapefile} +\item{coordsForFit}{coordinates of the covariate data points within the polygons in x} \item{coordsForPrediction}{coordinates of the covariate data points in the whole raster extent} @@ -39,11 +39,11 @@ as.disag_data( \item{mesh}{inla.mesh object to use in the fit} } \value{ -A list is returned of class \code{disag_data}. -The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. +A list is returned of class \code{disag_data}. +The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. The list of class \code{disag_data} contains: - \item{polygon_shapefile }{The SpatialPolygonDataFrame used as an input.} - \item{covariate_rasters }{The RasterStack used as an input.} + \item{x }{The sf object used as an input.} + \item{covariate_rasters }{The SpatRaster used as an input.} \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} diff --git a/man/build_mesh.Rd b/man/build_mesh.Rd index ef6f2b7..77561ca 100644 --- a/man/build_mesh.Rd +++ b/man/build_mesh.Rd @@ -7,7 +7,7 @@ build_mesh(shapes, mesh.args = NULL) } \arguments{ -\item{shapes}{shapefile covering the region under investigation.} +\item{shapes}{sf covering the region under investigation.} \item{mesh.args}{list of parameters that control the mesh structure. 
\emph{convex}, \emph{concave} and \emph{resolution}, to control the boundary of the inner mesh,
and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself,
@@ -17,35 +17,37 @@ with the parameters having the same meaning as in the INLA functions \emph{inla.
 An inla.mesh object
 }
 \description{
-\emph{build_mesh} function takes a SpatialPolygons object and mesh arguments to build an appropriate mesh for the spatial field.
+\emph{build_mesh} function takes an sf object and mesh arguments to build an appropriate mesh for the spatial field.
 }
 \details{
-The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary
-and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest
+The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary
+and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest
 and having a small region outside with a coarser mesh to avoid edge effects.
 Six mesh parameters can be specified as arguments: \emph{convex}, \emph{concave} and \emph{resolution},
 to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself,
-with the names meaing the same as used by INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}.
+with the names meaning the same as used by INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}.
 Defaults are:
 pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, 8), cut = 0.4, offset = c(1, 15)).
 }
 \examples{
 \dontrun{
- polygons <- list()
- for(i in 1:100) {
-   row <- ceiling(i/10)
-   col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10)
-   xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-   polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
- }
+polygons <- list()
+for(i in 1:14) {
+  row <- ceiling(i/10)
+  col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10)
+  xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
+  polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin),
+                              c(ymax, ymax, ymin, ymin, ymax)))
+}

- polys <- do.call(raster::spPolygons, polygons)
- response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
- spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
+polys <- lapply(polygons, sf::st_polygon)
+response_df <- data.frame(area_id = 1:14,
+                          response = runif(14, min = 0, max = 10))
+spdf <- sf::st_sf(response_df, geometry = polys)

- my_mesh <- build_mesh(spdf)
+my_mesh <- build_mesh(spdf)
 }
 }
diff --git a/man/fit_model.Rd b/man/fit_model.Rd
index a762d3e..771fac7 100644
--- a/man/fit_model.Rd
+++ b/man/fit_model.Rd
@@ -1,143 +1,154 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fit_model.R
-\name{fit_model}
-\alias{fit_model}
-\alias{disag_model}
-\title{Fit the disaggregation model}
-\usage{
-fit_model(
-  data,
-  priors = NULL,
-  family = "gaussian",
-  link = "identity",
-  iterations = 100,
-  field = TRUE,
-  iid = TRUE,
-  hess_control_parscale = NULL,
-  hess_control_ndeps = 1e-04,
-  silent = TRUE
-)
-
-disag_model(
-  data,
-  priors = NULL,
-  family = "gaussian",
-  link = "identity",
-  iterations = 100,
-  field = TRUE,
-  iid = TRUE,
-  hess_control_parscale = NULL,
-  hess_control_ndeps = 1e-04,
-  silent = TRUE
-)
-}
-\arguments{
-\item{data}{disag_data object returned by \code{\link{prepare_data}}
function that contains all the necessary objects for the model fitting} - -\item{priors}{list of prior values} - -\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}} - -\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}} - -\item{iterations}{number of iterations to run the optimisation for} - -\item{field}{logical. Flag the spatial field on or off} - -\item{iid}{logical. Flag the iid effect on or off} - -\item{hess_control_parscale}{Argument to scale parameters during the calculation of the Hessian. -Must be the same length as the number of parameters. See \code{\link[stats]{optimHess}} for details.} - -\item{hess_control_ndeps}{Argument to control step sizes during the calculation of the Hessian. -Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. -Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. -See \code{\link[stats]{optimHess}} for details.} - -\item{silent}{logical. Suppress verbose output.} -} -\value{ -A list is returned of class \code{disag_model}. -The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. -The list of class \code{disag_model} contains: - \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} - \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} - \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.} - \item{data }{The \emph{disag_data} object used as an input to the model.} - \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} -} -\description{ -\emph{fit_model} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} and performs a Bayesian disaggregation fit. -} -\details{ -\strong{The model definition} - -The disaggregation model make predictions at the pixel level: -\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} - -And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): -\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} -\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} - -The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the repsonse count data): -\itemize{ - \item Gaussian: - If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where - \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} - \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. - \item Binomial: - For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. - \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. - \item Poisson: - \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. -} - -Specify priors for the regression parameters, field and iid effect as a single list. 
Hyperpriors for the field -are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}$ and $\sigma_{prob}} for the variation of the field -where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect - -The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. -The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. -The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. -These are specified as strings. - -The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. - -The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. - -The \emph{silent} argument can be used to publish/supress verbose output. Default TRUE. -} -\examples{ -\dontrun{ - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } - - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - - r <- raster::raster(ncol=20, nrow=20) - r <- raster::setExtent(r, raster::extent(spdf)) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) - r2 <- raster::raster(ncol=20, nrow=20) - r2 <- raster::setExtent(r2, raster::extent(spdf)) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_rasters <- raster::stack(r, r2) - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_rasters) - parallel::stopCluster(cl) - foreach::registerDoSEQ() - - result <- fit_model(test_data, iterations = 2) - } - -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fit_model.R +\name{fit_model} +\alias{fit_model} +\alias{disag_model} +\title{Fit the disaggregation model} +\usage{ +fit_model( + data, + priors = NULL, + family = "gaussian", + link = "identity", + iterations = 100, + field = TRUE, + iid = TRUE, + hess_control_parscale = NULL, + hess_control_ndeps = 1e-04, + silent = TRUE +) + +disag_model( + data, + priors = NULL, + family = "gaussian", + link = "identity", + iterations = 100, + field = TRUE, + iid = TRUE, + hess_control_parscale = NULL, + hess_control_ndeps = 1e-04, + silent = TRUE +) +} +\arguments{ +\item{data}{disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting} + +\item{priors}{list of prior values} + +\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}} + +\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}} + +\item{iterations}{number of iterations to run the optimisation for} + +\item{field}{logical. Flag the spatial field on or off} + +\item{iid}{logical. Flag the iid effect on or off} + +\item{hess_control_parscale}{Argument to scale parameters during the calculation of the Hessian. +Must be the same length as the number of parameters. 
See \code{\link[stats]{optimHess}} for details.}
+
+\item{hess_control_ndeps}{Argument to control step sizes during the calculation of the Hessian.
+Either length 1 (same step size applied to all parameters) or the same length as the number of parameters.
+Default is 1e-4; try setting a smaller value if you get NaNs in the standard error of the parameters.
+See \code{\link[stats]{optimHess}} for details.}
+
+\item{silent}{logical. Suppress verbose output.}
+}
+\value{
+A list is returned of class \code{disag_model}.
+The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}.
+The list of class \code{disag_model} contains:
+  \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.}
+  \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.}
+  \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.}
+  \item{data }{The \emph{disag_data} object used as an input to the model.}
+  \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function (\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).}
+}
+\description{
+\emph{fit_model} function takes a \emph{disag_data} object created by
+\code{\link{prepare_data}} and performs a Bayesian disaggregation fit.
+}
+\details{
+\strong{The model definition}
+
+The disaggregation model makes predictions at the pixel level:
+\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u}
+
+And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}):
+\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)}
+\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)}
+
+The different likelihoods correspond to slightly different models (\eqn{y_j}{yj} is the response count data):
+\itemize{
+  \item Gaussian:
+  If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where
+  \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi}
+  \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate.
+  \item Binomial:
+  For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested.
+  \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate.
+  \item Poisson:
+  \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count.
+}
+
+Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field
+are given as penalised complexity priors: you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field
+where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field
+where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect.
+
+The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively.
+The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}.
+The link function can be one of \emph{logit}, \emph{log} or \emph{identity}.
+These are specified as strings.
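A compact sketch of the prior interface described above, assuming a prepared disag_data object named test_data; the list names follow the defaults documented for make_model_object later in this changeset, and the numeric values are illustrative only:

```R
priors <- list(
  priormean_intercept = 0,
  priorsd_intercept = 10,
  priormean_slope = 0,
  priorsd_slope = 0.5,
  prior_rho_min = 3,             # P(rho < 3) = 0.1
  prior_rho_prob = 0.1,
  prior_sigma_max = 1,           # P(sigma > 1) = 0.1
  prior_sigma_prob = 0.1,
  prior_iideffect_sd_max = 0.1,  # PC prior for the iid effect
  prior_iideffect_sd_prob = 0.01
)

fit <- disag_model(test_data, priors = priors,
                   family = "poisson", link = "log")
```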
+
+The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE.
+
+The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point.
+
+The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE.
+}
+\examples{
+\dontrun{
+polygons <- list()
+n_polygon_per_side <- 10
+n_polygons <- n_polygon_per_side * n_polygon_per_side
+n_pixels_per_side <- n_polygon_per_side * 2
+
+for(i in 1:n_polygons) {
+  row <- ceiling(i/n_polygon_per_side)
+  col <- ifelse(i \%\% n_polygon_per_side != 0, i \%\% n_polygon_per_side, n_polygon_per_side)
+  xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
+  polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin),
+                              c(ymax, ymax, ymin, ymin, ymax)))
+}
+
+polys <- lapply(polygons, sf::st_polygon)
+N <- floor(runif(n_polygons, min = 1, max = 100))
+response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000))
+
+spdf <- sf::st_sf(response_df, geometry = polys)
+
+# Create raster stack
+r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
+terra::ext(r) <- terra::ext(spdf)
+r[] <- sapply(1:terra::ncell(r), function(x){
+  rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3)})
+r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
+terra::ext(r2) <- terra::ext(spdf)
+r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3))
+cov_stack <- c(r, r2)
+names(cov_stack) <- c('layer1', 'layer2')
+
+test_data <- prepare_data(polygon_shapefile = spdf,
+                          covariate_rasters = cov_stack)
+
+result <- fit_model(test_data, iterations = 2)
+}
+
+}
+\references{
+Nandi, A. K., Lucas, T. C. D., Arambepola, R., Gething, P., and Weiss, D. J. (2023).
+disaggregation: An R Package for Bayesian Spatial Disaggregation Modeling.
+Journal of Statistical Software, 106(11), 1-19. \doi{10.18637/jss.v106.i11}
+}
diff --git a/man/getCovariateRasters.Rd b/man/getCovariateRasters.Rd
index eeb6cc0..c39619b 100644
--- a/man/getCovariateRasters.Rd
+++ b/man/getCovariateRasters.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/extract.R
 \name{getCovariateRasters}
 \alias{getCovariateRasters}
-\title{Get a RasterStack of covariates from a folder containing .tif files}
+\title{Get a SpatRaster of covariates from a folder containing .tif files}
 \usage{
 getCovariateRasters(directory, file_pattern = ".tif$", shape)
 }
@@ -14,10 +14,10 @@ getCovariateRasters(directory, file_pattern = ".tif$", shape)
 \item{shape}{An object with an extent that the rasters will be cropped to.}
 }
 \value{
-A RasterStack of the raster files in the directory
+A multi-layered SpatRaster of the raster files in the directory
 }
 \description{
-Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter.
+Looks in a specified folder for raster files. Returns a multi-layered SpatRaster of the rasters cropped to the extent specified by the shape parameter.
}
\examples{
\dontrun{
diff --git a/man/getPolygonData.Rd b/man/getPolygonData.Rd
index 3b57903..27759e6 100644
--- a/man/getPolygonData.Rd
+++ b/man/getPolygonData.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/extract.R
 \name{getPolygonData}
 \alias{getPolygonData}
-\title{Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame}
+\title{Extract polygon id and response data into a data.frame from an sf object}
 \usage{
 getPolygonData(
   shape,
   id_var = "area_id",
   response_var = "response",
   sample_size_var = NULL
 )
 }
 \arguments{
-\item{shape}{A SpatialPolygons object containing response data.}
+\item{shape}{An sf object containing response data.}

 \item{id_var}{Name of column in shape object with the polygon id. Default 'area_id'.}

 \item{response_var}{Name of column in shape object with the response data. Default 'response'.}

-\item{sample_size_var}{For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL.}
+\item{sample_size_var}{For survey data, name of column in sf object (if it exists) with the sample size data. Default NULL.}
 }
 \value{
-A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the
-polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
+A data.frame with a row for each polygon in the sf object and columns: area_id, response and N, containing the id of the
+polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
 not exist), this column will contain NAs.
 }
 \description{
-Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the
-polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
+Returns a data.frame with a row for each polygon in the sf object and columns: area_id, response and N, containing the id of the
+polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
 not exist), this column will contain NAs.
} \examples{ { - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } +polygons <- list() +for(i in 1:100) { + row <- ceiling(i/10) + col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) +} - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) +polys <- lapply(polygons,sf::st_polygon) +response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) +spdf <- sf::st_sf(response_df, geometry = polys) getPolygonData(spdf, id_var = 'area_id', response_var = 'response') } diff --git a/man/getStartendindex.Rd b/man/getStartendindex.Rd index c5a3464..e30350e 100644 --- a/man/getStartendindex.Rd +++ b/man/getStartendindex.Rd @@ -1,42 +1,42 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/matching.R -\name{getStartendindex} -\alias{getStartendindex} -\title{Function to match pixels to their corresponding polygon} -\usage{ -getStartendindex(covariates, polygon_data, id_var = "area_id") -} -\arguments{ -\item{covariates}{data.frame with each covariate as a column an and id column.} - -\item{polygon_data}{data.frame with polygon id and response data.} - -\item{id_var}{string with the name of the column in the covariate data.frame containing the polygon id.} -} -\value{ -A matrix with two columns and one row for each polygon. The first column is the index of the first row in -covariate data that corresponds to that polygon, the second column is the index of the last row in -covariate data that corresponds to that polygon. -} -\description{ -From the covaraite data and polygon data, the function matches the polygon id between the two to find -which pixels from the covariate data are contained in each of the polygons. -} -\details{ -Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, -and another data.frame containing polygon data with a polygon id, response and sample size column (as returned -by \code{getPolygonData} function). - -Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in -covariate data that corresponds to that polygon, the second column is the index of the last row in -covariate data that corresponds to that polygon. 
-}
-\examples{
-{
-  covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5))
-  response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA))
-  getStartendindex(covs, response, 'area_id')
-}
-
-
-}
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/matching.R
+\name{getStartendindex}
+\alias{getStartendindex}
+\title{Function to match pixels to their corresponding polygon}
+\usage{
+getStartendindex(covariates, polygon_data, id_var = "area_id")
+}
+\arguments{
+\item{covariates}{data.frame with each covariate as a column and an id column.}
+
+\item{polygon_data}{data.frame with polygon id and response data.}
+
+\item{id_var}{string with the name of the column in the covariate data.frame containing the polygon id.}
+}
+\value{
+A matrix with two columns and one row for each polygon. The first column is the index of the first row in
+covariate data that corresponds to that polygon, the second column is the index of the last row in
+covariate data that corresponds to that polygon.
+}
+\description{
+From the covariate data and polygon data, the function matches the polygon id between the two to find
+which pixels from the covariate data are contained in each of the polygons.
+}
+\details{
+Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate,
+and another data.frame containing polygon data with a polygon id, response and sample size column (as returned
+by \code{getPolygonData} function).
+
+Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in
+covariate data that corresponds to that polygon, the second column is the index of the last row in
+covariate data that corresponds to that polygon.
+}
+\examples{
+{
+  covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5))
+  response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA))
+  getStartendindex(covs, response, 'area_id')
+}
+
+
+}
diff --git a/man/make_model_object.Rd b/man/make_model_object.Rd
index bebabe0..25022c7 100644
--- a/man/make_model_object.Rd
+++ b/man/make_model_object.Rd
@@ -1,110 +1,130 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fit_model.R
-\name{make_model_object}
-\alias{make_model_object}
-\title{Create the TMB model object for the disaggregation model}
-} -\details{ -\strong{The model definition} - -The disaggregation model make predictions at the pixel level: -\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} - -And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): -\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} -\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} - -The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the repsonse count data): -\itemize{ - \item Gaussian: - If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where - \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} - \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. - \item Binomial: - For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. - \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. - \item Poisson: - \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. -} - -Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field -are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}$ and $\sigma_{prob}} for the variation of the field -where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect - -The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. -The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. -The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. -These are specified as strings. - -The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. - -The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. - -The \emph{silent} argument can be used to publish/supress verbose output. Default TRUE. 
-}
-\examples{
-\dontrun{
- polygons <- list()
- for(i in 1:100) {
-   row <- ceiling(i/10)
-   col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10)
-   xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-   polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
- }
-
- polys <- do.call(raster::spPolygons, polygons)
- response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
- spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-
- r <- raster::raster(ncol=20, nrow=20)
- r <- raster::setExtent(r, raster::extent(spdf))
- r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3))
- r2 <- raster::raster(ncol=20, nrow=20)
- r2 <- raster::setExtent(r2, raster::extent(spdf))
- r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
- cov_rasters <- raster::stack(r, r2)
-
- cl <- parallel::makeCluster(2)
- doParallel::registerDoParallel(cl)
- test_data <- prepare_data(polygon_shapefile = spdf,
-                           covariate_rasters = cov_rasters)
- parallel::stopCluster(cl)
- foreach::registerDoSEQ()
-
- result <- make_model_object(test_data)
- }
-
-}
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fit_model.R
+\name{make_model_object}
+\alias{make_model_object}
+\title{Create the TMB model object for the disaggregation model}
+\usage{
+make_model_object(
+  data,
+  priors = NULL,
+  family = "gaussian",
+  link = "identity",
+  field = TRUE,
+  iid = TRUE,
+  silent = TRUE
+)
+}
+\arguments{
+\item{data}{disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting}
+
+\item{priors}{list of prior values}
+
+\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}}
+
+\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}}
+
+\item{field}{logical. Flag the spatial field on or off}
+
+\item{iid}{logical. Flag the iid effect on or off}
+
+\item{silent}{logical. Suppress verbose output.}
+}
+\value{
+The TMB model object returned by \code{\link[TMB]{MakeADFun}}.
+}
+\description{
+\emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}}
+and creates a TMB model object to be used in fitting.
+}
+\details{
+\strong{The model definition}
+
+The disaggregation model makes predictions at the pixel level:
+\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u}
+
+And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}):
+\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)}
+\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)}
+
+The different likelihoods correspond to slightly different models (\eqn{y_j}{yj} is the response count data):
+\itemize{
+  \item Gaussian:
+  If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where
+  \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi}
+  \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate.
+  \item Binomial:
+  For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested.
+  \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate.
+  \item Poisson:
+  \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count.
+}
+
+Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field
+are given as penalised complexity priors: you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field
+where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field
+where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect.
+
+The precise names and default values for these priors are:
+\itemize{
+\item priormean_intercept: 0
+\item priorsd_intercept: 10.0
+\item priormean_slope: 0.0
+\item priorsd_slope: 0.5
+\item prior_rho_min: A third of the length of the diagonal of the bounding box.
+\item prior_rho_prob: 0.1
+\item prior_sigma_max: sd(response/mean(response))
+\item prior_sigma_prob: 0.1
+\item prior_iideffect_sd_max: 0.1
+\item prior_iideffect_sd_prob: 0.01
+}
+
+The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively.
+The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}.
+The link function can be one of \emph{logit}, \emph{log} or \emph{identity}.
+These are specified as strings.
+
+The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE.
+
+The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE.
+}
+\examples{
+\dontrun{
+polygons <- list()
+n_polygon_per_side <- 10
+n_polygons <- n_polygon_per_side * n_polygon_per_side
+n_pixels_per_side <- n_polygon_per_side * 2
+
+for(i in 1:n_polygons) {
+  row <- ceiling(i/n_polygon_per_side)
+  col <- ifelse(i \%\% n_polygon_per_side != 0, i \%\% n_polygon_per_side, n_polygon_per_side)
+  xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
+  polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin),
+                              c(ymax, ymax, ymin, ymin, ymax)))
+}
+
+polys <- lapply(polygons, sf::st_polygon)
+N <- floor(runif(n_polygons, min = 1, max = 100))
+response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000))
+
+spdf <- sf::st_sf(response_df, geometry = polys)
+
+# Create raster stack
+r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
+terra::ext(r) <- terra::ext(spdf)
+r[] <- sapply(1:terra::ncell(r), function(x){
+  rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3)})
+r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
+terra::ext(r2) <- terra::ext(spdf)
+r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3))
+cov_stack <- c(r, r2)
+names(cov_stack) <- c('layer1', 'layer2')
+
+test_data <- prepare_data(polygon_shapefile = spdf,
+                          covariate_rasters = cov_stack)
+
+result <- make_model_object(test_data)
+}
+
+}
diff --git a/man/parallelExtract.Rd b/man/parallelExtract.Rd
deleted file mode 100644
index 52be889..0000000
--- a/man/parallelExtract.Rd
+++ /dev/null
@@ -1,58 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/extract.R
-\name{parallelExtract}
-\alias{parallelExtract}
-\title{Parallel extraction of raster stack by shape file.}
-\usage{ -parallelExtract(raster, shape, fun = mean, id = "OBJECTID", ...) -} -\arguments{ -\item{raster}{A RasterBrick or RasterStack object.} - -\item{shape}{A SpatialPolygons object.} - -\item{fun}{The function used to aggregate the pixel data. If NULL, raw pixel data is returned.} - -\item{id}{Name of column in shape object to be used to bind an ID column to output.} - -\item{...}{Other arguments to raster::extract.} -} -\value{ -A data.frame with columns of polygon id, cell id (if fun = NULL) and a column for each raster in the stack -} -\description{ -Parallelisation is performed across rasters, not shapes. -So this function is only useful if you are extracting -data from many raster layers. -As the overhead for parallel computation in windows is high -it only makes sense to parallelise in this way. -} -\examples{ - \dontrun{ - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } - - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - - r <- raster::raster(ncol=20, nrow=20) - r <- raster::setExtent(r, raster::extent(spdf)) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) - r2 <- raster::raster(ncol=20, nrow=20) - r2 <- raster::setExtent(r2, raster::extent(spdf)) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_rasters <- raster::stack(r, r2) - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - result <- parallelExtract(cov_rasters, spdf, fun = NULL, id = 'area_id') - parallel::stopCluster(cl) - foreach::registerDoSEQ() - } -} diff --git a/man/plot.disag_data.Rd b/man/plot.disag_data.Rd index 998a900..f541c61 100644 --- a/man/plot.disag_data.Rd +++ b/man/plot.disag_data.Rd @@ -1,24 +1,24 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R -\name{plot.disag_data} -\alias{plot.disag_data} -\title{Plot input data for disaggregation} -\usage{ -\method{plot}{disag_data}(x, which = c(1, 2, 3), ...) -} -\arguments{ -\item{x}{Object of class \emph{disag_data} to be plotted.} - -\item{which}{If a subset of plots is requied, specify a subset of the numbers 1:3} - -\item{...}{Further arguments to \emph{plot} function.} -} -\value{ -A list of three plots: the polygon plot (ggplot), covariate plot (spplot) and INLA mesh plot (ggplot) -} -\description{ -Plotting function for class \emph{disag_data} (the input data for disaggragation). -} -\details{ -Produces three plots: polygon response data, covariate rasters and INLA mesh. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plot.disag_data} +\alias{plot.disag_data} +\title{Plot input data for disaggregation} +\usage{ +\method{plot}{disag_data}(x, which = c(1, 2, 3), ...) +} +\arguments{ +\item{x}{Object of class \emph{disag_data} to be plotted.} + +\item{which}{If a subset of plots is required, specify a subset of the numbers 1:3} + +\item{...}{Further arguments to \emph{plot} function.} +} +\value{ +A list of three plots: the polygon plot (ggplot), covariate plot (spplot) and INLA mesh plot (ggplot) +} +\description{ +Plotting function for class \emph{disag_data} (the input data for disaggregation). 
+} +\details{ +Produces three plots: polygon response data, covariate rasters and INLA mesh. +} diff --git a/man/plot.disag_model.Rd b/man/plot.disag_model.Rd index 35a19b3..421e14c 100644 --- a/man/plot.disag_model.Rd +++ b/man/plot.disag_model.Rd @@ -1,22 +1,22 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R -\name{plot.disag_model} -\alias{plot.disag_model} -\title{Plot results of fitted model} -\usage{ -\method{plot}{disag_model}(x, ...) -} -\arguments{ -\item{x}{Object of class \emph{disag_model} to be plotted.} - -\item{...}{Further arguments to \emph{plot} function.} -} -\value{ -A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot -} -\description{ -Plotting function for class \emph{disag_model} (the result of the disaggragation fitting). -} -\details{ -Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plot.disag_model} +\alias{plot.disag_model} +\title{Plot results of fitted model} +\usage{ +\method{plot}{disag_model}(x, ...) +} +\arguments{ +\item{x}{Object of class \emph{disag_model} to be plotted.} + +\item{...}{Further arguments to \emph{plot} function.} +} +\value{ +A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot +} +\description{ +Plotting function for class \emph{disag_model} (the result of the disaggregation fitting). +} +\details{ +Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. +} diff --git a/man/plot.disag_prediction.Rd b/man/plot.disag_prediction.Rd index 9990eb9..213027c 100644 --- a/man/plot.disag_prediction.Rd +++ b/man/plot.disag_prediction.Rd @@ -1,22 +1,22 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R -\name{plot.disag_prediction} -\alias{plot.disag_prediction} -\title{Plot mean and uncertainty predictions from the disaggregation model results} -\usage{ -\method{plot}{disag_prediction}(x, ...) -} -\arguments{ -\item{x}{Object of class \emph{disag_prediction} to be plotted.} - -\item{...}{Further arguments to \emph{plot} function.} -} -\value{ -A list of plots of rasters from the prediction: mean prediction, lower CI and upper CI. -} -\description{ -Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggragation fitting). -} -\details{ -Produces raster plots of the mean prediction, and the lower and upper confidence intervals. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plot.disag_prediction} +\alias{plot.disag_prediction} +\title{Plot mean and uncertainty predictions from the disaggregation model results} +\usage{ +\method{plot}{disag_prediction}(x, ...) +} +\arguments{ +\item{x}{Object of class \emph{disag_prediction} to be plotted.} + +\item{...}{Further arguments to \emph{plot} function.} +} +\value{ +A list of plots of rasters from the prediction: mean prediction, lower CI and upper CI. +} +\description{ +Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggregation fitting). +} +\details{ +Produces raster plots of the mean prediction, and the lower and upper confidence intervals. 
+} diff --git a/man/predict.disag_model.Rd b/man/predict.disag_model.Rd index a94b1ce..f85ed7a 100644 --- a/man/predict.disag_model.Rd +++ b/man/predict.disag_model.Rd @@ -9,7 +9,7 @@ \arguments{ \item{object}{disag_model object returned by disag_model function.} -\item{newdata}{If NULL, predictions are made using the data in model_output. +\item{newdata}{If NULL, predictions are made using the data in model_output. If this is a raster stack or brick, predictions will be made over this data.} \item{predict_iid}{logical. If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.} @@ -21,32 +21,32 @@ If this is a raster stack or brick, predictions will be made over this data.} \item{...}{Further arguments passed to or from other methods.} } \value{ -An object of class \emph{disag_prediction} which consists of a list of two objects: +An object of class \emph{disag_prediction} which consists of a list of two objects: \item{mean_prediction }{List of: \itemize{ \item \emph{prediction} Raster of mean predictions based. \item \emph{field} Raster of the field component of the linear predictor. \item \emph{iid} Raster of the iid component of the linear predictor. \item \emph{covariates} Raster of the covariate component of the linear predictor. - }} + }} \item{uncertainty_prediction: }{List of: \itemize{ - \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. - \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. + \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}. + \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}. }} } \description{ -\emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and +\emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and predicts mean and uncertainty maps. } \details{ -To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +To predict over a different spatial extent to that used in the model, +a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. -The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. +The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. -For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated +For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated are given by the arguments \emph{N} and \emph{CI} respectively. } \examples{ diff --git a/man/predict_model.Rd b/man/predict_model.Rd index 93a9647..3e0387a 100644 --- a/man/predict_model.Rd +++ b/man/predict_model.Rd @@ -9,7 +9,7 @@ predict_model(model_output, newdata = NULL, predict_iid = FALSE) \arguments{ \item{model_output}{disag_model object returned by disag_model function} -\item{newdata}{If NULL, predictions are made using the data in model_output. 
+\item{newdata}{If NULL, predictions are made using the data in model_output. If this is a raster stack or brick, predictions will be made over this data. Default NULL.} \item{predict_iid}{If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.} @@ -24,15 +24,15 @@ The mean prediction, which is a list of: } } \description{ -\emph{predict_model} function takes a \emph{disag_model} object created by +\emph{predict_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and predicts mean maps. } \details{ Function returns rasters of the mean predictions as well as the covariate and field contributions to the linear predictor. -To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +To predict over a different spatial extent to that used in the model, +a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. diff --git a/man/predict_uncertainty.Rd b/man/predict_uncertainty.Rd index b51c09e..ccf361d 100644 --- a/man/predict_uncertainty.Rd +++ b/man/predict_uncertainty.Rd @@ -15,7 +15,7 @@ predict_uncertainty( \arguments{ \item{model_output}{disag_model object returned by disag_model function.} -\item{newdata}{If NULL, predictions are made using the data in model_output. +\item{newdata}{If NULL, predictions are made using the data in model_output. If this is a raster stack or brick, predictions will be made over this data. Default NULL.} \item{predict_iid}{If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.} @@ -27,22 +27,22 @@ If this is a raster stack or brick, predictions will be made over this data. Def \value{ The uncertainty prediction, which is a list of: \itemize{ - \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. - \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. + \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}. + \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}. } } \description{ -\emph{predict_uncertainty} function takes a \emph{disag_model} object created by +\emph{predict_uncertainty} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps. } \details{ -Function returns a RasterStack of the realisations as well as the upper and lower credible interval rasters. +Function returns a SpatRaster of the realisations as well as the upper and lower credible interval rasters. -To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +To predict over a different spatial extent to that used in the model, +a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. 
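A short sketch of the documented uncertainty interface, assuming a fitted disag_model object named fit; the argument and element names follow the usage and value sections above:

```R
# Draw 200 realisations and form a 95% credible interval
unc <- predict_uncertainty(fit, predict_iid = FALSE, N = 200, CI = 0.95)

terra::nlyr(unc$realisations) # one layer per realisation, so 200
unc$predictions_ci            # lower and upper credible-interval rasters
```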
-The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. 
+The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction.

The number of the realisations and the size of the confidence interval to be calculated are given by
the arguments \emph{N} and \emph{CI} respectively.
diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd
index e66a503..5b5d5ab 100644
--- a/man/prepare_data.Rd
+++ b/man/prepare_data.Rd
@@ -14,21 +14,21 @@ prepare_data(
   mesh.args = NULL,
   na.action = FALSE,
   makeMesh = TRUE,
-  ncores = 2
+  ncores = NULL
 )
 }
 \arguments{
-\item{polygon_shapefile}{SpatialPolygonDataFrame containing at least two columns: one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}).}
+\item{polygon_shapefile}{sf object containing at least three columns: one with the geometry, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}).}

-\item{covariate_rasters}{RasterStack of covariate rasters to be used in the model.}
+\item{covariate_rasters}{SpatRaster of covariate rasters to be used in the model.}

-\item{aggregation_raster}{Raster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.}
+\item{aggregation_raster}{SpatRaster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.}

-\item{id_var}{Name of column in SpatialPolygonDataFrame object with the polygon id.}
+\item{id_var}{Name of column in sf object with the polygon id.}

-\item{response_var}{Name of column in SpatialPolygonDataFrame object with the response data.}
+\item{response_var}{Name of column in sf object with the response data.}

-\item{sample_size_var}{For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data.}
+\item{sample_size_var}{For survey data, name of column in sf object (if it exists) with the sample size data.}

 \item{mesh.args}{list of parameters that control the mesh structure with the same names as used by INLA.}
@@ -36,14 +36,14 @@ prepare_data(

 \item{makeMesh}{logical. If TRUE, build INLA mesh, takes some time. Default TRUE.}

-\item{ncores}{Number of cores used to perform covariate extraction.}
+\item{ncores}{Deprecated.}
 }
 \value{
-A list is returned of class \code{disag_data}. 
-The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. 
+A list is returned of class \code{disag_data}.
+The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}.
 The list of class \code{disag_data} contains:
-  \item{polygon_shapefile }{The SpatialPolygonDataFrame used as an input.}
-  \item{covariate_rasters }{The RasterStack used as an input.}
+  \item{x }{The sf object used as an input.}
+  \item{covariate_rasters }{The SpatRaster used as an input.}
  \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data).
  \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.}
  \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.}
@@ -53,57 +53,59 @@ The list of class \code{disag_data} contains:
  \item{mesh }{An INLA mesh to be used for the spatial field of the disaggregation model.}
}
\description{
-\emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model. 
+\emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model.
Designed to be used in the \emph{disaggregation::fit_model} function.
}
\details{
-Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates.
+Takes an sf object with the response data and a SpatRaster of covariates.

-Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons 
-(\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores 
+Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons
+(\emph{parallelExtract} function). This is done in parallel and the \emph{n.cores} argument is used to set the number of cores
to use for covariate extraction. This can be the number of covariates used in the model.

-The aggregation raster defines how the pixels within each polygon are aggregated. 
+The aggregation raster defines how the pixels within each polygon are aggregated.
The disaggregation model performs a weighted sum of the pixel prediction, weighted by the pixel values in the aggregation raster.
-For disease incidence rate you use the population raster to aggregate pixel incidence rate by summing the number of cases 
-(rate weighted by population). If no aggregation raster is provided a uniform distribution is assumed, i.e. the pixel predictions 
+For disease incidence rate, use the population raster to aggregate pixel incidence rate by summing the number of cases
+(rate weighted by population). If no aggregation raster is provided a uniform distribution is assumed, i.e. the pixel predictions
are aggregated to polygon level by summing the pixel values.

-Makes a matrix that contains the start and end pixel index for each polygon. Builds an INLA mesh to use for the spatial field 
+Makes a matrix that contains the start and end pixel index for each polygon. Builds an INLA mesh to use for the spatial field
(\emph{getStartendindex} function).

-The \emph{mesh.args} argument allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field 
+The \emph{mesh.args} argument allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field
(\emph{build_mesh} function).
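For concreteness, a minimal sketch of a \emph{prepare_data} call using these arguments (hypothetical objects: spdf is an sf polygon object with area_id and response columns, cov_stack a SpatRaster of covariates, and population a population SpatRaster on the same grid):

  data_for_model <- prepare_data(polygon_shapefile = spdf,
                                 covariate_rasters = cov_stack,
                                 aggregation_raster = population,  # weights pixel rates into case counts
                                 id_var = 'area_id',
                                 response_var = 'response',
                                 mesh.args = list(max.edge = c(50, 100)),  # passed through to INLA
                                 na.action = TRUE)
  plot(data_for_model)  # polygon response, covariate rasters and mesh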
-The \emph{na.action} flag is automatically off. If there are any NAs in the response or covariate data within the polygons the 
-\emph{prepare_data} method will error. Ideally the NAs in the data would be dealt with beforehand, however, setting na.action = TRUE 
-will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero 
+The \emph{na.action} flag is off by default. If there are any NAs in the response or covariate data within the polygons, the
+\emph{prepare_data} method will error. Ideally the NAs in the data would be dealt with beforehand; however, setting na.action = TRUE
+will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero,
and sets covariate NA pixels to the median value for that covariate.
}
\examples{
\donttest{
- polygons <- list()
- for(i in 1:100) {
+polygons <- list()
+for(i in 1:100) {
   row <- ceiling(i/10)
   col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10)
   xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-  polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
- }
-
- polys <- do.call(raster::spPolygons, polygons)
- response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
- spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-
- r <- raster::raster(ncol=20, nrow=20)
- r <- raster::setExtent(r, raster::extent(spdf))
- r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3))
- r2 <- raster::raster(ncol=20, nrow=20)
- r2 <- raster::setExtent(r2, raster::extent(spdf))
- r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
- cov_rasters <- raster::stack(r, r2)
-
- test_data <- prepare_data(polygon_shapefile = spdf,
-                           covariate_rasters = cov_rasters)
-}
-
+  polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin),
+                              c(ymax, ymax, ymin, ymin, ymax)))
+}
+
+polys <- lapply(polygons,sf::st_polygon)
+response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
+spdf <- sf::st_sf(response_df,geometry=polys)
+
+r <- terra::rast(nrow=20,ncol=20)
+terra::ext(r) <- terra::ext(spdf)
+r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3))
+
+r2 <- terra::rast(nrow=20,ncol=20)
+terra::ext(r2) <- terra::ext(spdf)
+r2[] <- sapply(1:terra::ncell(r2), function(x) rnorm(1, ceiling(x/10), 3))
+cov_rasters <- c(r, r2)
+
+test_data <- prepare_data(polygon_shapefile = spdf,
+                          covariate_rasters = cov_rasters)
+}
+
 }
diff --git a/src/Makevars.win b/src/Makevars.win
index bd0bfc6..e69de29 100644
--- a/src/Makevars.win
+++ b/src/Makevars.win
@@ -1 +0,0 @@
-PKG_CXXFLAGS = -Wa,-mbig-obj
\ No newline at end of file
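The TMB template in src/disaggregation.cpp below evaluates the joint negative log-likelihood of the disaggregation model. For orientation, a rough sketch of how it is driven from the R side, using helpers that appear in the tests further down (the test_data object and the control settings here are illustrative only):

  obj <- make_model_object(test_data)  # TMB ADFun wrapper exposing $par, $fn and $gr
  opt <- stats::nlminb(obj$par, obj$fn, obj$gr,
                       control = list(iter.max = 100, trace = 0))
  opt$par  # fitted intercept, slopes and hyperparameters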
diff --git a/src/disaggregation.cpp b/src/disaggregation.cpp
index 78ca3aa..d476bbd 100644
--- a/src/disaggregation.cpp
+++ b/src/disaggregation.cpp
@@ -1,262 +1,262 @@
-//
-// Author: Anita Nandi
-// Date: 2019-02-14
-
-// Data: Spatial field mesh and matrices, polygon data, covariate pixel data
-
-
-#define TMB_LIB_INIT R_init_disaggregation
-#include <TMB.hpp>
-
-template<class Type>
-Type objective_function<Type>::operator()()
-{
-
-  using namespace R_inla;
-  using namespace density;
-  using namespace Eigen;
-
-  // ------------------------------------------------------------------------ //
-  // Spatial field data
-  // ------------------------------------------------------------------------ //
-
-  // The A matrices are for projecting the mesh to a point for the pixel and point data respectively.
-  DATA_SPARSE_MATRIX(Apixel);
-  DATA_STRUCT(spde, spde_t);
-
-  // ------------------------------------------------------------------------ //
-  // Polygon level data
-  // ------------------------------------------------------------------------ //
-
-  // Covariate pixel data
-  DATA_MATRIX(x);
-
-  // two col matrix with start end indices for each shape case.
-  DATA_IARRAY(startendindex);
-
-  // Shape data. Cases and region id.
-  DATA_VECTOR(polygon_response_data);
-  DATA_VECTOR(response_sample_size);
-
-  // Use to aggreagte pixel response values to polygon level
-  DATA_VECTOR(aggregation_values);
-
-  // ------------------------------------------------------------------------ //
-  // Likelihood and link functions
-  // ------------------------------------------------------------------------ //
-
-  DATA_INTEGER(family);
-  DATA_INTEGER(link);
-
-  // ------------------------------------------------------------------------ //
-  // Parameters
-  // ------------------------------------------------------------------------ //
-
-  PARAMETER(intercept);
-  PARAMETER_VECTOR(slope);
-
-  DATA_SCALAR(priormean_intercept);
-  DATA_SCALAR(priorsd_intercept);
-  DATA_SCALAR(priormean_slope);
-  DATA_SCALAR(priorsd_slope);
-
-  // Priors for likelihood
-  PARAMETER(log_tau_gaussian);
-  Type tau_gaussian = exp(log_tau_gaussian);
-  Type gaussian_sd = 1 / sqrt(tau_gaussian);
-
-  // INLA defines a loggamma prior on log tau.
-  // We evaluate a gamma prior on tau, but the parameters are
-  // therefore the same.
-  Type prior_gamma_shape = 1;
-  Type prior_gamma_rate = 5e-05;
-
-  PARAMETER_VECTOR(iideffect);
-  PARAMETER(iideffect_log_tau);
-  Type iideffect_tau = exp(iideffect_log_tau);
-  Type iideffect_sd = 1 / sqrt(iideffect_tau);
-
-  Type iideffect_mean = 0.0;
-
-  // Priors on iid random effect for polygons
-  DATA_SCALAR(prior_iideffect_sd_max);
-  DATA_SCALAR(prior_iideffect_sd_prob);
-
-  // spde hyperparameters
-  PARAMETER(log_sigma);
-  PARAMETER(log_rho);
-  Type sigma = exp(log_sigma);
-  Type rho = exp(log_rho);
-
-  // Priors on spde hyperparameters
-  DATA_SCALAR(prior_rho_min);
-  DATA_SCALAR(prior_rho_prob);
-  DATA_SCALAR(prior_sigma_max);
-  DATA_SCALAR(prior_sigma_prob);
-
-  // Convert hyperparameters to natural scale
-  DATA_SCALAR(nu);
-  Type kappa = sqrt(8.0) / rho;
-
-  // Random effect parameters
-  PARAMETER_VECTOR(nodemean);
-
-  // Model component flags
-  DATA_INTEGER(field);
-  DATA_INTEGER(iid);
-
-  // Number of polygons
-  int n_polygons = polygon_response_data.size();
-  // Number of pixels
-  int n_pixels = x.rows();
-
-  Type nll = 0.0;
-
-  // ------------------------------------------------------------------------ //
-  // Likelihood from priors
-  // ------------------------------------------------------------------------ //
-
-  nll -= dnorm(intercept, priormean_intercept, priorsd_intercept, true);
-  for (int s = 0; s < slope.size(); s++) {
-    nll -= dnorm(slope[s], priormean_slope, priorsd_slope, true);
-  }
-
-  if(iid) {
-    // Likelihood of hyperparameter of polygon iid random effect.
-    // From https://projecteuclid.org/euclid.ss/1491465621 (Eqn 3.3)
-    Type lambda = -log(prior_iideffect_sd_prob) / prior_iideffect_sd_max;
-    Type log_pcdensity_iid = log(lambda / 2) - (3/2)*iideffect_log_tau - lambda * pow(iideffect_tau, -1/2);
-    // log(iideffect_sd) from the Jacobian
-    nll -= log_pcdensity_iid + log(iideffect_sd);
-
-    // Likelihood of random effect for polygons
-    for(int p = 0; p < iideffect.size(); p++) {
-      nll -= dnorm(iideffect[p], iideffect_mean, iideffect_sd, true);
-    }
-  }
-
-  // Likelihood from the gaussian prior.
-  // log(prec) ~ loggamma
-  // prec ~ gamma
-  if(family == 0) {
-    nll -= dgamma(tau_gaussian, prior_gamma_shape, prior_gamma_rate, true);
-  }
-
-  if(field) {
-    // Likelihood of hyperparameters for field.
-    // From https://www.tandfonline.com/doi/full/10.1080/01621459.2017.1415907 (Theorem 2.6)
-    Type lambdatilde1 = -log(prior_rho_prob) * prior_rho_min;
-    Type lambdatilde2 = -log(prior_sigma_prob) / prior_sigma_max;
-    Type log_pcdensity = log(lambdatilde1) + log(lambdatilde2) - 2*log_rho - lambdatilde1 * pow(rho, -1) - lambdatilde2 * sigma;
-    // log_rho and log_sigma from the Jacobian
-    nll -= log_pcdensity + log_rho + log_sigma;
-
-    // Build spde matrix
-    SparseMatrix<Type> Q = Q_spde(spde, kappa);
-
-    // From Lindgren (2011) https://doi.org/10.1111/j.1467-9868.2011.00777.x, see equation for the marginal variance
-    Type scaling_factor = sqrt(exp(lgamma(nu)) / (exp(lgamma(nu + 1)) * 4 * M_PI * pow(kappa, 2*nu)));
-
-    // Likelihood of the random field.
-    nll += SCALE(GMRF(Q), sigma / scaling_factor)(nodemean);
-  }
-
-  Type nll_priors = nll;
-
-  // ------------------------------------------------------------------------ //
-  // Likelihood from data
-  // ------------------------------------------------------------------------ //
-
-  vector<Type> pixel_linear_pred(n_pixels);
-  pixel_linear_pred = intercept + x * slope;
-
-  if(field) {
-    // Calculate field for pixel data
-    vector<Type> linear_pred_field(n_pixels);
-    linear_pred_field = Apixel * nodemean;
-    pixel_linear_pred += linear_pred_field.array();
-  }
-
-  // recalculate startendindices to be in the form start, n
-  startendindex.col(1) = startendindex.col(1) - startendindex.col(0) + 1;
-
-  Type polygon_response;
-  Type normalised_polygon_response;
-  Type normalisation_total;
-  Type pred_polygoncases;
-  Type pred_polygonrate;
-  Type polygon_sd;
-  vector<Type> pixel_pred;
-  vector<Type> numerator_pixels;
-  vector<Type> normalisation_pixels;
-  vector<Type> reportnormalisation(n_polygons);
-  vector<Type> reportprediction_cases(n_polygons);
-  vector<Type> reportprediction_rate(n_polygons);
-  vector<Type> reportnll(n_polygons);
-  vector<Type> reportpolygonsd(n_polygons);
-
-  // For each shape get pixel predictions within and aggregate to polygon level
-  for (int polygon = 0; polygon < n_polygons; polygon++) {
-
-    // Get pixel level predictions (rate)
-    pixel_pred = pixel_linear_pred.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array();
-    if(iid) {
-      pixel_pred += iideffect[polygon];
-    }
-    // Use correct link function
-    if(link == 0) {
-      pixel_pred = invlogit(pixel_pred);
-    } else if(link == 1) {
-      pixel_pred = exp(pixel_pred);
-    } else if(link == 2){
-      // Don't need to do anything, i.e. pixel_pred = pixel_pred;
-    } else {
-      error("Link function not implemented.");
-    }
-
-    // Aggregate to polygon prediction
-    numerator_pixels = pixel_pred * aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array();
-    normalisation_pixels = aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1));
-    normalisation_total = sum(normalisation_pixels);
-    pred_polygoncases = sum(numerator_pixels);
-    pred_polygonrate = pred_polygoncases/normalisation_total;
-
-    reportnormalisation[polygon] = normalisation_total;
-    reportprediction_cases[polygon] = pred_polygoncases;
-    reportprediction_rate[polygon] = pred_polygonrate;
-
-    // Use correct likelihood function
-    if(family == 0) {
-      // Scale the pixel sd to polygon level
-      polygon_sd = gaussian_sd * sqrt((normalisation_pixels * normalisation_pixels).sum()) / normalisation_total;
-      reportpolygonsd[polygon] = polygon_sd;
-      // Calculate normal likelihood in rate space
-      polygon_response = polygon_response_data(polygon);
-      normalised_polygon_response = polygon_response/normalisation_total;
-      nll -= dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true);
-      reportnll[polygon] = -dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true);
-    } else if(family == 1) {
-      nll -= dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true);
-      reportnll[polygon] = -dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true);
-    } else if(family == 2) {
-      nll -= dpois(polygon_response_data[polygon], pred_polygoncases, true);
-      reportnll[polygon] = -dpois(polygon_response_data[polygon], pred_polygoncases, true);
-    } else {
-      error("Likelihood not implemented.");
-    }
-
-  }
-
-  REPORT(reportprediction_cases);
-  REPORT(reportprediction_rate);
-  REPORT(reportnormalisation);
-  REPORT(reportnll);
-  REPORT(polygon_response_data);
-  REPORT(nll_priors);
-  REPORT(nll);
-  if(family == 0) {
-    REPORT(reportpolygonsd);
-  }
-
-  return nll;
-}
+//
+// Author: Anita Nandi
+// Date: 2019-02-14
+
+// Data: Spatial field mesh and matrices, polygon data, covariate pixel data
+
+
+#define TMB_LIB_INIT R_init_disaggregation
+#include <TMB.hpp>
+
+template<class Type>
+Type objective_function<Type>::operator()()
+{
+
+  using namespace R_inla;
+  using namespace density;
+  using namespace Eigen;
+
+  // ------------------------------------------------------------------------ //
+  // Spatial field data
+  // ------------------------------------------------------------------------ //
+
+  // The A matrices are for projecting the mesh to a point for the pixel and point data respectively.
+  DATA_SPARSE_MATRIX(Apixel);
+  DATA_STRUCT(spde, spde_t);
+
+  // ------------------------------------------------------------------------ //
+  // Polygon level data
+  // ------------------------------------------------------------------------ //
+
+  // Covariate pixel data
+  DATA_MATRIX(x);
+
+  // two col matrix with start end indices for each shape case.
+  DATA_IARRAY(startendindex);
+
+  // Shape data. Cases and region id.
+  DATA_VECTOR(polygon_response_data);
+  DATA_VECTOR(response_sample_size);
+
+  // Use to aggregate pixel response values to polygon level
+  DATA_VECTOR(aggregation_values);
+
+  // ------------------------------------------------------------------------ //
+  // Likelihood and link functions
+  // ------------------------------------------------------------------------ //
+
+  DATA_INTEGER(family);
+  DATA_INTEGER(link);
+
+  // ------------------------------------------------------------------------ //
+  // Parameters
+  // ------------------------------------------------------------------------ //
+
+  PARAMETER(intercept);
+  PARAMETER_VECTOR(slope);
+
+  DATA_SCALAR(priormean_intercept);
+  DATA_SCALAR(priorsd_intercept);
+  DATA_SCALAR(priormean_slope);
+  DATA_SCALAR(priorsd_slope);
+
+  // Priors for likelihood
+  PARAMETER(log_tau_gaussian);
+  Type tau_gaussian = exp(log_tau_gaussian);
+  Type gaussian_sd = 1 / sqrt(tau_gaussian);
+
+  // INLA defines a loggamma prior on log tau.
+  // We evaluate a gamma prior on tau, but the parameters are
+  // therefore the same.
+  Type prior_gamma_shape = 1;
+  Type prior_gamma_rate = 5e-05;
+
+  PARAMETER_VECTOR(iideffect);
+  PARAMETER(iideffect_log_tau);
+  Type iideffect_tau = exp(iideffect_log_tau);
+  Type iideffect_sd = 1 / sqrt(iideffect_tau);
+
+  Type iideffect_mean = 0.0;
+
+  // Priors on iid random effect for polygons
+  DATA_SCALAR(prior_iideffect_sd_max);
+  DATA_SCALAR(prior_iideffect_sd_prob);
+
+  // spde hyperparameters
+  PARAMETER(log_sigma);
+  PARAMETER(log_rho);
+  Type sigma = exp(log_sigma);
+  Type rho = exp(log_rho);
+
+  // Priors on spde hyperparameters
+  DATA_SCALAR(prior_rho_min);
+  DATA_SCALAR(prior_rho_prob);
+  DATA_SCALAR(prior_sigma_max);
+  DATA_SCALAR(prior_sigma_prob);
+
+  // Convert hyperparameters to natural scale
+  DATA_SCALAR(nu);
+  Type kappa = sqrt(8.0) / rho;
+
+  // Random effect parameters
+  PARAMETER_VECTOR(nodemean);
+
+  // Model component flags
+  DATA_INTEGER(field);
+  DATA_INTEGER(iid);
+
+  // Number of polygons
+  int n_polygons = polygon_response_data.size();
+  // Number of pixels
+  int n_pixels = x.rows();
+
+  Type nll = 0.0;
+
+  // ------------------------------------------------------------------------ //
+  // Likelihood from priors
+  // ------------------------------------------------------------------------ //
+
+  nll -= dnorm(intercept, priormean_intercept, priorsd_intercept, true);
+  for (int s = 0; s < slope.size(); s++) {
+    nll -= dnorm(slope[s], priormean_slope, priorsd_slope, true);
+  }
+
+  if(iid) {
+    // Likelihood of hyperparameter of polygon iid random effect.
+    // From https://projecteuclid.org/euclid.ss/1491465621 (Eqn 3.3)
+    Type lambda = -log(prior_iideffect_sd_prob) / prior_iideffect_sd_max;
+    // NB floating-point literals: (3/2) and (-1/2) in integer arithmetic would evaluate to 1 and 0
+    Type log_pcdensity_iid = log(lambda / 2) - (3.0/2.0)*iideffect_log_tau - lambda * pow(iideffect_tau, Type(-0.5));
+    // + log(tau) from the Jacobian: the PC prior density is for tau, but the parameter is log(tau)
+    nll -= log_pcdensity_iid + iideffect_log_tau;
+
+    // Likelihood of random effect for polygons
+    for(int p = 0; p < iideffect.size(); p++) {
+      nll -= dnorm(iideffect[p], iideffect_mean, iideffect_sd, true);
+    }
+  }
+
+  // Likelihood from the gaussian prior.
+  // log(prec) ~ loggamma
+  // prec ~ gamma
+  if(family == 0) {
+    nll -= dgamma(tau_gaussian, prior_gamma_shape, prior_gamma_rate, true);
+  }
+
+  if(field) {
+    // Likelihood of hyperparameters for field.
+    // From https://www.tandfonline.com/doi/full/10.1080/01621459.2017.1415907 (Theorem 2.6)
+    Type lambdatilde1 = -log(prior_rho_prob) * prior_rho_min;
+    Type lambdatilde2 = -log(prior_sigma_prob) / prior_sigma_max;
+    Type log_pcdensity = log(lambdatilde1) + log(lambdatilde2) - 2*log_rho - lambdatilde1 * pow(rho, -1) - lambdatilde2 * sigma;
+    // log_rho and log_sigma from the Jacobian
+    nll -= log_pcdensity + log_rho + log_sigma;
+
+    // Build spde matrix
+    SparseMatrix<Type> Q = Q_spde(spde, kappa);
+
+    // From Lindgren (2011) https://doi.org/10.1111/j.1467-9868.2011.00777.x, see equation for the marginal variance
+    Type scaling_factor = sqrt(exp(lgamma(nu)) / (exp(lgamma(nu + 1)) * 4 * M_PI * pow(kappa, 2*nu)));
+
+    // Likelihood of the random field.
+    nll += SCALE(GMRF(Q), sigma / scaling_factor)(nodemean);
+  }
+
+  Type nll_priors = nll;
+
+  // ------------------------------------------------------------------------ //
+  // Likelihood from data
+  // ------------------------------------------------------------------------ //
+
+  vector<Type> pixel_linear_pred(n_pixels);
+  pixel_linear_pred = intercept + x * slope;
+
+  if(field) {
+    // Calculate field for pixel data
+    vector<Type> linear_pred_field(n_pixels);
+    linear_pred_field = Apixel * nodemean;
+    pixel_linear_pred += linear_pred_field.array();
+  }
+
+  // recalculate startendindices to be in the form start, n
+  startendindex.col(1) = startendindex.col(1) - startendindex.col(0) + 1;
+
+  Type polygon_response;
+  Type normalised_polygon_response;
+  Type normalisation_total;
+  Type pred_polygoncases;
+  Type pred_polygonrate;
+  Type polygon_sd;
+  vector<Type> pixel_pred;
+  vector<Type> numerator_pixels;
+  vector<Type> normalisation_pixels;
+  vector<Type> reportnormalisation(n_polygons);
+  vector<Type> reportprediction_cases(n_polygons);
+  vector<Type> reportprediction_rate(n_polygons);
+  vector<Type> reportnll(n_polygons);
+  vector<Type> reportpolygonsd(n_polygons);
+
+  // For each shape get pixel predictions within and aggregate to polygon level
+  for (int polygon = 0; polygon < n_polygons; polygon++) {
+
+    // Get pixel level predictions (rate)
+    pixel_pred = pixel_linear_pred.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array();
+    if(iid) {
+      pixel_pred += iideffect[polygon];
+    }
+    // Use correct link function
+    if(link == 0) {
+      pixel_pred = invlogit(pixel_pred);
+    } else if(link == 1) {
+      pixel_pred = exp(pixel_pred);
+    } else if(link == 2){
+      // Don't need to do anything, i.e. pixel_pred = pixel_pred;
+    } else {
+      error("Link function not implemented.");
+    }
+
+    // Aggregate to polygon prediction
+    numerator_pixels = pixel_pred * aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array();
+    normalisation_pixels = aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1));
+    normalisation_total = sum(normalisation_pixels);
+    pred_polygoncases = sum(numerator_pixels);
+    pred_polygonrate = pred_polygoncases/normalisation_total;
+
+    reportnormalisation[polygon] = normalisation_total;
+    reportprediction_cases[polygon] = pred_polygoncases;
+    reportprediction_rate[polygon] = pred_polygonrate;
+
+    // Use correct likelihood function
+    if(family == 0) {
+      // Scale the pixel sd to polygon level
+      polygon_sd = gaussian_sd * sqrt((normalisation_pixels * normalisation_pixels).sum()) / normalisation_total;
+      reportpolygonsd[polygon] = polygon_sd;
+      // Calculate normal likelihood in rate space
+      polygon_response = polygon_response_data(polygon);
+      normalised_polygon_response = polygon_response/normalisation_total;
+      nll -= dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true);
+      reportnll[polygon] = -dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true);
+    } else if(family == 1) {
+      nll -= dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true);
+      reportnll[polygon] = -dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true);
+    } else if(family == 2) {
+      nll -= dpois(polygon_response_data[polygon], pred_polygoncases, true);
+      reportnll[polygon] = -dpois(polygon_response_data[polygon], pred_polygoncases, true);
+    } else {
+      error("Likelihood not implemented.");
+    }
+
+  }
+
+  REPORT(reportprediction_cases);
+  REPORT(reportprediction_rate);
+  REPORT(reportnormalisation);
+  REPORT(reportnll);
+  REPORT(polygon_response_data);
+  REPORT(nll_priors);
+  REPORT(nll);
+  if(family == 0) {
+    REPORT(reportpolygonsd);
+  }
+
+  return nll;
+}
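The one substantive change in the rewrite above is the Jacobian term for the polygon iid hyperparameter: the code now adds iideffect_log_tau instead of log(iideffect_sd). A sketch of the algebra, following the PC-prior reference cited in the code comments (Eqn 3.3): the prior on the precision is

\[ \pi(\tau) = \frac{\lambda}{2}\,\tau^{-3/2}\exp\left(-\lambda\,\tau^{-1/2}\right), \qquad \lambda = -\frac{\log(\mathrm{prior\_iideffect\_sd\_prob})}{\mathrm{prior\_iideffect\_sd\_max}}, \]

and because the model parameterises \(\log\tau\), the change of variables contributes \(\mathrm{d}\tau/\mathrm{d}\log\tau = \tau\), so

\[ \log\pi(\log\tau) = \log\pi(\tau) + \log\tau, \]

which is exactly log_pcdensity_iid + iideffect_log_tau; the previous log(iideffect_sd) term equals \(-\tfrac{1}{2}\log\tau\) and was not the correct Jacobian.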
diff --git a/tests/testthat/helper_data.R b/tests/testthat/helper_data.R
new file mode 100644
index 0000000..89deef8
--- /dev/null
+++ b/tests/testthat/helper_data.R
@@ -0,0 +1,37 @@
+polygons <- list()
+n_polygon_per_side <- 10
+n_polygons <- n_polygon_per_side * n_polygon_per_side
+n_pixels_per_side <- n_polygon_per_side * 2
+
+for(i in 1:n_polygons) {
+  row <- ceiling(i/n_polygon_per_side)
+  col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side)
+  xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
+  polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin),
+                              c(ymax, ymax, ymin, ymin, ymax)))
+}
+
+polys <- lapply(polygons, sf::st_polygon)
+N <- floor(runif(n_polygons, min = 1, max = 100))
+response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000))
+response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA))
+response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N)
+response_df2 <- data.frame(area_id = 1:n_polygons, n_positive = runif(n_polygons, min = 0, max = 1), sample_size = floor(runif(n_polygons, min = 1, max = 100)))
+
+spdf <- sf::st_sf(response_df, geometry = polys)
+spdf_na <- sf::st_sf(response_na_df, geometry = polys)
+spdf_binom <- sf::st_sf(response_binom_df, geometry = polys)
+spdf2 <- sf::st_sf(response_df2, geometry = polys)
+
+# Create raster stack
+r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
+terra::ext(r) <- terra::ext(spdf)
+r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3))
+r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
+terra::ext(r2) <- terra::ext(spdf)
+r2[] <- sapply(1:terra::ncell(r2), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3))
+cov_stack <- c(r, r2)
+names(cov_stack) <- c('layer1', 'layer2')
+
+test_data <- prepare_data(polygon_shapefile = spdf,
+                          covariate_rasters = cov_stack)
diff --git a/tests/testthat/test-build-mesh.R b/tests/testthat/test-build-mesh.R
index 97e1d98..8bd19a2 100644
--- a/tests/testthat/test-build-mesh.R
+++ b/tests/testthat/test-build-mesh.R
@@ -2,26 +2,8 @@ context("Build mesh")

 test_that("build_mesh behaves as expected", {
-
-  skip_if_not_installed('INLA')
+  skip_on_cran()
-
-  polygons <- list()
-  n_polygon_per_side <- 10
-  n_polygons <- n_polygon_per_side * n_polygon_per_side
-  n_pixels_per_side <- n_polygon_per_side * 2
-
-  for(i in 1:n_polygons) {
-    row <- ceiling(i/n_polygon_per_side)
-    col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side)
-    xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-    polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
-  }
-
-  polys <- do.call(raster::spPolygons, polygons)
-
-  response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000))
-  spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)

   my_mesh <- build_mesh(spdf)

@@ -29,5 +11,5 @@ test_that("build_mesh behaves as expected", {
   expect_error(build_mesh(spdf, mesh.args = c(4, 8)))
   expect_is(my_mesh, 'inla.mesh')
   expect_is(build_mesh(spdf, mesh.args = list(max.edge = c(50, 100))), 'inla.mesh')
-
-})
\ No newline at end of file
+
+})
diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R
index 4a50ef5..84ba7b4 100644
--- a/tests/testthat/test-extract.R
+++ b/tests/testthat/test-extract.R
@@ -1,124 +1,55 @@
 context("Extract covariates and polygon data")

-polygons <- list()
-n_polygon_per_side <- 10
-n_polygons <- n_polygon_per_side * n_polygon_per_side
-n_pixels_per_side <- n_polygon_per_side * 2
-
-for(i in 1:n_polygons) {
-  row <- ceiling(i/n_polygon_per_side)
-  col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side)
-  xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-  polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
-}
-
-polys <- do.call(raster::spPolygons, polygons)
-N <- floor(runif(n_polygons, min = 1, max = 100))
-response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000))
-response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N)
-
-spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df)
-
-# Create raster stack
-r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
-r <- raster::setExtent(r, raster::extent(spdf))
-r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3))
-r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
-r2 <- raster::setExtent(r2, raster::extent(spdf))
-r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3))
-cov_stack <- 
raster::stack(r, r2) - -test_that("parallelExtract gives errors when it should", { - - skip_on_cran() - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - - expect_error(parallelExtract(spdf, cov_stack, fun = NULL, id = 'area_id')) - expect_error(parallelExtract(cov_stack, spdf, fun = NULL, id = 'id')) - - parallel::stopCluster(cl) - foreach::registerDoSEQ() -}) +test_that("getPolygonData function", { -test_that("parallelExtract give the right form of output", { - skip_on_cran() - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = 'area_id') - parallel::stopCluster(cl) - foreach::registerDoSEQ() - - expect_is(cov_data, 'data.frame') - expect_equal(sort(as.numeric(unique(cov_data$area_id))), spdf$area_id)# - expect_equal(ncol(cov_data), raster::nlayers(cov_stack) + 2)# - expect_equal(names(cov_stack), names(cov_data)[-c(1,2)])# - expect_equal(length(unique(cov_data$area_id)), length(spdf)) - -}) -test_that("getPolygonData function", { - - skip_on_cran() - expect_error(getPolygonData(spdf, id_var = 'id', response_var = 'response')) expect_error(getPolygonData(spdf, id_var = 'area_id', response_var = 'data')) - + result <- getPolygonData(spdf, id_var = 'area_id', response_var = 'response') result_binom <- getPolygonData(spdf_binom, id_var = 'area_id', response_var = 'response', sample_size_var = 'sample_size') - + expect_is(result, 'data.frame') expect_equal(ncol(result), 3) expect_equal(nrow(result), nrow(spdf)) expect_equal(result$area_id, spdf$area_id) expect_equal(result$response, spdf$response) expect_equal(result$N, rep(NA, nrow(result))) - + expect_is(result_binom, 'data.frame') expect_equal(ncol(result_binom), 3) expect_equal(nrow(result_binom), nrow(spdf_binom)) expect_equal(result_binom$area_id, spdf_binom$area_id) expect_equal(result_binom$response, spdf_binom$response) expect_equal(result_binom$N, spdf_binom$sample_size) - + }) test_that("getCovariateData function gives errors when it should", { - + skip_on_cran() - + expect_error(getCovariateRasters('/home/rasters', '.tif$', spdf)) - + # Save .tif files in tempdir() - r <- raster::raster(ncol=20, nrow=20) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) - r2 <- raster::raster(ncol=20, nrow=20) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_stack <- raster::stack(r, r2) - raster::writeRaster(r, paste0(tempdir(), '/cov1.tif'), overwrite = TRUE) - raster::writeRaster(r2, paste0(tempdir(), '/cov2.tif'), overwrite = TRUE) - - expect_is(getCovariateRasters(tempdir(), '.tif$', spdf), 'RasterBrick') - + terra::writeRaster(r, paste0(tempdir(), '/cov1.tif'), overwrite = TRUE) + terra::writeRaster(r2, paste0(tempdir(), '/cov2.tif'), overwrite = TRUE) + + expect_is(getCovariateRasters(tempdir(), '.tif$', spdf), 'SpatRaster') + }) test_that("extractCoordsForMesh function behaves as it should", { skip_on_cran() - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = 'area_id') - parallel::stopCluster(cl) - foreach::registerDoSEQ() - - result <- extractCoordsForMesh(cov_stack, cov_data$cellid) - + + cov_data <- terra::extract(cov_stack, spdf, cells = TRUE, na.rm = TRUE, ID = TRUE) + names(cov_data)[1] <- 'area_id' + + result <- extractCoordsForMesh(cov_stack, cov_data$cell) + result2 <- extractCoordsForMesh(cov_stack) expect_error(extractCoordsForMesh(cov_data$cellid, cov_stack)) 
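With parallelExtract removed, the covariate extraction these tests previously exercised reduces to a single terra::extract call; for reference, a sketch of the pattern now used throughout the updated tests (objects as defined in tests/testthat/helper_data.R above):

  cov_data <- terra::extract(cov_stack, spdf, cells = TRUE, na.rm = TRUE, ID = TRUE)
  names(cov_data)[1] <- 'area_id'  # terra's ID column corresponds to the polygon id
  head(cov_data)                   # one row per pixel: area_id, covariate values, cell index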
diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index 3a52252..a3a7774 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -1,134 +1,92 @@ context("Fitting model") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -} - -polys <- do.call(raster::spPolygons, polygons) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) - -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df) - -# Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - -test_that("disag_model produces errors whe expected", { - +test_that("disag_model produces errors when expected", { + skip_if_not_installed('INLA') skip_on_cran() - + expect_error(disag_model(list())) expect_error(disag_model(test_data, iterations = 'iterations')) expect_error(disag_model(test_data, priors = list(polygon_sd_men = 0.3, polygon_sd_sd = 0.4))) expect_error(disag_model(test_data, priors = c(polygon_sd_mean = 1.2))) expect_error(disag_model(test_data, family = 'banana')) expect_error(disag_model(test_data, link = 'apple')) - + }) test_that("disag_model behaves as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - - result <- disag_model(test_data, iterations = 2) + + result <- disag_model(test_data, iterations = 100, iid = FALSE) expect_is(result, 'disag_model') expect_equal(length(result), 5) - expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 5) - expect_equal(unique(names(result$sd_out$par.random)), c("iideffect", "nodemean")) - - - -}) - - + expect_equal(length(result$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 4) + expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) +}) test_that("disag_model with 1 covariate behaves as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - + test_data2 <- test_data test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]] test_data2$covariate_data <- test_data2$covariate_data[, 1:3] - - result <- disag_model(test_data2, iterations = 2) - + + result <- disag_model(test_data2, iterations = 100, iid = FALSE) + expect_is(result, 
'disag_model')
   expect_equal(length(result), 5)
-
-  expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 5)
-  expect_equal(unique(names(result$sd_out$par.random)), c("iideffect", "nodemean"))
-
+
+  # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore.
+  expect_equal(length(result$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 3)
+  expect_equal(unique(names(result$sd_out$par.random)), c("nodemean"))
+
   # Confirm only one covariate was fitted.
   expect_equal(sum(names(result$opt$par) == 'slope'), 1)
-
+
 })

 test_that("user defined model setup is working as expected", {
-
+
   skip_if_not_installed('INLA')
   skip_on_cran()
-
-  binom_data <- prepare_data(polygon_shapefile = spdf_binom, 
+
+  binom_data <- prepare_data(polygon_shapefile = spdf_binom,
                              covariate_rasters = cov_stack,
                              sample_size_var = 'sample_size')
-
-  result2 <- disag_model(test_data, iterations = 2, field = FALSE, family = 'poisson', link = 'log')
-  result3 <- disag_model(binom_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit')
-  result4 <- disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE, link = 'identity')
-
-  expect_error(disag_model(test_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit'))
-
+
+  result2 <- disag_model(test_data, iterations = 100, field = FALSE, family = 'poisson', link = 'log')
+  result3 <- disag_model(binom_data, iterations = 100, iid = FALSE, family = 'binomial', link = 'logit')
+  result4 <- disag_model(test_data, iterations = 100, field = FALSE, iid = FALSE, link = 'identity')
+
+  expect_error(disag_model(test_data, iterations = 100, iid = FALSE, family = 'binomial', link = 'logit'))
+
   expect_is(result2, 'disag_model')
   expect_equal(length(result2), 5)
-  expect_equal(length(result2$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2)
+  expect_equal(length(result2$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 2)
   expect_equal(unique(names(result2$sd_out$par.random)), c("iideffect"))
   expect_false(result2$model_setup$field)
   expect_true(result2$model_setup$iid)
   expect_equal(result2$model_setup$family, 'poisson')
   expect_equal(result2$model_setup$link, 'log')
-
+
   expect_is(result3, 'disag_model')
   expect_equal(length(result3), 5)
-  expect_equal(length(result3$sd_out$par.fixed), raster::nlayers(binom_data$covariate_rasters) + 3)
+  expect_equal(length(result3$sd_out$par.fixed), terra::nlyr(binom_data$covariate_rasters) + 3)
   expect_equal(unique(names(result3$sd_out$par.random)), c("nodemean"))
   expect_true(result3$model_setup$field)
   expect_false(result3$model_setup$iid)
   expect_equal(result3$model_setup$family, 'binomial')
   expect_equal(result3$model_setup$link, 'logit')
-
+
   expect_is(result4, 'disag_model')
   expect_equal(length(result4), 5)
-  expect_equal(length(result4$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2)
+  expect_equal(length(result4$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 2)
   expect_equal(unique(names(result4$sd_out$par.random)), NULL)
   expect_false(result4$model_setup$field)
   expect_false(result4$model_setup$iid)
@@ -137,31 +95,31 @@ test_that("user defined model setup is working as expected", {
 })

 test_that("make_model_object behaves as expected", {
-
+
   skip_if_not_installed('INLA')
   skip_on_cran()
-
+
   result <- make_model_object(test_data)
-
+
   expect_is(result, 'list')
   expect_equal(sum(sapply(c("par", "fn", "gr", "report"), function(x) !(x %in% names(result)))), 0)
-
+
 })

 test_that("setup_hess_control 
behaves as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - + obj <- make_model_object(test_data) - + opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = 2, trace = 0)) - + hess_control <- setup_hess_control(opt, hess_control_parscale = c(rep(c(0.9, 1.1), 3), 1), hess_control_ndeps = 1e-3) - + expect_is(hess_control, 'list') expect_equal(length(hess_control$parscale), length(opt$par)) expect_equal(length(hess_control$ndeps), length(opt$par)) - + }) diff --git a/tests/testthat/test-matching.R b/tests/testthat/test-matching.R index e22d433..42b152a 100644 --- a/tests/testthat/test-matching.R +++ b/tests/testthat/test-matching.R @@ -17,7 +17,7 @@ test_that("Getting start and end index returns the right object", { result <- getStartendindex(covs, response, 'id') save(result, file = paste0(tempdir(), '/test_startendindex.RData')) - + expect_is(result, "matrix") expect_equal(nrow(result), nrow(response)) expect_equal(ncol(result), 2) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index 7d82a0d..ab0e0e6 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -1,116 +1,93 @@ context("Plotting data") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -} - -polys <- do.call(raster::spPolygons, polygons) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_df2 <- data.frame(area_id = 1:n_polygons, n_positive = runif(n_polygons, min = 0, max = 1), sample_size = floor(runif(n_polygons, min = 1, max = 100))) -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -spdf2 <- sp::SpatialPolygonsDataFrame(polys, response_df2) - -# Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - test_that("Check plot_polygon_data function works as expected", { - + skip_on_cran() - + p <- plot_polygon_data(spdf, list(id_var = 'area_id', response_var = 'response')) expect_error(plot_polygon_data(polys, list(id_var = 'area_id', response_var = 'response'))) expect_is(p, 'ggplot') - + p2 <- plot_polygon_data(spdf2, list(id_var = 'area_id', response_var = 'n_positive')) expect_is(p2, 'ggplot') - + }) test_that("Check plot.disag.data function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - - test_data2 <- prepare_data(polygon_shapefile = spdf2, + + test_data2 <- prepare_data(polygon_shapefile = spdf2, covariate_rasters = cov_stack, 
response_var = 'n_positive') - + p <- plot(test_data) - + expect_is(p, 'list') expect_equal(length(p), 3) expect_equal(names(p), c('polygon', 'covariates', 'mesh')) - + p2 <- plot(test_data2) - + expect_is(p2, 'list') expect_equal(length(p2), 3) expect_equal(names(p2), c('polygon', 'covariates', 'mesh')) - + p3 <- plot(test_data, which = c(1,3)) - + expect_is(p3, 'list') expect_equal(length(p3), 2) expect_equal(names(p3), c('polygon', 'mesh')) - + }) test_that("Check plot.disag_model function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - - fit_result <- disag_model(test_data, iterations = 2) - - fit_result_nofield <- disag_model(test_data, iterations = 2, field = FALSE) - + + fit_result <- disag_model(test_data, iterations = 10) + + fit_result_nofield <- disag_model(test_data, iterations = 10, field = FALSE) + p1 <- plot(fit_result) - + p2 <- plot(fit_result_nofield) - + expect_is(p1, 'list') expect_equal(length(p1), 2) - + expect_is(p2, 'list') expect_equal(length(p2), 2) - - + + }) test_that("Check plot.disag_prediction function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - - fit_result <- disag_model(test_data, iterations = 2) - + + fit_result <- disag_model(test_data, iterations = 1000, + iid = TRUE, + field = TRUE, + family = 'poisson', + link = 'log', + priors = list(priormean_intercept = 0, + priorsd_intercept = 0.1, + priormean_slope = 0.0, + priorsd_slope = 0.1, + prior_rho_min = 5, + prior_rho_prob = 0.01, + prior_sigma_max = 0.1, + prior_sigma_prob = 0.01, + prior_iideffect_sd_max = 0.0001, + prior_iideffect_sd_prob = 0.01)) pred <- predict(fit_result) p <- plot(pred) - - expect_is(p, 'trellis') - + + expect_is(p, 'gg') + }) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 5da19a9..d2cd848 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -1,252 +1,270 @@ context("Predict model") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -} - -polys <- do.call(raster::spPolygons, polygons) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - -# Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - test_that("Check predict.disag_model function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - - result <- 
disag_model(test_data, iterations = 2) + + result <- disag_model(test_data, iterations = 1000, + iid = TRUE, + field = TRUE, + family = 'poisson', + link = 'log', + priors = list(priormean_intercept = 0, + priorsd_intercept = 0.1, + priormean_slope = 0.0, + priorsd_slope = 0.1, + prior_rho_min = 5, + prior_rho_prob = 0.01, + prior_sigma_max = 0.1, + prior_sigma_prob = 0.01, + prior_iideffect_sd_max = 0.0001, + prior_iideffect_sd_prob = 0.01)) pred2 <- predict(result) - + expect_is(pred2, 'disag_prediction') expect_equal(length(pred2), 2) expect_equal(names(pred2), c('mean_prediction', 'uncertainty_prediction')) - + expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) - expect_is(pred2$mean_prediction$prediction, 'Raster') - expect_is(pred2$mean_prediction$field, 'Raster') + expect_is(pred2$mean_prediction$prediction, 'SpatRaster') + expect_is(pred2$mean_prediction$field, 'SpatRaster') expect_true(is.null(pred2$mean_prediction$iid)) - expect_is(pred2$mean_prediction$covariates, 'Raster') - + expect_is(pred2$mean_prediction$covariates, 'SpatRaster') + expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) - expect_is(pred2$uncertainty_prediction$realisations, 'RasterStack') - expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') - expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 100) - expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) + expect_is(pred2$uncertainty_prediction$realisations, 'SpatRaster') + expect_is(pred2$uncertainty_prediction$predictions_ci, 'SpatRaster') + expect_equal(terra::nlyr(pred2$uncertainty_prediction$realisations), 100) + expect_equal(terra::nlyr(pred2$uncertainty_prediction$predictions_ci), 2) pred2 <- predict(result, predict_iid = TRUE, N = 10) - + expect_is(pred2, 'disag_prediction') expect_equal(length(pred2), 2) expect_equal(names(pred2), c('mean_prediction', 'uncertainty_prediction')) - + expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_equal(names(pred2$mean_prediction), c('prediction', 'field', 'iid', 'covariates')) - expect_is(pred2$mean_prediction$prediction, 'Raster') - expect_is(pred2$mean_prediction$field, 'Raster') - expect_is(pred2$mean_prediction$iid, 'Raster') - expect_is(pred2$mean_prediction$covariates, 'Raster') - + expect_is(pred2$mean_prediction$prediction, 'SpatRaster') + expect_is(pred2$mean_prediction$field, 'SpatRaster') + expect_is(pred2$mean_prediction$iid, 'SpatRaster') + expect_is(pred2$mean_prediction$covariates, 'SpatRaster') + expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) - expect_is(pred2$uncertainty_prediction$realisations, 'RasterStack') - expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') - expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 10) - expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) - - + expect_is(pred2$uncertainty_prediction$realisations, 'SpatRaster') + expect_is(pred2$uncertainty_prediction$predictions_ci, 'SpatRaster') + expect_equal(terra::nlyr(pred2$uncertainty_prediction$realisations), 10) + expect_equal(terra::nlyr(pred2$uncertainty_prediction$predictions_ci), 2) + + # For a model with no field or iid - - result <- 
disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE)
-
+
+  result <- disag_model(test_data, iterations = 100, field = FALSE, iid = FALSE)
+  pred2 <- predict(result)
-
+
   expect_is(pred2, 'disag_prediction')
   expect_equal(length(pred2), 2)
   expect_equal(names(pred2), c('mean_prediction', 'uncertainty_prediction'))
-
+
   expect_is(pred2$mean_prediction, 'list')
   expect_equal(length(pred2$mean_prediction), 4)
-  expect_is(pred2$mean_prediction$prediction, 'Raster')
+  expect_is(pred2$mean_prediction$prediction, 'SpatRaster')
   expect_true(is.null(pred2$mean_prediction$field))
   expect_true(is.null(pred2$mean_prediction$iid))
-  expect_is(pred2$mean_prediction$covariates, 'Raster')
-
+  expect_is(pred2$mean_prediction$covariates, 'SpatRaster')
+
   expect_is(pred2$uncertainty_prediction, 'list')
   expect_equal(length(pred2$uncertainty_prediction), 2)
   expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci'))
-  expect_is(pred2$uncertainty_prediction$realisations, 'RasterStack')
-  expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick')
-  expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 100)
-  expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2)
-
+  expect_is(pred2$uncertainty_prediction$realisations, 'SpatRaster')
+  expect_is(pred2$uncertainty_prediction$predictions_ci, 'SpatRaster')
+  expect_equal(terra::nlyr(pred2$uncertainty_prediction$realisations), 100)
+  expect_equal(terra::nlyr(pred2$uncertainty_prediction$predictions_ci), 2)
+
 })
 
 test_that("Check predict.disag_model function works with newdata", {
-
+
   skip_if_not_installed('INLA')
   skip_on_cran()
-
-  result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 2)
-
-  newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10))
+
+  result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100,
+                        priors = list(priormean_intercept = 0,
+                                      priorsd_intercept = 1,
+                                      priormean_slope = 0.0,
+                                      priorsd_slope = 0.4,
+                                      prior_rho_min = 1,
+                                      prior_rho_prob = 0.01,
+                                      prior_sigma_max = 0.1,
+                                      prior_sigma_prob = 0.01,
+                                      prior_iideffect_sd_max = 0.0001,
+                                      prior_iideffect_sd_prob = 0.01))
+
+  newdata <- terra::crop(c(r, r2), c(0, 10, 0, 10))
+  names(newdata) <- c('layer1', 'layer2')
   pred1 <- predict(result)
   pred2 <- predict(result, newdata, predict_iid = TRUE, N = 5)
-
+
   expect_is(pred2, 'disag_prediction')
   expect_equal(length(pred2), 2)
   expect_equal(names(pred2), c('mean_prediction', 'uncertainty_prediction'))
-
+
   expect_is(pred2$mean_prediction, 'list')
   expect_equal(length(pred2$mean_prediction), 4)
   expect_equal(names(pred2$mean_prediction), c('prediction', 'field', 'iid', 'covariates'))
-  expect_is(pred2$mean_prediction$prediction, 'Raster')
+  expect_is(pred2$mean_prediction$prediction, 'SpatRaster')
   expect_true(is.null(pred2$mean_prediction$field))
-  expect_is(pred2$mean_prediction$iid, 'Raster')
-  expect_is(pred2$mean_prediction$covariates, 'Raster')
-
+  expect_is(pred2$mean_prediction$iid, 'SpatRaster')
+  expect_is(pred2$mean_prediction$covariates, 'SpatRaster')
+
   expect_is(pred2$uncertainty_prediction, 'list')
   expect_equal(length(pred2$uncertainty_prediction), 2)
   expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci'))
-  expect_is(pred2$uncertainty_prediction$realisations, 'RasterStack')
-  expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick')
-  expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 5)
-  expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2)
-
-  expect_false(identical(raster::extent(pred1$mean_prediction$prediction), raster::extent(pred2$mean_prediction$prediction)))
-  expect_false(identical(raster::extent(pred1$uncertainty_prediction$realisations), raster::extent(pred2$uncertainty_prediction$realisations)))
-
+  expect_is(pred2$uncertainty_prediction$realisations, 'SpatRaster')
+  expect_is(pred2$uncertainty_prediction$predictions_ci, 'SpatRaster')
+  expect_equal(terra::nlyr(pred2$uncertainty_prediction$realisations), 5)
+  expect_equal(terra::nlyr(pred2$uncertainty_prediction$predictions_ci), 2)
+
+  expect_false(identical(terra::ext(pred1$mean_prediction$prediction), terra::ext(pred2$mean_prediction$prediction)))
+  expect_false(identical(terra::ext(pred1$uncertainty_prediction$realisations), terra::ext(pred2$uncertainty_prediction$realisations)))
+
 })
 
 test_that('Check that check_newdata works', {
-
+
   skip_if_not_installed('INLA')
   skip_on_cran()
-
-  result <- disag_model(test_data, field = FALSE, iterations = 2)
-
-  newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10))
+
+  result <- disag_model(test_data, field = FALSE, iterations = 100)
+
+  newdata <- terra::crop(c(r, r2), c(0, 10, 0, 10))
+  names(newdata) <- c('layer1', 'layer2')
+
   nd1 <- check_newdata(newdata, result)
-  expect_is(nd1, 'RasterBrick')
-
+  expect_is(nd1, 'SpatRaster')
+
   nn <- newdata[[1]]
-  names(nn) <- 'extra_uneeded'
-  newdata2 <- raster::stack(newdata, nn)
+  names(nn) <- 'extra_unneeded'
+  newdata2 <- c(newdata, nn)
   expect_error(check_newdata(newdata2, result), NA)
 
   newdata3 <- newdata[[1]]
   expect_error(check_newdata(newdata3, result), 'All covariates')
-
+
   newdata4 <- result$data$covariate_data
   expect_error(check_newdata(newdata4, result), 'newdata should be NULL or')
-
-
+
+
 })
 
 test_that('Check that setup_objects works', {
-
+
   skip_if_not_installed('INLA')
   skip_on_cran()
-
-  result <- disag_model(test_data, iterations = 2)
-
+
+  result <- disag_model(test_data, iterations = 100,
+                        iid = TRUE,
+                        field = TRUE,
+                        priors = list(priormean_intercept = 0,
+                                      priorsd_intercept = 1,
+                                      priormean_slope = 0.0,
+                                      priorsd_slope = 0.4,
+                                      prior_rho_min = 1,
+                                      prior_rho_prob = 0.01,
+                                      prior_sigma_max = 0.1,
+                                      prior_sigma_prob = 0.01,
+                                      prior_iideffect_sd_max = 0.01,
+                                      prior_iideffect_sd_prob = 0.01))
+
   objects <- setup_objects(result)
-
+
   expect_is(objects, 'list')
   expect_equal(length(objects), 3)
   expect_equal(names(objects), c('covariates', 'field_objects', 'iid_objects'))
   expect_is(objects$field_objects, 'list')
   expect_true(is.null(objects$iid_objects))
 
-  newdata <- raster::crop(raster::stack(r, r2), c(0, 180, -90, 90))
+  newdata <- terra::crop(c(r, r2), c(0, 180, -90, 90))
+  names(newdata) <- c('layer1', 'layer2')
   objects2 <- setup_objects(result, newdata)
-
+
   expect_is(objects2, 'list')
   expect_equal(length(objects2), 3)
   expect_equal(names(objects2), c('covariates', 'field_objects', 'iid_objects'))
   expect_is(objects2$field_objects, 'list')
   expect_true(is.null(objects$iid_objects))
-
+
   objects3 <- setup_objects(result, predict_iid = TRUE)
-
+
   expect_is(objects3, 'list')
   expect_equal(length(objects3), 3)
   expect_equal(names(objects3), c('covariates', 'field_objects', 'iid_objects'))
   expect_is(objects3$field_objects, 'list')
   expect_is(objects3$iid_objects, 'list')
-
+
 })
 
 test_that('Check that predict_single_raster works', {
-
+
   skip_if_not_installed('INLA')
   skip_on_cran()
-
-  result <- disag_model(test_data, iterations = 2)
-
+
+  result <- disag_model(test_data, iterations = 100,
+                        iid = TRUE,
+                        field = TRUE,
+                        priors = list(priormean_intercept = 0,
+                                      priorsd_intercept = 1,
+                                      priormean_slope = 0.0,
+                                      priorsd_slope = 0.4,
+                                      prior_rho_min = 1,
+                                      prior_rho_prob = 0.01,
+                                      prior_sigma_max = 0.1,
+                                      prior_sigma_prob = 0.01,
+                                      prior_iideffect_sd_max = 0.01,
+                                      prior_iideffect_sd_prob = 0.01))
+
   objects <- setup_objects(result)
-
+
   pars <- result$obj$env$last.par.best
   pars <- split(pars, names(pars))
-
-  pred2 <- predict_single_raster(pars,
+
+  pred2 <- predict_single_raster(pars,
                                  objects = objects,
                                  link_function = result$model_setup$link)
-
+
   expect_is(pred2, 'list')
   expect_equal(length(pred2), 4)
   expect_equal(names(pred2), c('prediction', 'field', 'iid', 'covariates'))
-  expect_is(pred2$prediction, 'Raster')
-  expect_is(pred2$field, 'Raster')
+  expect_is(pred2$prediction, 'SpatRaster')
+  expect_is(pred2$field, 'SpatRaster')
   expect_true(is.null(pred2$iid))
-  expect_is(pred2$covariates, 'Raster')
-
+  expect_is(pred2$covariates, 'SpatRaster')
+
   objects2 <- setup_objects(result, predict_iid = TRUE)
-
-  pred2 <- predict_single_raster(pars,
+
+  pred2 <- predict_single_raster(pars,
                                  objects = objects2,
                                  link_function = result$model_setup$link)
-
+
   expect_is(pred2, 'list')
   expect_equal(length(pred2), 4)
   expect_equal(names(pred2), c('prediction', 'field', 'iid', 'covariates'))
-  expect_is(pred2$prediction, 'Raster')
-  expect_is(pred2$field, 'Raster')
-  expect_is(pred2$iid, 'Raster')
-  expect_is(pred2$covariates, 'Raster')
-
+  expect_is(pred2$prediction, 'SpatRaster')
+  expect_is(pred2$field, 'SpatRaster')
+  expect_is(pred2$iid, 'SpatRaster')
+  expect_is(pred2$covariates, 'SpatRaster')
+
 })
diff --git a/tests/testthat/test-prepare-data.R b/tests/testthat/test-prepare-data.R
index 7cdcee8..f71c4dd 100644
--- a/tests/testthat/test-prepare-data.R
+++ b/tests/testthat/test-prepare-data.R
@@ -1,53 +1,19 @@
-
 context("Preparing data")
 
-polygons <- list()
-n_polygon_per_side <- 10
-n_polygons <- n_polygon_per_side * n_polygon_per_side
-n_pixels_per_side <- n_polygon_per_side * 2
-
-for(i in 1:n_polygons) {
-  row <- ceiling(i/n_polygon_per_side)
-  col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side)
-  xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-  polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
-}
-
-polys <- do.call(raster::spPolygons, polygons)
-N <- floor(runif(n_polygons, min = 1, max = 100))
-response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000))
-response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA))
-response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N)
-
-spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-spdf_na <- sp::SpatialPolygonsDataFrame(polys, response_na_df)
-spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df)
-
-# Create raster stack
-r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
-r <- raster::setExtent(r, raster::extent(spdf))
-r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3))
-r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
-r2 <- raster::setExtent(r2, raster::extent(spdf))
-r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3))
-cov_stack <- raster::stack(r, r2)
-
-
 test_that("Check prepare_data function works as expected", {
-
-
   skip_if_not_installed('INLA')
+  skip_on_cran()
-
-  result <- prepare_data(polygon_shapefile = spdf,
+
+  result <- prepare_data(polygon_shapefile = spdf,
                          covariate_rasters = cov_stack)
-
+
   expect_is(result, 'disag_data')
   expect_equal(length(result), 10)
-  expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data',
+  expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data',
                                 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh'))
-  expect_is(result$polygon_shapefile, 'SpatialPolygonsDataFrame')
+  expect_is(result$polygon_shapefile, 'sf')
   expect_is(result$shapefile_names, 'list')
-  expect_is(result$covariate_rasters, c('RasterBrick', 'RasterStack'))
+  expect_is(result$covariate_rasters, 'SpatRaster')
   expect_is(result$polygon_data, 'data.frame')
   expect_is(result$covariate_data, 'data.frame')
   expect_is(result$aggregation_pixels, 'numeric')
@@ -58,25 +24,25 @@ test_that("Check prepare_data function works as expected", {
   expect_equal(sum(is.na(result$polygon_data$N)), length(result$polygon_data$N))
   expect_equal(nrow(result$polygon_data), nrow(result$startendindex))
   expect_equal(nrow(result$covariate_data), nrow(result$coordsForFit))
-
+
 })
 
 test_that("Check prepare_data function with sample size works as expected", {
-
+  skip_on_cran()
-
-  result <- prepare_data(polygon_shapefile = spdf_binom,
+
+  result <- prepare_data(polygon_shapefile = spdf_binom,
                          covariate_rasters = cov_stack,
                          sample_size_var = 'sample_size',
                          makeMesh = FALSE)
-
+
   expect_is(result, 'disag_data')
   expect_equal(length(result), 10)
-  expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data',
+  expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data',
                                 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh'))
-  expect_is(result$polygon_shapefile, 'SpatialPolygonsDataFrame')
+  expect_is(result$polygon_shapefile, 'sf')
   expect_is(result$shapefile_names, 'list')
-  expect_is(result$covariate_rasters, c('RasterBrick', 'RasterStack'))
+  expect_is(result$covariate_rasters, 'SpatRaster')
   expect_is(result$polygon_data, 'data.frame')
   expect_is(result$covariate_data, 'data.frame')
   expect_is(result$aggregation_pixels, 'numeric')
@@ -87,36 +53,36 @@ test_that("Check prepare_data function with sample size works as expected", {
   expect_equal(sum(is.na(result$polygon_data$N)), 0)
   expect_equal(nrow(result$polygon_data), nrow(result$startendindex))
   expect_equal(nrow(result$covariate_data), nrow(result$coordsForFit))
-
+
 })
 
 test_that("Check prepare_data function deals with NAs as expected", {
-
+  skip_on_cran()
-
+
   cov_stack_na <- cov_stack
   cov_stack_na[[1]][c(1:10)] <- NA
-
+
   aggregation_raster_na <- r
   aggregation_raster_na[c(1:10)] <- NA
-
+
   expect_error(prepare_data(polygon_shapefile = spdf_na, covariate_rasters = cov_stack, makeMesh = FALSE))
   expect_error(prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack_na, makeMesh = FALSE))
   expect_error(prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack, aggregation_raster = aggregation_raster_na, makeMesh = FALSE))
-
-  result <- prepare_data(polygon_shapefile = spdf_na,
+
+  result <- prepare_data(polygon_shapefile = spdf_na,
                          covariate_rasters = cov_stack_na,
                          aggregation_raster = aggregation_raster_na,
                          na.action = TRUE,
                          makeMesh = FALSE)
-
+
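The fixture objects these tests keep referring to (`spdf`, `spdf_na`, `spdf_binom`, `r`, `r2`, `cov_stack`) are no longer built in the test files themselves. A minimal sketch of an equivalent sf/terra fixture, assuming it now lives in a shared helper such as `tests/testthat/setup.R` — the location and exact values are assumptions, not taken from this patch:

```r
library(sf)
library(terra)

n_polygon_per_side <- 10
n_polygons <- n_polygon_per_side^2
n_pixels_per_side <- n_polygon_per_side * 2

# One closed square ring per polygon, mirroring the deleted raster/sp loop
polygon_list <- lapply(seq_len(n_polygons), function(i) {
  row <- ceiling(i / n_polygon_per_side)
  col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side)
  xmin <- 2 * (col - 1); xmax <- 2 * col; ymin <- 2 * (row - 1); ymax <- 2 * row
  sf::st_polygon(list(rbind(c(xmin, ymax), c(xmax, ymax), c(xmax, ymin),
                            c(xmin, ymin), c(xmin, ymax))))
})
response_df <- data.frame(area_id = seq_len(n_polygons),
                          response = runif(n_polygons, min = 0, max = 1000))
spdf <- sf::st_sf(response_df, geometry = sf::st_sfc(polygon_list))

# Two covariate layers; c() on SpatRasters replaces raster::stack()
r <- terra::rast(ncols = n_pixels_per_side, nrows = n_pixels_per_side)
terra::ext(r) <- terra::ext(terra::vect(spdf))
terra::values(r) <- sapply(seq_len(terra::ncell(r)), function(x)
  rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3))
r2 <- terra::rast(r)  # same geometry as r, values assigned below
terra::values(r2) <- sapply(seq_len(terra::ncell(r2)), function(x)
  rnorm(1, ceiling(x / n_pixels_per_side), 3))
cov_stack <- c(r, r2)
names(cov_stack) <- c('layer1', 'layer2')
```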
   expect_is(result, 'disag_data')
   expect_equal(length(result), 10)
-  expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data',
+  expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data',
                                 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh'))
-  expect_is(result$polygon_shapefile, 'SpatialPolygonsDataFrame')
+  expect_is(result$polygon_shapefile, 'sf')
   expect_is(result$shapefile_names, 'list')
-  expect_is(result$covariate_rasters, c('RasterBrick', 'RasterStack'))
+  expect_is(result$covariate_rasters, 'SpatRaster')
   expect_is(result$polygon_data, 'data.frame')
   expect_is(result$covariate_data, 'data.frame')
   expect_is(result$aggregation_pixels, 'numeric')
@@ -133,43 +99,40 @@ test_that("Check prepare_data function deals with NAs as expected", {
 
 test_that("Check as.disag_data function works as expected", {
-
+
   skip_on_cran()
-
+
   polygon_data <- getPolygonData(spdf, id_var = 'area_id', response_var = 'response')
-
-  cl <- parallel::makeCluster(2)
-  doParallel::registerDoParallel(cl)
-  cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = 'area_id')
-  parallel::stopCluster(cl)
-  foreach::registerDoSEQ()
-
+
+  cov_data <- terra::extract(cov_stack, spdf, cells=TRUE, na.rm=TRUE, ID=TRUE)
+  names(cov_data)[1] <- 'area_id'
+
   aggregation_data <- rep(1, nrow(cov_data))
-
+
   coordsForFit <- extractCoordsForMesh(cov_stack, cov_data$cellid)
-
+
   coordsForPrediction <- extractCoordsForMesh(cov_stack)
-
+
   startendindex <- getStartendindex(cov_data, polygon_data, 'area_id')
-
-  result <- as.disag_data(spdf,
+
+  result <- as.disag_data(spdf,
                           list('area_id', 'response'),
                           cov_stack,
-                          polygon_data,
-                          cov_data,
+                          polygon_data,
+                          cov_data,
                           aggregation_data,
-                          coordsForFit,
+                          coordsForFit,
                           coordsForPrediction,
                           startendindex,
                           mesh = NULL)
-
+
   expect_is(result, 'disag_data')
   expect_equal(length(result), 10)
-  expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data',
+  expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data',
                                 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh'))
-  expect_is(result$polygon_shapefile, 'SpatialPolygonsDataFrame')
+  expect_is(result$polygon_shapefile, 'sf')
   expect_is(result$shapefile_names, 'list')
-  expect_is(result$covariate_rasters, c('RasterBrick', 'RasterStack'))
+  expect_is(result$covariate_rasters, 'SpatRaster')
   expect_is(result$polygon_data, 'data.frame')
   expect_is(result$covariate_data, 'data.frame')
   expect_is(result$aggregation_pixels, 'numeric')
@@ -179,6 +142,6 @@ test_that("Check as.disag_data function works as expected", {
   expect_true(is.null(result$mesh))
   expect_equal(nrow(result$polygon_data), nrow(result$startendindex))
   expect_equal(nrow(result$covariate_data), nrow(result$coordsForFit))
-
+
 })
diff --git a/tests/testthat/test-summary.R b/tests/testthat/test-summary.R
index 7b8c574..0538441 100644
--- a/tests/testthat/test-summary.R
+++ b/tests/testthat/test-summary.R
@@ -1,45 +1,11 @@
 context("Summary functions")
 
-polygons <- list()
-n_polygon_per_side <- 10
-n_polygons <- n_polygon_per_side * n_polygon_per_side
-n_pixels_per_side <- n_polygon_per_side * 2
-
-for(i in 1:n_polygons) {
-  row <- ceiling(i/n_polygon_per_side)
-  col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side)
-  xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-  polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
-}
-
-polys <- do.call(raster::spPolygons, polygons)
-response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000))
-spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-
-# Create raster stack
-r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
-r <- raster::setExtent(r, raster::extent(spdf))
-r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3))
-r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
-r2 <- raster::setExtent(r2, raster::extent(spdf))
-r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3))
-cov_stack <- raster::stack(r, r2)
-
-if(identical(Sys.getenv("NOT_CRAN"), "true")) {
-  test_data <- prepare_data(polygon_shapefile = spdf,
-                            covariate_rasters = cov_stack)
-} else {
-  test_data <- prepare_data(polygon_shapefile = spdf,
-                            covariate_rasters = cov_stack,
-                            makeMesh = FALSE)
-}
-
 test_that("Check summary.disag_data function works as expected", {
-
+
   skip_on_cran()
-
+
   data_summary <- summary(test_data)
-
+
   expect_is(data_summary, 'list')
   expect_equal(length(data_summary), 3)
   expect_equal(names(data_summary), c('number_polygons', 'number_covariates', 'covariate_summary'))
@@ -47,29 +13,29 @@ test_that("Check summary.disag_data function works as expected", {
   expect_is(data_summary$number_covariates, 'integer')
   expect_is(data_summary$covariate_summary, 'table')
   expect_equal(ncol(data_summary$covariate_summary), data_summary$number_covariates)
-
+
 })
 
 test_that("Check print.disag_data function works as expected", {
-
+
   skip_on_cran()
-
+
   print_output <- print(test_data)
-
+
   expect_is(print_output, 'disag_data')
   expect_equal(print_output, test_data)
-
+
 })
 
 test_that("Check summary.disag_model function works as expected", {
-
+
   skip_if_not_installed('INLA')
   skip_on_cran()
 
   result <- disag_model(test_data, field = FALSE, iterations = 2)
-
+
   model_summary <- summary(result)
-
+
   expect_is(model_summary, 'list')
   expect_equal(length(model_summary), 3)
   expect_equal(names(model_summary), c('model_params', 'nll', 'metrics'))
@@ -77,55 +43,75 @@ test_that("Check summary.disag_model function works as expected", {
   expect_is(model_summary$nll, 'numeric')
   expect_is(model_summary$metrics, 'data.frame')
   expect_equal(dim(model_summary$metrics), c(1, 5))
-
+
 })
 
 test_that("Check print.disag_model function works as expected", {
-
+
   skip_if_not_installed('INLA')
   skip_on_cran()
-
+
   result <- disag_model(test_data, field = FALSE, iterations = 2)
-
+
   print_output <- print(result)
-
+
   expect_is(print_output, 'disag_model')
   expect_equal(print_output, result)
-
+
 })
 
 test_that("Check summary.disag_predictions function works as expected", {
-
+
   skip_if_not_installed('INLA')
   skip_on_cran()
-
-  result <- disag_model(test_data, iid = FALSE, iterations = 2)
-
+
+  result <- disag_model(test_data, iid = FALSE, iterations = 100,
+                        list(priormean_intercept = 0,
+                             priorsd_intercept = 0.1,
+                             priormean_slope = 0.0,
+                             priorsd_slope = 0.1,
+                             prior_rho_min = 5,
+                             prior_rho_prob = 0.01,
+                             prior_sigma_max = 0.1,
+                             prior_sigma_prob = 0.01,
+                             prior_iideffect_sd_max = 0.00001,
+                             prior_iideffect_sd_prob = 0.01))
+
   pred <- predict(result)
-
+
   model_summary <- summary(pred)
-
+
   expect_is(model_summary, 'list')
   expect_equal(length(model_summary), 3)
   expect_equal(names(model_summary), c('number_realisations', 'range_mean_values', 'range_iqr_values'))
   expect_is(model_summary$number_realisations, 'integer')
   expect_is(model_summary$range_mean_values, 'numeric')
   expect_is(model_summary$range_iqr_values, 'numeric')
-
+
 })
 
 test_that("Check print.disag_predictions function works as expected", {
-
+
   skip_if_not_installed('INLA')
   skip_on_cran()
-
-  result <- disag_model(test_data, iid = FALSE, iterations = 2)
-
+
+  result <- disag_model(test_data, iid = FALSE, iterations = 100,
+                        list(priormean_intercept = 0,
+                             priorsd_intercept = 0.1,
+                             priormean_slope = 0.0,
+                             priorsd_slope = 0.1,
+                             prior_rho_min = 5,
+                             prior_rho_prob = 0.01,
+                             prior_sigma_max = 0.1,
+                             prior_sigma_prob = 0.01,
+                             prior_iideffect_sd_max = 0.0001,
+                             prior_iideffect_sd_prob = 0.01))
+
   pred <- predict(result)
-
+
   print_output <- print(pred)
-
+
   expect_is(print_output, 'disag_prediction')
   expect_equal(print_output, pred)
-
-})
\ No newline at end of file
+
+})
diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
index 2436e18..9892b98 100644
--- a/vignettes/disaggregation.Rmd
+++ b/vignettes/disaggregation.Rmd
@@ -8,32 +8,38 @@ vignette: >
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
 ---
-
+
 ```{r setup, include = FALSE}
 knitr::opts_chunk$set(
   collapse = TRUE,
   comment = "#>",
   cache = TRUE,
   fig.width = 7,
-  eval = FALSE
+  eval = TRUE
 )
 ```
 
-```{r, echo=FALSE}
+```{r, echo = FALSE, eval = TRUE}
 isINLA <- requireNamespace('INLA', quietly = TRUE)
 ```
 
-The **disaggregation** package contains functions to run Bayesian disaggregation models. Aggregated response data over large heterongenous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The github page for this package can be found [here](https://github.com/aknandi/disaggregation).
+The **disaggregation** package contains functions to run Bayesian disaggregation models. Aggregated response data over large heterogeneous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The github page for this package can be found [here](https://github.com/aknandi/disaggregation).
 
-Install **disggregation** using:
+Install **disaggregation** using:
+
+```r
+install.packages('disaggregation')
+```
+
+or from github using
 
 ```r
 devtools::install_github("aknandi/disaggregation")
 ```
 
 The key functions are `prepare_data`, `fit_model` and `predict`. The `prepare_data` function takes the aggregated data and covariate data to be used in the model and produces an object to be used by `fit_model`. This function runs the disaggregation model, and the output can be passed to `predict` to produce fine-scale predicted maps of the response variable.
 
-To use the disaggregation `prepare_data` fuction, you must have the aggregated data as a `SpatialPolygonDataFrame` object and a `RasterStack` of the covariate data to be used in the model.
+To use the disaggregation `prepare_data` function, you must have the aggregated data as an `sf` object and a `SpatRaster` of the covariate data to be used in the model.
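Putting those requirements together, a minimal end-to-end sketch looks like the following. Here `df`, `cov_stack` and the column names are placeholders, and the argument names mirror the `prepare_data`, `disag_model` and `predict` calls used later in this vignette:

```r
library(disaggregation)

# df: sf polygon data frame with response and id columns
# cov_stack: SpatRaster of fine-scale covariates covering the polygons
data_for_model <- prepare_data(polygon_shapefile = df,
                               covariate_rasters = cov_stack,
                               response_var = 'cases',
                               id_var = 'area_id')

fit <- disag_model(data_for_model)  # fit the Bayesian disaggregation model
preds <- predict(fit)               # fine-scale mean and uncertainty rasters
```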
 ## Example
 
@@ -42,101 +48,120 @@ We will demonstrate an example of the **disaggregation** package using areal dat
 
 ```{r}
 library(SpatialEpi, quietly = TRUE)
 library(dplyr, quietly = TRUE)
-library(sp, quietly = TRUE)
-library(raster, quietly = TRUE)
 library(disaggregation, quietly = TRUE)
+library(ggplot2)
+library(sf)
+library(terra)
+
+polygons <- sf::st_as_sf(NYleukemia$spatial.polygon)
+
+df <- cbind(polygons, NYleukemia$data)
+
-map <- NYleukemia$spatial.polygon
-df <- NYleukemia$data
+ggplot() + geom_sf(data = df, aes(fill = cases / population)) + scale_fill_viridis_c(lim = c(0, 0.003))
-polygon_data <- SpatialPolygonsDataFrame(map, df)
-polygon_data
 ```
 
-Now we simulate two covariate rasters for the area of interest and make a `RasterStack`. They are simulated at the resolution of approximately 1km2.
+Now we simulate two covariate rasters for the area of interest and combine them into a two-layered `SpatRaster`. They are simulated at a resolution of approximately 1 km^2.
 
 ```{r, fig.show='hold'}
-extent_in_km <- 111*(polygon_data@bbox[, 2] - polygon_data@bbox[, 1])
+
+bbox <- sf::st_bbox(df)
+
+extent_in_km <- 111*(bbox[c(3, 4)] - bbox[c(1, 2)])
 n_pixels_x <- floor(extent_in_km[[1]])
 n_pixels_y <- floor(extent_in_km[[2]])
-r <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y)
-r <- raster::setExtent(r, raster::extent(polygon_data))
-r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3))
-r2 <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y)
-r2 <- raster::setExtent(r2, raster::extent(polygon_data))
-r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_y), 3))
-cov_stack <- raster::stack(r, r2)
-cov_stack <- raster::scale(cov_stack)
+
+r <- terra::rast(ncols = n_pixels_x, nrows = n_pixels_y)
+terra::ext(r) <- terra::ext(df)
+
+data_generate <- function(x){
+  rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3)
+}
+
+terra::values(r) <- sapply(seq(terra::ncell(r)), data_generate)
+r2 <- terra::rast(ncol = n_pixels_x, nrow = n_pixels_y)
+terra::ext(r2) <- terra::ext(df)
+terra::values(r2) <- sapply(seq(terra::ncell(r2)),
+                            function(x) rnorm(1, ceiling(x/n_pixels_y), 3))
+
+cov_stack <- terra::rast(list(r, r2))
+cov_stack <- terra::scale(cov_stack)
+names(cov_stack) <- c('layer1', 'layer2')
+
 ```
 
 We also create a population raster. This is to allow the model to correctly aggregate the pixel values to the polygon level. For this simple example we assume that the population within each polygon is uniformly distributed.
 
 ```{r, fig.show='hold'}
-extracted <- raster::extract(r, polygon_data)
-n_cells <- sapply(extracted, length)
-polygon_data@data$pop_per_cell <- polygon_data@data$population/n_cells
-pop_raster <- rasterize(polygon_data, cov_stack, field = 'pop_per_cell')
+extracted <- terra::extract(r, terra::vect(df$geometry), fun = sum)
+n_cells <- terra::extract(r, terra::vect(df$geometry), fun = length)
+df$pop_per_cell <- df$population/n_cells$lyr.1
+pop_raster <- terra::rasterize(terra::vect(df), cov_stack, field = 'pop_per_cell')
 ```
 
-To correct small inconsistencies in the polygon geometry, we run the line below
+To correct small inconsistencies in the polygon geometry, we run the code below.
 
 ```{r, fig.show='hold'}
-polygon_data <- rgeos::gBuffer(polygon_data, byid = TRUE, width = 0)
+df <- sf::st_buffer(df, dist = 0)
 ```
 
-Now we have setup the data we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `SpatialPolygonsDataFrame` should be specified.
+Now that we have set up the data, we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `sf` object should be specified.
 
-The user can also control the parameters of the mesh that is used to create the spatial field. The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. The mesh parameters: `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [inla.noncovex.hull function](https://rdrr.io/github/andrewzm/INLA/man/inla.nonconvex.hull.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [inla.mesh.2d function](https://rdrr.io/github/andrewzm/INLA/man/inla.mesh.2d.html).
+The user can also control the parameters of the mesh that is used to create the spatial field. The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. The mesh parameters `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [fm_nonconvex_hull_inla function](https://rdrr.io/cran/fmesher/man/fm_nonconvex_hull_inla.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [fm_mesh_2d function](https://rdrr.io/cran/fmesher/man/fm_mesh_2d.html).
 
-```{r, fig.show='hold'}
-data_for_model <- prepare_data(polygon_data,
-                               cov_stack,
-                               pop_raster,
+```{r, fig.show='hold', eval= isINLA}
+data_for_model <- prepare_data(polygon_shapefile = df,
+                               covariate_rasters = cov_stack,
+                               aggregation_raster = pop_raster,
                                response_var = 'cases',
                                id_var = 'censustract.FIPS',
                                mesh.args = list(cut = 0.01,
                                                 offset = c(0.1, 0.5),
                                                 max.edge = c(0.1, 0.2),
                                                 resolution = 250),
-                               na.action = TRUE,
-                               ncores = 1)
+                               na.action = TRUE)
 ```
 
-```{r, fig.show='hold'}
+```{r, fig.show='hold', eval= isINLA}
 plot(data_for_model)
 ```
 
-Now have our data object we are ready to run the model. Here we can specify the likelihood function as gaussian, binomial or poisson, and we can specify the link function as logit, log or identity. The disaggregation model makes predictions at the pixel level:
-
-$link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i$
-
-where $X$ are the covariates, $GP$ is the gaussian random field and $u_i$ is the iid random effect. The pixel predictions are then aggregated to the polygon level using the weighted sum (via the aggregation raster, $agg_i$):
-
-$cases_j = \sum_{i \epsilon j} pred_i \times agg_i$
-
-$rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}$
-
-The different likelihood correspond to slightly different models ($y_j$ is the repsonse count data):
-
-**Gaussian** ($\sigma_j$ is the dispersion of the polygon data),
+Now that we have our data object, we are ready to run the model. Here we can specify
+the likelihood function as Gaussian, binomial or Poisson, and we can specify
+the link function as logit, log or identity. The disaggregation model makes
+predictions at the pixel level:
+
+$link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i$
+
+where $X$ are the covariates, $GP$ is the Gaussian random field and $u_i$ is the iid random effect. The pixel predictions are then aggregated to the polygon level using the weighted sum (via the aggregation raster, $agg_i$):
+
+$cases_j = \sum_{i \epsilon j} pred_i \times agg_i$
+
+$rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}$
+
+The different likelihoods correspond to slightly different models ($y_j$ is the response count data):
+
+**Gaussian** ($\sigma_j$ is the dispersion of the polygon data),
 
 $dnorm(y_j/\sum agg_i, rate_j, \sigma_j)$
-
-Here $\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i$, where $\sigma$ is the dispersion of the pixel data, a parameter learnt by the model.
+
+Here $\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i$, where $\sigma$ is the dispersion of the pixel data, a parameter learnt by the model.
 
 **Binomial** (For a survey in polygon j, $y_j$ is the number positive and $N_j$ is the number tested)
 
 $dbinom(y_j, N_j, rate_j)$
-
-**Poisson** (predicts incidence count)
+
+**Poisson** (predicts incidence count)
 
 $dpois(y_j, cases_j)$
+
+The user can also specify the priors for the regression parameters. For the field, the user specifies the pc priors for the range, $\rho_{min}$ and $\rho_{prob}$, where $P(\rho < \rho_{min}) = \rho_{prob}$, and the variation, $\sigma_{min}$ and $\sigma_{prob}$, where $P(\sigma > \sigma_{min}) = \sigma_{prob}$, in the field. For the iid effect, the user also specifies pc priors.
 
-The user can also specify the priors for the regression parameters. For the field, the user specifies the pc priors for the range, $\rho_{min}$ and $\rho_{prob}$, where $P(\rho < \rho_{min}) = \rho_{prob}$, and the variation, $\sigma_{min}$ and $\sigma_{prob}$, where $P(\sigma > \sigma_{min}) = \sigma_{prob}$, in the field. For the iid effect, the user also specifies pc priors.
-
-By default the model contains a spatial field and a polygon iid effect. These can be turned off in the `fit_model` function, using `field = FALSE` or `iid = FALSE`.
+By default the model contains a spatial field and a polygon iid effect. These can be turned off in the `disag_model` function, using `field = FALSE` or `iid = FALSE`.
 
 ```{r, fig.show='hold', eval=isINLA}
@@ -160,7 +185,7 @@ model_result <- disag_model(data_for_model,
 plot(model_result)
 ```
 
-Now we have the results from the model of the fitted parameters, we can predict Leukemia incidence rate at fine-scale (the scale of the covariate data) across New York. The `predict` function takes the model result and predicts both the mean raster surface and predicts and summarises `N` parameter draws, where `N` is set by the user (default 100). The uncertainty is summarirised via the confidence interval set by the user (default 95% CI).
+Now that we have the fitted model parameters, we can predict Leukemia incidence rate at fine scale (the scale of the covariate data) across New York. The `predict` function takes the model result, predicts the mean raster surface, and draws and summarises `N` parameter samples, where `N` is set by the user (default 100). The uncertainty is summarised via the confidence interval set by the user (default 95% CI).
 
 ```{r, fig.show='hold', eval=isINLA}
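# A hedged sketch, not part of the original chunk: the same prediction step
# with the uncertainty controls named explicitly. N draws and the CI level
# default to the values quoted above; predict_iid mirrors the usage in the
# tests earlier in this patch.
model_prediction <- predict(model_result, N = 100, CI = 0.95, predict_iid = FALSE)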