Skip to content

Commit

Permalink
Merge pull request #53 from afsc-gap-products/dev
Browse files Browse the repository at this point in the history
Update version to 3.3-1
  • Loading branch information
sean-rohan-NOAA authored Feb 23, 2024
2 parents 78c7627 + 9e06d07 commit 6a33e38
Show file tree
Hide file tree
Showing 23 changed files with 2,375 additions and 2 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,9 @@ vignettes/*.pdf
/data/*.shp.xml
/data/*.cpg
/data/*.sbn
/data/*.nc
/analysis/temp3d/data
/analysis/temp3d/output
/analysis/temp3d/R/archive
/analysis/temp3d/plots
/analysis/temp3d/paper
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: coldpool
Type: Package
Title: AFSC/RACE Groundfish Assessment Program EBS and NBS temperature products
Version: 3.2-2
Version: 3.3-1
Authors@R: c(person("Sean", "Rohan", email = "sean.rohan@noaa.gov", role = c("aut", "cre")),
person("Lewis", "Barnett", email = "lewis.barnett@noaa.gov", role = c("aut", "ctb")),
person("Kelly", "Kearney", role = c("ctb")),
Expand Down Expand Up @@ -29,5 +29,6 @@ RoxygenNote: 7.2.3
Imports: colorspace,
viridis,
raster,
here
here,
tidyr
Suggests: knitr, rmarkdown, RODBC, getPass, readr, testthat, cowplot, metR
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ export(cpa_pre2021)
export(ebs_bottom_temperature)
export(ebs_proj_crs)
export(ebs_surface_temperature)
export(estimate_anisotropy)
export(estimate_z_expansion)
export(get_connected)
export(get_data)
export(interpolate_variable)
export(interpolation_wrapper)
export(kriging_cv)
export(legend_discrete_cbar)
export(loocv_gear_temp)
export(make_raster_file)
Expand Down
12 changes: 12 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
coldpool 3.3-1 (November 13, 2023)
----------------------------------------------------------------

NEW FEATURE

- Added kriging_cv() function to compare kriging methods
using cross-validation for ordinary kriging,co-kriging, and
regression kriging.
- Added estimate_z_expansion() function to estimate the vertical
expansion factors for 3D kriging based on cross-validation.
- Added estimate_anisotropy() function for estimating 2D
anisotropy in variograms using cross-validation.
46 changes: 46 additions & 0 deletions R/estimate_anisotropy.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#' Estimate 2D variogram anisotropy parameters for kriging using 10-fold cross-validation
#'
#' Uses the coldpool::kriging_loocv() interface.
#'
#' @param x A data.frame containing variables to be interpolated and latitude and longitude coordinates.
#' @param variable_name 1L character vector indicating which varible to interpolate (e.g. 'TEMPERATURE')
#' @param latitude_name 1L character vector indicating the name of the latitude column (e.g., 'LATITUDE')
#' @param longitude_name 1L character vector indicating the name of the longitude column (e.g., 'LONGITUDE')
#' @param input_crs Character vector indicating the coordinate reference system for x (default = "WGS84")
#' @param interpolation_crs Coordinate reference system to use for interpolation.
#' @param kriging_formula Formula to use for kriging. Default (kriging_formula = variable_name ~ 1) is ordinary kriging without covariates. Formula for the gstat interface (see ?gstat::gstat). Covariate example: kriging_formula = variable_name ~ BOTTOM_DEPTH
#' @param starting_values Optional. Starting value of anisotropy parameters to use for optimization. See: ?gstat::vgm.
#' @param folds 1L numeric vector indicating how many folds to use for cross-validation.
#' @param nm Maximum number of nearest neighbor observations to use for interpolation.
#' @param vgm_width Optional. Width of variogram breaks.
#' @param variogram_model Character vector indicating which variogram model to use for interpolation. Valid options are exponential (exp), circular (cir), gaussian (gau), Bessel (bes), Matern (mat), or Stein's Matern (ste).
#' @param seed RNG seed (set.seed(seed)) to use for anisotropy estimation based on cross-validation.
#' @param only_return_anisotropy For internal use. Default = FALSE
#' @export

estimate_anisotropy <- function(x,
kriging_formula = variable_name ~ 1,
folds = 10,
variogram_model,
starting_values = NULL,
vgm_width = NULL,
nm = Inf,
seed = 19673) {

stopifnot("estimate_2d_anisotropy: Length of interpolation method must be 1." = length(variogram_model) == 1)
stopifnot("estimate_2d_anisotropy: variogram_model must be one of 'Exp', 'Cir', 'Gau', 'Sph', 'Mat', 'Bes', 'Ste'" = tolower(variogram_model) %in% c('exp', 'cir', 'gau', 'sph', 'mat', 'bes', 'ste'))

pars <- coldpool::kriging_loocv(x = x,
anisotropy_parameters = starting_values,
estimate_anisotropy = TRUE,
nm = nm,
vgm_width = vgm_width,
kriging_formula = kriging_formula,
location_formula = location_formula,
interpolation_methods = variogram_model,
seed = 19673,
only_return_anisotropy = TRUE)

return(pars)

}
230 changes: 230 additions & 0 deletions R/estimate_z_expansion.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
#' Estimate vertical expansion factor for 3D kriging
#'
#' Uses L-BFGS algorithm implemented in optim().
#'
#' @param z_start Starting value for vertical expansion estimation
#' @param x data.frame containing
#' @param cv_index Index of folds for cross validation
#' @param location_formula Formula to use for location argument to gstat. See gstat documentation for description of the formula interface (see ?gstat::gstat).
#' @param kriging_formula Formula to use for kriging. See gstat documentation for description of the formula interface (see ?gstat::gstat).
#' @param anisotopy_parameters Anisotropy parameters to use for ordinary kriging as a 5L vector. See: ?gstat::vgm. If NULL and estimate_anisotropy, anisotropy is estimated.
#' @param vgm_width Optional.
#' @param variogram_model Character vector indicating which variogram model to use for interpolation. Valid options are exponential (exp), circular (cir), gaussian (gau), Bessel (bes), Matern (mat), or Stein's Matern (ste).
#' @param use_for_mse Logical vector indicating whether to use an observation to calculate MSE for cross-validation.
#' @param z_limits Upper and lower bounds for z_expansion, for optimization.
#' @param nm Maximum number of nearest neighbor observations to use for interpolation.
#' @export

estimate_z_expansion <- function(x,
location_formula,
kriging_formula,
model,
z_start,
cv_index,
anisotropy_parameters = c(0,0,0,1,1),
vgm_width,
nm = Inf,
maxdist = Inf,
use_for_mse = rep(TRUE, length(cv_index)),
z_limits = c(10, 500000)) {



obj_fn <- function(log_z_mult,
dat,
cv_index,
anisotropy_parameters,
location_formula,
kriging_formula,
vgm_width,
model,
use_for_mse) {

dat[all.vars(location_formula)[3]] <- dat[all.vars(location_formula)[3]] * exp(log_z_mult)

mod <- gstat::gstat(formula = kriging_formula,
locations = location_formula,
data = dat,
nmax = nm,
maxdist = maxdist)

vario_both <- gstat::variogram(mod,
width = vgm_width,
alpha = c(0, 45, 90, 135))

vario_nugget <- gstat::fit.variogram(gstat::variogram(mod,
width = vgm_width),
gstat::vgm(model))

vario_fit <- gstat::fit.variogram(vario_both,
gstat::vgm(model = model,
anis = anisotropy_parameters,
nugget = vario_nugget$psill[1]))

mod <- gstat::gstat(formula = kriging_formula,
locations = location_formula,
data = dat,
nmax = nm,
model = vario_fit,
maxdist = maxdist)

cv_results <- gstat::gstat.cv(object = mod,
nfold = cv_index,
verbose = TRUE,
debug.level = 2)

mse <- mean(cv_results$residual[use_for_mse]^2)

return(mse)

}

results <- optim(par = log(z_start),
fn = obj_fn,
method = "L-BFGS-B",
dat = x,
control = list(trace = 5),
kriging_formula = kriging_formula,
location_formula = location_formula,
anisotropy_parameters = anisotropy_parameters,
lower = log(z_limits[1]),
upper = log(z_limits[2]),
cv_index = cv_index,
vgm_width = vgm_width,
model = model,
use_for_mse = use_for_mse
)

results$par <- exp(results$par)

return(results)

}



# estimate_z_expansion <- function(x,
# location_formula,
# kriging_formula,
# model,
# z_start,
# cv_index,
# anisotropy_parameters = c(0,0,0,1,1),
# vgm_parameters = NULL,
# vgm_width,
# nm = Inf,
# maxdist = Inf,
# use_for_mse = rep(TRUE, length(cv_index)),
# z_limits = c(10, 500000)) {
#
# obj_fn <- function(log_z_mult,
# dat,
# cv_index,
# anisotropy_parameters,
# location_formula,
# kriging_formula,
# vgm_parameters = NULL,
# vgm_width,
# model,
# use_for_mse) {
#
# dat[all.vars(location_formula)[3]] <- dat[all.vars(location_formula)[3]] * exp(log_z_mult)
#
# if(is.null(vgm_parameters)) {
# mod <- gstat::gstat(formula = kriging_formula,
# locations = location_formula,
# data = dat,
# nmax = nm,
# maxdist = maxdist)
#
# vario_both <- gstat::variogram(mod,
# width = vgm_width,
# alpha = c(0, 45, 90, 135))
#
# vario_nugget <- gstat::fit.variogram(gstat::variogram(mod,
# width = vgm_width),
# gstat::vgm(model))
#
# vario_fit <- gstat::fit.variogram(vario_both,
# gstat::vgm(model = model,
# anis = anisotropy_parameters,
# nugget = vario_nugget$psill[1]))
#
# mod <- gstat::gstat(formula = kriging_formula,
# locations = location_formula,
# data = dat,
# nmax = nm,
# maxdist = maxdist,
# model = vario_fit)
#
# cv_results <- gstat::gstat.cv(object = mod,
# nfold = cv_index,
# verbose = TRUE,
# debug.level = 2)
#
# mse <- mean(cv_results$residual[use_for_mse]^2)
#
# } else {
#
# stopifnot("estimate_z_expansion: vgm_parameters must be a list or named vector containing numeric values named nugget, psill, and range variables." = all(c("nugget", "psill", "range") %in% names(vgm_parameters)))
#
# vario_fixed <- gstat::vgm(model = model,
# psill = vgm_parameters[['psill']],
# range = vgm_parameters[['range']],
# anis = anisotropy_parameters,
# nugget = vgm_parameters[['nugget']])
#
# unique_index <- unique(cv_index)
#
# resid_error <- numeric()
#
# for(ii in 1:length(unique_index)) {
#
# training <- dat[cv_index != unique_index[ii], ]
#
# mod <- gstat::gstat(formula = kriging_formula,
# locations = location_formula,
# data = training,
# nmax = nm,
# model = vario_fixed,
# maxdist = maxdist)
#
# validation <- dat[cv_index == unique_index[ii] & use_for_mse, ]
#
# fit <- predict(mod, newdata = validation)
#
# resid_error <- fit$var1.pred - validation[[all.vars(kriging_formula)[1]]]
#
#
# }
#
# mse <- mean(resid_error^2)
#
# }
#
# return(mse)
#
# }
#
# results <- optim(par = log(z_start),
# fn = obj_fn,
# method = "L-BFGS-B",
# dat = x,
# control = list(trace = 5),
# kriging_formula = kriging_formula,
# location_formula = location_formula,
# anisotropy_parameters = anisotropy_parameters,
# variogram_parameters = variogram_parameters,
# lower = log(z_limits[1]),
# upper = log(z_limits[2]),
# cv_index = cv_index,
# vgm_width = vgm_width,
# model = model,
# use_for_mse = use_for_mse
# )
#
# results$par <- exp(results$par)
#
# return(results)
#
# }
Loading

0 comments on commit 6a33e38

Please sign in to comment.