Skip to content

Commit

Permalink
Merge pull request #362 from ncss-tech/fetchSOLUS
Browse files Browse the repository at this point in the history
Add `fetchSOLUS()`
  • Loading branch information
brownag authored Oct 23, 2024
2 parents 8604d18 + ef86fd7 commit 98bf43e
Show file tree
Hide file tree
Showing 4 changed files with 588 additions and 0 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export(fetchRaCA)
export(fetchSCAN)
export(fetchSDA)
export(fetchSDA_spatial)
export(fetchSOLUS)
export(fetchSRI)
export(fetchSoilGrids)
export(fetchVegdata)
Expand Down Expand Up @@ -198,6 +199,7 @@ importFrom(methods,as)
importFrom(methods,is)
importFrom(methods,slot)
importFrom(stats,aggregate)
importFrom(stats,approxfun)
importFrom(stats,as.formula)
importFrom(stats,complete.cases)
importFrom(stats,na.omit)
Expand Down
353 changes: 353 additions & 0 deletions R/fetchSOLUS.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,353 @@
#' Fetch Soil Landscapes of the United States (SOLUS) Grids
#'
#' This tool creates a virtual raster or downloads data for an extent from Cloud Optimized GeoTIFFs
#' (COGs) from the [Soil Landscapes of the United States 100-meter (SOLUS100) soil property maps
#' project
#' repository](https://agdatacommons.nal.usda.gov/articles/dataset/Data_from_Soil_Landscapes_of_the_United_States_100-meter_SOLUS100_soil_property_maps_project_repository/25033856).
#'
#' @details
#'
#' If the input object `x` is not specified (`NULL` or missing), a _SpatRaster_ object using the
#' virtual URLs is returned. The full extent and resolution data set can be then downloaded and
#' written to file using `terra::writeRaster()` (or any other processing step specifying an output
#' file name). When input object `x` is specified, a _SpatRaster_ object using in memory or local
#' (temporary file or `filename`) resources is returned after downloading the data only for the
#' target extent. In the case where `x` is a _SoilProfileCollection_ or an _sf_ or _SpatVector_
#' object containing point geometries, the result will be a _SoilProfileCollection_ for values
#' extracted at the point locations. To return both the _SpatRaster_ and _SoilProfileCollection_
#' object output in a _list_, use `grid = NULL`.
#'
#' @param x An R spatial object (such as a _SpatVector_, _SpatRaster_, or _sf_ object) or a
#' _SoilProfileCollection_ with coordinates initialized via `aqp::initSpatial<-`. Default: `NULL`
#' returns the CONUS extent as virtual raster. If `x` is a _SpatRaster_ the coordinate reference
#' system, extent, and resolution are used as a template for the output raster.
#' @param depth_slices character. One or more of: `"0"`, `"5"`, `"15"`, `"30"`, `"60"`, `"100"`,
#' `"150"`. The "depth slice" `"all"` (used for variables such as `"anylithicdpt"`, and
#' `"resdept"`) is always included if any site-level variables are selected.
#' @param variables character. One or more of: `"anylithicdpt"`, `"caco3"`, `"cec7"`, `"claytotal"`,
#' `"dbovendry"`, `"ec"`, `"ecec"`, `"fragvol"`, `"gypsum"`, `"ph1to1h2o"`, `"resdept"`,
#' `"sandco"`, `"sandfine"`, `"sandmed"`, `"sandtotal"`, `"sandvc"`, `"sandvf"`, `"sar"`,
#' `"silttotal"`, `"soc"`.
#' @param output_type character. One or more of: `"prediction"`, `"relative prediction interval"`,
#' `"95% low prediction interval"`, `"95% high prediction interval"`
#' @param grid logical. Default `TRUE` returns a _SpatRaster_ object for an extent. `FALSE` returns
#' a _SoilProfileCollection_. Any other value returns a _list_ object with names `"grid"` and
#' `"spc"` containing both result objects.
#' @param samples integer. Number of regular samples to return for _SoilProfileCollection_ output.
#' Default `NULL` will convert all grid cells to a unique profile. Note that for a large extent,
#' this can produce large objects with a very large number of layers (especially with `method`
#' other than `"step"`).
#' @param method character. Used to determine depth interpolation method for _SoilProfileCollection_
#' output. Default: `"linear"`. Options include any `method` allowed for `approxfun()` or
#' `splinefun()` plus `"step"` and `"slice"`. `"step"` uses the prediction depths as the top and
#' bottom of each interval to create a piecewise continuous profile to maximum of 200 cm depth
#' (for 150 cm upper prediction depth). `"slice"` returns a discontinuous profile with 1 cm thick
#' slices at the predicted depths. Both `"step"` and `"slice"` return a number of layers equal to
#' length of `depth_slices`, and all other methods return data in interpolated 1cm slices.
#' @param max_depth integer. Maximum depth to interpolate 150 cm slice data to. Default: `151`.
#' Interpolation deeper than 151 cm is not possible for methods other than `"step"` and will
#' result in missing values.
#' @param filename character. Path to write output raster file. Default: `NULL` will keep result in
#' memory (or store in temporary file if memory threshold is exceeded)
#' @param overwrite Overwrite `filename` if it exists? Default: `FALSE`
#'
#' @return A _SpatRaster_ object containing SOLUS grids for specified extent, depths, variables, and
#' product types.
#'
#' @references Nauman, T.W., Kienast-Brown, S., White, D.A. Brungard, C.W., Philippe, J., Roecker,
#' S.M., Thompson, J.A. Soil Landscapes of the United States (SOLUS): developing predictive soil
#' property maps of the conterminous US using hybrid training sets. In Prep for SSSAJ.
#'
#' @author Andrew G. Brown
#'
#' @importFrom stats approxfun splinefun
#'
#' @export
#'
#' @examplesIf curl::has_internet() && requireNamespace("sf") && requireNamespace("terra")
#'
#' b <- c(-119.747629, -119.67935, 36.912019, 36.944987)
#'
#' bbox.sp <- sf::st_as_sf(wk::rct(
#' xmin = b[1], xmax = b[2], ymin = b[3], ymax = b[4],
#' crs = sf::st_crs(4326)
#' ))
#'
#' ssurgo.geom <- soilDB::SDA_spatialQuery(
#' bbox.sp,
#' what = 'mupolygon',
#' db = 'SSURGO',
#' geomIntersection = TRUE
#' )
#'
#' # grid output
#' res <- fetchSOLUS(
#' ssurgo.geom,
#' depth_slices = "0",
#' variables = c("sandtotal", "silttotal", "claytotal", "cec7"),
#' output_type = "prediction"
#' )
#'
#' terra::plot(res)
#'
#' # SoilProfileCollection output, using linear interpolation for 1cm slices
#' # site-level variables (e.g. resdept) added to site data.frame of SPC
#' res <- fetchSOLUS(
#' ssurgo.geom,
#' depth_slices = c("0", "5", "15", "30", "60", "100", "150"),
#' variables = c("sandtotal", "silttotal", "claytotal", "cec7", "resdept"),
#' output_type = "prediction",
#' method = "linear",
#' grid = FALSE,
#' samples = 10
#' )
#'
#' # plot, truncating each profile to the predicted restriction depth
#' aqp::plotSPC(trunc(res, 0, res$resdept_p), color = "claytotal_p", divide.hz = FALSE)
fetchSOLUS <- function(x = NULL,
depth_slices = c(0, 5, 15, 30, 60, 100, 150),
variables = c("anylithicdpt", "caco3", "cec7", "claytotal",
"dbovendry", "ec", "ecec", "fragvol", "gypsum",
"ph1to1h2o", "resdept", "sandco", "sandfine",
"sandmed", "sandtotal", "sandvc", "sandvf",
"sar", "silttotal", "soc"),
output_type = c("prediction",
"relative prediction interval",
"95% low prediction interval",
"95% high prediction interval"),
grid = TRUE,
samples = NULL,
method = c("linear", "constant", "fmm", "natural", "monoH.FC", "step", "slice"),
max_depth = 151,
filename = NULL,
overwrite = FALSE
) {

# Not all spline methods are relevant, but they can be allowed to work
method <- match.arg(method[1], c("linear", "constant", "fmm", "periodic", "natural", "monoH.FC", "hyman", "step", "slice"))

# get index of SOLUS COGs
ind <- try(.get_SOLUS_index())

if (inherits(ind, 'try-error')) {
stop("Failed to fetch SOLUS grid index", call. = FALSE)
}

# subset based on user specified properties, depths, and product type
isub <- ind[ind$property %in% variables &
as.character(ind$depth_slice) %in% c("all", depth_slices) &
ind$filetype %in% output_type,]

isub$subproperty <- gsub("\\.tif$", "", isub$filename)
isub$scalar <- as.numeric(isub$scalar)

if (!requireNamespace("terra")) {
stop("package 'terra' is required", call. = FALSE)
}

# create virtual raster from list of URLs
r <- terra::rast(
paste0("/vsicurl/", isub$url)
)

# manually apply scaling factors to source raster
terra::scoff(r) <- cbind(1 / isub$scalar, 0)

# do conversion of input spatial object
if (!missing(x) && !is.null(x)) {

# convert various input types to SpatVector
if (inherits(x, 'SoilProfileCollection')) {
x <- as(x, 'sf')
}

if (inherits(x, c('RasterLayer', 'RasterStack'))) {
x <- terra::rast(x)
}

if (!inherits(x, c('SpatRaster', 'SpatVector'))) {
x <- terra::vect(x)
}

if (inherits(x, 'SpatVector')) {
# project any input vector object to CRS of SOLUS
x <- terra::project(x, terra::crs(r))
}

xe <- terra::ext(terra::project(terra::as.polygons(x, ext = TRUE), r))

# handle requests out-of-bounds
if (!(terra::relate(terra::ext(r), xe, relation = "contains")[1] ||
terra::relate(terra::ext(r), xe, relation = "overlaps")[1])) {
stop("Extent of `x` is outside the boundaries of the source data extent.", call. = FALSE)
}

if (!inherits(x, 'SpatRaster')){
# crop to target extent (written to temp file if needed)
r <- terra::crop(r, x, filename = filename)
} else {
# if x is a spatraster, use it as a template for GDAL warp
r <- terra::project(r, x, filename = filename, align_only = FALSE, mask = TRUE, threads = TRUE)
}
}

if (isTRUE(grid)) {
return(r)
}

if (length(depth_slices) == 1 && method != "step") {
stop("Cannot interpolate for SoilProfileCollection output with only one depth slice! Change `method` to \"step\" or add another `depth_slice`.", call. = FALSE)
}

if (!missing(x) && !is.null(x) && terra::is.points(x)) {
dat <- terra::extract(r, x)
} else {
if (!missing(samples) && !is.null(samples)) {
dat <- terra::spatSample(r,
size = samples,
method = "regular",
xy = TRUE) # for testing
} else {
dat <- terra::as.data.frame(r, xy = TRUE, na.rm = FALSE)
}
}

dat$ID <- seq(nrow(dat))

spc <- .convert_SOLUS_dataframe_to_SPC(dat, idname = "ID", method = method, max_depth = max_depth)
aqp::initSpatial(spc, terra::crs(r)) <- ~ x + y

if (isFALSE(grid)) {
return(spc)
} else {
return(list(grid = r, spc = spc))
}
}

.get_SOLUS_index <- function() {

# TODO: parse XML directly instead of HTML?

# read index as HTML table
res <- rvest::html_table(rvest::read_html("https://storage.googleapis.com/solus100pub/index.html"), header = FALSE)[[1]]

# column names are in 4th row
colnames(res) <- res[5, ]

# drop empty rows
res <- res[-(c(1:5, nrow(res))), ]

# fix inconsistencies in depth column
res$depth[is.na(res$depth) | res$depth == ""] <- "all_cm"
dlut <- c("all_cm" = "all", "0_cm" = "0", "5_cm" = "5", "15_cm" = "15",
"30_cm" = "30", "60_cm" = "60", "100_cm" = "100", "150_cm" = "150")

# use depth slices
res$depth_slice <- dlut[res$depth]
res$depth_slice <- factor(res$depth_slice, levels = unique(dlut))

res
}

.convert_SOLUS_dataframe_to_SPC <- function(x, idname = "id", method, max_depth = 151) {
# x: data.frame object with column names corresponding to .get_SOLUS_index() filenames
# idname: character. column name used to identify profiles
# method: character. depth interpolation method

# dummy global definitions
.SD <- NULL
ID <- NULL
depth <- NULL

.extractTopDepthFromName <- function(x) {
gsub("^.*_(\\d+|all)_cm_.*$", "\\1", x)
}

.replaceTopDepthInName <- function(x) {
gsub("^(.*)_(\\d+|all)_cm(_.*)$", "\\1\\3", x)
}

tdep <- .extractTopDepthFromName(colnames(x))
colnames(x) <- .replaceTopDepthInName(colnames(x))

h <- data.table::rbindlist(lapply(unique(tdep[!tdep %in% c("x", "y", "all", idname)]), function(xx) {
data.frame(ID = x[[idname]], depth = xx, x[which(tdep == xx)])
}))

s <- data.frame(ID = x[[idname]], x[tdep %in% c("x", "y", "all")])

h$depth <- as.numeric(h$depth)

h <- h[order(ID, depth),]

stepwise_dept <- c(0, 5, 15, 30, 60, 100, 150)
stepwise_depb <- c(0, 15, 30, 60, 100, 150, 150)
names(stepwise_depb) <- stepwise_dept

h$top <- h$depth
h$bottom <- stepwise_depb[as.character(h$depth)]

ldx <- names(h) %in% c(idname, "depth", "x", "y", "top", "bottom")
iv <- names(h)[ldx]
vn <- names(h)[!ldx]

if (method %in% c("slice", "step")) {

if (method == "slice") {

h$bottom <- h$top + 1

} else {
message("NOTE: SOLUS predictions represent depth slices (method=\"slice\")\nConsider using method=\"constant\" or method=\"linear\".")

# apply fudge factors for depth slices as property input source
h$bottom[h$bottom == 0] <- 5
h$bottom[h$top == 150] <- max_depth
}

h <- as.data.frame(h)

depths(h) <- c(idname, "top", "bottom")

site(h) <- s

return(h)
} else if (method %in% c("linear", "constant", "fmm", "periodic", "natural", "monoH.FC", "hyman")) {

mindep <- min(h$top, na.rm = TRUE)
maxdep <- max(h$bottom, na.rm = TRUE)

if (maxdep == 150) {
maxdep <- max_depth
}

if (method %in% c("linear", "constant")) {
FUN <- approxfun
} else {
FUN <- splinefun
}

xx <- (mindep:(maxdep - 1))
y <- unique(h$top)
h2 <- h[, data.frame(top = mindep:(maxdep - 1),
bottom = (mindep + 1):maxdep,
lapply(.SD, function(x) {
if (all(is.na(x)))
return(rep(NA_real_, length(xx)))
FUN(y, x, method = method)(xx)
})),
.SDcols = vn,
by = list(ID = h[[idname]])]

h2 <- as.data.frame(h2)

depths(h2) <- c(idname, "top", "bottom")

site(h2) <- s

return(h2)

} else {
stop("Invalid method argument (\"", method, "\")", call. = FALSE)
}
}
Loading

0 comments on commit 98bf43e

Please sign in to comment.