Skip to content

Commit

Permalink
Merge pull request #39 from DOI-USGS/div-level_path
Browse files Browse the repository at this point in the history
levelpath and pathlength improvements
  • Loading branch information
dblodgett-usgs authored Aug 2, 2024
2 parents 98f7178 + 5503073 commit a4d534a
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 15 deletions.
59 changes: 48 additions & 11 deletions R/add_levelpaths.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ required_atts_add_levelpaths <- c("id", "toid")
#' will match the behavior of NHDPlus. Any numeric value can be
#' included in this column and the largest value will be followed when
#' no nameid is available.
#'
#' x must include id, toid, and conditionally divergence attributes.
#' If a "topo_sort" (hydrosequence in nhdplus terms) attribute is included,
#' it will be used instead of recreation.
#'
#' If a future plan is set, it will be used for a preprocess step of the function.
#'
#' @param x data.frame network compatible with \link{hydroloom_names}.
#' @param name_attribute character attribute to be used as name identifiers.
#' @param weight_attribute character attribute to be used as weight.
Expand All @@ -21,6 +28,11 @@ required_atts_add_levelpaths <- c("id", "toid")
#' path with the larger weight. If the `weight_attribute` is `override_factor`
#' times larger on a path, it will be followed regardless of the name_attribute
#' indication.
#'
#' If id and toid are non-dendritic so id:toid is many to one and id is
#' non-unique, a divergence attribute must be included such that the dendritic
#' network can be extracted after the network is sorted.
#'
#' @name add_levelpaths
#' @export
#' @examples
Expand Down Expand Up @@ -76,17 +88,29 @@ add_levelpaths.hy <- function(x, name_attribute, weight_attribute,
on.exit(pboptions(pbopts), add = TRUE)
}

check_names(x, required_atts_add_levelpaths, "add_levelpaths")
req_atts <- required_atts_add_levelpaths
check_names(x, req_atts, "add_levelpaths")

if(length(unique(x$id)) != nrow(x)) {
if(!divergence %in% names(x))
stop(paste("Non unique ids found. A divergence attribute must be included",
"if id is non-unique"))
req_atts <- c(required_atts_add_levelpaths, divergence)
}

hy_g <- get_hyg(x, add = TRUE, id = id)

orig_names <- attr(x, "orig_names")

x <- st_drop_geometry(x)

extra <- select(x, all_of(c(id, names(x)[!names(x) %in% required_atts_add_levelpaths])))
if(topo_sort %in% names(x)) {
req_atts <- c(req_atts, topo_sort)
}

extra <- distinct(select(x, all_of(c(id, names(x)[!names(x) %in% req_atts]))))

x <- select(x, all_of(c(required_atts_add_levelpaths,
x <- select(x, all_of(c(req_atts,
"lp_name_attribute" = name_attribute,
"lp_weight_attribute" = weight_attribute))) |>
distinct()
Expand All @@ -98,9 +122,17 @@ add_levelpaths.hy <- function(x, name_attribute, weight_attribute,
x[["lp_name_attribute"]] <- replace_na(x[["lp_name_attribute"]], " ") # NHDPlusHR uses NA for empty names.
x[["lp_name_attribute"]][x[["lp_name_attribute"]] == "-1"] <- " "

x <- sort_network(x)
# don't think this is necessary
# x <- sort_network(x)

if(!topo_sort %in% names(x)) x <- add_topo_sort(x)

if(divergence %in% names(x)) {
divs <- x[[id]][x[[divergence]] > 1]

x <- filter(x, !.data$toid %in% divs)
}

x <- add_topo_sort(x)
x$levelpath <- rep(0, nrow(x))

x <- x |> # get downstream name id added
Expand All @@ -113,9 +145,13 @@ add_levelpaths.hy <- function(x, name_attribute, weight_attribute,
group_by(.data$toid) |>
group_split()

cl <- "future"
if(inherits(future::plan(), "sequential")) cl = NULL

# reweight sets up ranked upstream paths
x <- pblapply(x, reweight, override_factor = override_factor,
nat = "lp_name_attribute", wat = "lp_weight_attribute", cl = "future")
nat = "lp_name_attribute", wat = "lp_weight_attribute",
cl = cl)

x <- x |>
bind_rows() |>
Expand Down Expand Up @@ -145,12 +181,13 @@ add_levelpaths.hy <- function(x, name_attribute, weight_attribute,
while(done < nrow(x) & checker < 10000000) {

pathids <- if(nrow(outlet_ind) == 1) {
list(par_get_path(as.list(outlet_ind), x, from_ind, status, "lp_weight_attribute"))
list(par_get_path(as.list(outlet_ind), x[c("ind", "lp_weight_attribute")], from_ind, status, "lp_weight_attribute"))
} else {
lapply(split(outlet_ind, seq_len(nrow(outlet_ind))),
par_get_path,
x_in = x, from_ind = from_ind,
status = status, wat = "lp_weight_attribute")
pblapply(split(outlet_ind, seq_len(nrow(outlet_ind))),
par_get_path,
x_in = x[c("ind", "lp_weight_attribute")], from_ind = from_ind,
status = status, wat = "lp_weight_attribute",
cl = cl)
}

pathids <- bind_rows(pathids)
Expand Down
17 changes: 13 additions & 4 deletions R/add_pathlength.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#' Add Path Length
#' @description Generates the main path length to a basin's terminal path.
#'
#' Requires id, toid, and length_km hydroloom compatible attributes.
#'
#' @inheritParams add_levelpaths
#' @name add_pathlength
#' @export
Expand Down Expand Up @@ -40,17 +43,23 @@ add_pathlength.hy <- function(x) {

x <- sort_network(st_drop_geometry(x))[nrow(x):1, ]

x$pathlength_km <- rep(0, nrow(x))
pathlength_km <- rep(0, nrow(x))
length_km <- x$length_km
toid <- x$toid

toids <- match(x$toid, x$id)

for(i in seq_len(length(x$id))) {
if((tid <- x$toid[i]) != get_outlet_value(x)) {
for(i in seq_len(length(toid))) {
tid <- toid[i]
if(tid != 0) {

x$pathlength_km[i] <- x$length_km[toids[i]] + x$pathlength_km[toids[i]]
pathlength_km[i] <- length_km[toids[i]] + pathlength_km[toids[i]]

}
if(i %% 10000 == 0) message(i)
}

x$pathlength_km <- pathlength_km

left_join(orig_order, x, by = id)
}
10 changes: 10 additions & 0 deletions man/add_levelpaths.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/add_pathlength.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions tests/testthat/test_add_levelpaths.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,32 @@ test_that("add_levelpaths example", {

})

test_that("add_levelpaths non dendritic", {
g <- sf::read_sf(system.file("extdata/new_hope.gpkg", package = "hydroloom"))

test_flowline <- add_toids(g, return_dendritic = FALSE)

test_flowline <- hy(test_flowline)

test_flowline <- dplyr::select(test_flowline, id, toid, divergence, GNIS_ID, arbolate_sum)

expect_error(add_levelpaths(dplyr::select(test_flowline, -divergence),
"GNIS_ID", "arbolate_sum"),
"divergence attribute must be included")

lp1 <- add_levelpaths(test_flowline, "GNIS_ID", "arbolate_sum")

expect_equal(nrow(lp1), nrow(g))

test_flowline <- add_topo_sort(test_flowline) |>
filter(!toid %in% test_flowline$id[test_flowline$divergence > 1]) |>
select(-divergence)

lp2 <- add_levelpaths(test_flowline, "GNIS_ID", "arbolate_sum")

expect_equal(lp1, lp2)
})

test_that("reweight", {
x <- readRDS(list.files(pattern = "reweight_test.rds",
full.names = TRUE, recursive = TRUE))
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test_sort_network.R
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ test_that("add_topo_sort deals with diversions", {

base_network <- hydroloom::add_topo_sort(base_network)

# this ensures that we get a downstream decreasing topo sort along a
# secondary and primary pathway.
expect_equal(length(unique(base_network$topo_sort[base_network$id == 8317403])), 1)

expect_true(
Expand Down

0 comments on commit a4d534a

Please sign in to comment.