Merge pull request #39 from DOI-USGS/div-level_path

levelpath and pathlength improvements
DOI-USGS · Aug 2, 2024 · a4d534a · a4d534a
2 parents 98f7178 + 5503073
commit a4d534a
Show file tree

Hide file tree

Showing 6 changed files with 101 additions and 15 deletions.
diff --git a/R/add_levelpaths.R b/R/add_levelpaths.R
@@ -6,6 +6,13 @@ required_atts_add_levelpaths <- c("id", "toid")
 #' will match the behavior of NHDPlus. Any numeric value can be
 #' included in this column and the largest value will be followed when
 #' no nameid is available.
+#'
+#' x must include id, toid, and conditionally divergence attributes.
+#' If a "topo_sort" (hydrosequence in nhdplus terms) attribute is included,
+#' it will be used instead of recreation.
+#'
+#' If a future plan is set, it will be used for a preprocess step of the function.
+#'
 #' @param x data.frame network compatible with \link{hydroloom_names}.
 #' @param name_attribute character attribute to be used as name identifiers.
 #' @param weight_attribute character attribute to be used as weight.
@@ -21,6 +28,11 @@ required_atts_add_levelpaths <- c("id", "toid")
 #' path with the larger weight. If the `weight_attribute` is `override_factor`
 #' times larger on a path, it will be followed regardless of the name_attribute
 #' indication.
+#'
+#' If id and toid are non-dendritic so id:toid is many to one and id is
+#' non-unique, a divergence attribute must be included such that the dendritic
+#' network can be extracted after the network is sorted.
+#'
 #' @name add_levelpaths
 #' @export
 #' @examples
@@ -76,17 +88,29 @@ add_levelpaths.hy <- function(x, name_attribute, weight_attribute,
     on.exit(pboptions(pbopts), add = TRUE)
   }
 
-  check_names(x, required_atts_add_levelpaths, "add_levelpaths")
+  req_atts <- required_atts_add_levelpaths
+  check_names(x, req_atts, "add_levelpaths")
+
+  if(length(unique(x$id)) != nrow(x)) {
+    if(!divergence %in% names(x))
+      stop(paste("Non unique ids found. A divergence attribute must be included",
+                 "if id is non-unique"))
+    req_atts <- c(required_atts_add_levelpaths, divergence)
+  }
 
   hy_g <- get_hyg(x, add = TRUE, id = id)
 
   orig_names <- attr(x, "orig_names")
 
   x <- st_drop_geometry(x)
 
-  extra <- select(x, all_of(c(id, names(x)[!names(x) %in% required_atts_add_levelpaths])))
+  if(topo_sort %in% names(x)) {
+    req_atts <- c(req_atts, topo_sort)
+  }
+
+  extra <- distinct(select(x, all_of(c(id, names(x)[!names(x) %in% req_atts]))))
 
-  x <- select(x, all_of(c(required_atts_add_levelpaths,
+  x <- select(x, all_of(c(req_atts,
                           "lp_name_attribute" = name_attribute,
                           "lp_weight_attribute" = weight_attribute))) |>
     distinct()
@@ -98,9 +122,17 @@ add_levelpaths.hy <- function(x, name_attribute, weight_attribute,
   x[["lp_name_attribute"]] <- replace_na(x[["lp_name_attribute"]], " ") # NHDPlusHR uses NA for empty names.
   x[["lp_name_attribute"]][x[["lp_name_attribute"]] == "-1"] <- " "
 
-  x <- sort_network(x)
+  # don't think this is necessary
+  # x <- sort_network(x)
+
+  if(!topo_sort %in% names(x)) x <- add_topo_sort(x)
+
+  if(divergence %in% names(x)) {
+    divs <- x[[id]][x[[divergence]] > 1]
+
+    x <- filter(x, !.data$toid %in% divs)
+  }
 
-  x <- add_topo_sort(x)
   x$levelpath <- rep(0, nrow(x))
 
   x <- x |> # get downstream name id added
@@ -113,9 +145,13 @@ add_levelpaths.hy <- function(x, name_attribute, weight_attribute,
     group_by(.data$toid) |>
     group_split()
 
+  cl <- "future"
+  if(inherits(future::plan(), "sequential")) cl = NULL
+
   # reweight sets up ranked upstream paths
   x <- pblapply(x, reweight, override_factor = override_factor,
-                     nat = "lp_name_attribute", wat = "lp_weight_attribute", cl = "future")
+                     nat = "lp_name_attribute", wat = "lp_weight_attribute",
+                cl = cl)
 
   x <- x |>
     bind_rows() |>
@@ -145,12 +181,13 @@ add_levelpaths.hy <- function(x, name_attribute, weight_attribute,
   while(done < nrow(x) & checker < 10000000) {
 
     pathids <- if(nrow(outlet_ind) == 1) {
-      list(par_get_path(as.list(outlet_ind), x, from_ind, status, "lp_weight_attribute"))
+      list(par_get_path(as.list(outlet_ind), x[c("ind", "lp_weight_attribute")], from_ind, status, "lp_weight_attribute"))
     } else {
-      lapply(split(outlet_ind, seq_len(nrow(outlet_ind))),
-             par_get_path,
-             x_in = x, from_ind = from_ind,
-             status = status, wat = "lp_weight_attribute")
+      pblapply(split(outlet_ind, seq_len(nrow(outlet_ind))),
+              par_get_path,
+              x_in = x[c("ind", "lp_weight_attribute")], from_ind = from_ind,
+              status = status, wat = "lp_weight_attribute",
+              cl = cl)
     }
 
     pathids <- bind_rows(pathids)

diff --git a/R/add_pathlength.R b/R/add_pathlength.R
@@ -1,5 +1,8 @@
 #' Add Path Length
 #' @description Generates the main path length to a basin's terminal path.
+#'
+#' Requires id, toid, and length_km hydroloom compatible attributes.
+#'
 #' @inheritParams add_levelpaths
 #' @name add_pathlength
 #' @export
@@ -40,17 +43,23 @@ add_pathlength.hy <- function(x) {
 
   x <- sort_network(st_drop_geometry(x))[nrow(x):1, ]
 
-  x$pathlength_km <- rep(0, nrow(x))
+  pathlength_km <- rep(0, nrow(x))
+  length_km <- x$length_km
+  toid <- x$toid
 
   toids <- match(x$toid, x$id)
 
-  for(i in seq_len(length(x$id))) {
-    if((tid <- x$toid[i]) != get_outlet_value(x)) {
+  for(i in seq_len(length(toid))) {
+    tid <- toid[i]
+    if(tid != 0) {
 
-      x$pathlength_km[i] <- x$length_km[toids[i]] + x$pathlength_km[toids[i]]
+      pathlength_km[i] <- length_km[toids[i]] + pathlength_km[toids[i]]
 
     }
+    if(i %% 10000 == 0) message(i)
   }
 
+  x$pathlength_km <- pathlength_km
+
   left_join(orig_order, x, by = id)
 }
diff --git a/man/add_levelpaths.Rd b/man/add_levelpaths.Rd
diff --git a/man/add_pathlength.Rd b/man/add_pathlength.Rd
diff --git a/tests/testthat/test_add_levelpaths.R b/tests/testthat/test_add_levelpaths.R
@@ -17,6 +17,32 @@ test_that("add_levelpaths example", {
 
 })
 
+test_that("add_levelpaths non dendritic", {
+  g <- sf::read_sf(system.file("extdata/new_hope.gpkg", package = "hydroloom"))
+
+  test_flowline <- add_toids(g, return_dendritic = FALSE)
+
+  test_flowline <- hy(test_flowline)
+
+  test_flowline <- dplyr::select(test_flowline, id, toid, divergence, GNIS_ID, arbolate_sum)
+
+  expect_error(add_levelpaths(dplyr::select(test_flowline, -divergence),
+                              "GNIS_ID", "arbolate_sum"),
+               "divergence attribute must be included")
+
+  lp1 <- add_levelpaths(test_flowline, "GNIS_ID", "arbolate_sum")
+
+  expect_equal(nrow(lp1), nrow(g))
+
+  test_flowline <- add_topo_sort(test_flowline) |>
+    filter(!toid %in% test_flowline$id[test_flowline$divergence > 1]) |>
+    select(-divergence)
+
+  lp2 <- add_levelpaths(test_flowline, "GNIS_ID", "arbolate_sum")
+
+  expect_equal(lp1, lp2)
+})
+
 test_that("reweight", {
   x <- readRDS(list.files(pattern = "reweight_test.rds",
                           full.names = TRUE, recursive = TRUE))

diff --git a/tests/testthat/test_sort_network.R b/tests/testthat/test_sort_network.R
@@ -208,6 +208,8 @@ test_that("add_topo_sort deals with diversions", {
 
   base_network <- hydroloom::add_topo_sort(base_network)
 
+  # this ensures that we get a downstream decreasing topo sort along a
+  # secondary and primary pathway.
   expect_equal(length(unique(base_network$topo_sort[base_network$id == 8317403])), 1)
 
   expect_true(