rewrite IUCN functions to match the updates to rredlist

ropensci · Jan 29, 2025 · 243afde · 243afde
1 parent 6a79a74
commit 243afde
Show file tree

Hide file tree

Showing 40 changed files with 24,031 additions and 398 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -119,7 +119,6 @@ S3method(iucn_status,default)
 S3method(iucn_status,iucn_summary)
 S3method(iucn_summary,character)
 S3method(iucn_summary,default)
-S3method(iucn_summary,iucn)
 S3method(lowest_common,default)
 S3method(lowest_common,gbifid)
 S3method(lowest_common,tolid)
@@ -272,7 +271,6 @@ export(iucn_getname)
 export(iucn_id)
 export(iucn_status)
 export(iucn_summary)
-export(iucn_summary_id)
 export(lowest_common)
 export(names_list)
 export(nbn_classification)

diff --git a/R/get_iucn.R b/R/get_iucn.R
@@ -45,12 +45,6 @@
 #' get_iucn("Branta bernicla")
 #' get_iucn("Panthera uncia")
 #'
-#' # as coercion
-#' as.iucn(22732)
-#' as.iucn("22732")
-#' (res <- as.iucn(c(22679946, 22732, 22679935)))
-#' data.frame(res)
-#' as.iucn(data.frame(res))
 #' }
 get_iucn <- function(sci, messages = TRUE, key = NULL, x = NULL, ...) {
 
@@ -61,7 +55,37 @@ get_iucn <- function(sci, messages = TRUE, key = NULL, x = NULL, ...) {
     sci <- x
   }
 
+  raw_data <- get_iucn_data(sci = sci, messages = messages, key = key, ...)
+
+  out <- lapply(names(raw_data), function(input_name) {
+    result <- raw_data[[input_name]]
+    if (length(result) == 1 && is.na(result)) {
+      id <- NA_character_
+      att <- "not found"
+    } else {
+      id <- result$taxon$sis_id
+      direct <- tolower(result$taxon$scientific_name) == tolower(input_name)
+      att <- "found"
+    }
+    list(id = id, name = input_name, att = att, direct = direct)
+  })
+
+  ids <- structure(as.character(unlist(pluck(out, "id"))), class = "iucn",
+                   match = pluck_un(out, "att", ""),
+                   name = pluck_un(out, "name", ""))
+  add_uri(ids, get_url_templates$iucn)
+}
+
 
+#' Get a IUCN Redlist taxon data
+#' 
+#' Used to get IUCN data for other functions to use.
+#' 
+#' @param If `TRUE`, latest use [rredlist::rl_species_latest()] instead of [rredlist::rl_species()]
+#' 
+#' @keywords internal
+get_iucn_data <- function(sci, messages = TRUE, key = NULL, latest = FALSE, ...) {
+
   if (inherits(sci, "character")) {
     tstate <- taxon_state$new(class = "iucn", names = sci)
     items <- sci
@@ -71,59 +95,53 @@ get_iucn <- function(sci, messages = TRUE, key = NULL, x = NULL, ...) {
     sci <- tstate$taxa_remaining()
     items <- c(sci, tstate$taxa_completed())
   }
-
+  
   prog <- progressor$new(items = items, suppress = !messages)
   done <- tstate$get()
   for (i in seq_along(done)) prog$completed(names(done)[i], done[[i]]$att)
   prog$prog_start()
-
+
+  if (latest) {
+    rl_func_to_use <- rredlist::rl_species_latest
+  } else {
+    rl_func_to_use <- rredlist::rl_species
+  }
+
   for (i in seq_along(sci)) {
     direct <- FALSE
     mssg(messages, "\nRetrieving data for taxon '", sci[i], "'\n")
-    df <- rredlist::rl_search(sci[i], key = key, ...)
-
-    if (!inherits(df$result, "data.frame") || NROW(df$result) == 0) {
-      id <- NA_character_
+    parts <- strsplit(sci[i], split = ' +')[[1]]
+    result <- tryCatch(
+      {
+        if (length(parts) == 2) {
+          tmp <- rl_func_to_use(genus = parts[1], species = parts[2], key = key, ...)
+        } else {
+          tmp <- rl_func_to_use(genus = parts[1], species = parts[2], infra = parts[3], key = key, ...)
+        }
+        tmp
+      },
+      error = function(e) {
+        NA_integer_
+      }
+    ) 
+
+    if (length(result) == 1 && is.na(result)) {
       att <- "not found"
     } else {
-      df <- df$result[, c("taxonid", "scientific_name", "kingdom",
-                   "phylum", "order", "family", "genus", "authority")]
-
-      # should return NA if species not found
-      if (NROW(df) == 0) {
-        mssg(messages, tx_msg_not_found)
-        id <- NA_character_
-        att <- "not found"
-      }
-
-      # check for direct match
-      direct <- match(tolower(df$scientific_name), tolower(sci[i]))
-
-      if (!all(is.na(direct))) {
-        id <- df$taxonid[!is.na(direct)]
-        direct <- TRUE
-        att <- "found"
-      } else {
-        direct <- FALSE
-        id <- df$taxonid
-        att <- "found"
-      }
-      # multiple matches not possible because no real search
+      att <- "found"
     }
-    res <- list(id = id, name = sci[i], att = att, direct = direct)
     prog$completed(sci[i], att)
     prog$prog(att)
-    tstate$add(sci[i], res)
+    tstate$add(sci[i], result)
   }
   out <- tstate$get()
-  ids <- structure(as.character(unlist(pluck(out, "id"))), class = "iucn",
-                   match = pluck_un(out, "att", ""),
-                   name = pluck_un(out, "name", ""))
   on.exit(prog$prog_summary(), add = TRUE)
   on.exit(tstate$exit, add = TRUE)
-  add_uri(ids, get_url_templates$iucn)
+  return(out)
 }
 
+
+
 #' @export
 #' @rdname get_iucn
 as.iucn <- function(x, check = TRUE, key = NULL) {

diff --git a/R/gnr_resolve.R b/R/gnr_resolve.R
@@ -1,67 +1,72 @@
 #' Resolve names using Global Names Resolver
 #'
+#' NOTE: this function is depreciated and will be removed in a future version.
+#' The service this function interacts with is no longer maintained and has been
+#' replaced by GNA Verifier, which can be used with the [gna_verifier()]
+#' function.
+#' 
 #' See section **Age of datasets in the Global Names Resolver**
 #'
 #' @export
 #' @param sci character; taxonomic names to be resolved. Doesn't work for
-#' vernacular/common names.
-#' @param data_source_ids character; IDs to specify what data source
-#'     is searched. See [gnr_datasources()].
+#'   vernacular/common names.
+#' @param data_source_ids character; IDs to specify what data source is
+#'   searched. See [gnr_datasources()].
 #' @param resolve_once logical; Find the first available match instead of
-#'    matches across all data sources with all possible renderings of a name.
-#'    When `TRUE`, response is rapid but incomplete.
+#'   matches across all data sources with all possible renderings of a name.
+#'   When `TRUE`, response is rapid but incomplete.
 #' @param with_context logical; Reduce the likelihood of matches to taxonomic
-#'    homonyms. When `TRUE` a common taxonomic context is calculated for
-#'    all supplied names from matches in data sources that have classification
-#'    tree paths. Names out of determined context are penalized during score
-#'    calculation.
+#'   homonyms. When `TRUE` a common taxonomic context is calculated for all
+#'   supplied names from matches in data sources that have classification tree
+#'   paths. Names out of determined context are penalized during score
+#'   calculation.
 #' @param canonical logical; If `FALSE` (default), gives back names with
-#'    taxonomic authorities. If `TRUE`, returns canocial names
-#'    (without tax. authorities and abbreviations).
+#'   taxonomic authorities. If `TRUE`, returns canocial names (without tax.
+#'   authorities and abbreviations).
 #' @param highestscore logical; Return those names with the highest score for
-#'    each searched name? Defunct
+#'   each searched name? Defunct
 #' @param best_match_only (logical) If `TRUE`, best match only returned.
-#' Default: `FALSE`
-#' @param preferred_data_sources (character) A vector of one or more data
-#' source IDs.
-#' @param with_canonical_ranks (logical) Returns names with infraspecific
-#' ranks, if present. If `TRUE`, we force `canonical=TRUE`, otherwise
-#' this parameter would have no effect. Default: `FALSE`
+#'   Default: `FALSE`
+#' @param preferred_data_sources (character) A vector of one or more data source
+#'   IDs.
+#' @param with_canonical_ranks (logical) Returns names with infraspecific ranks,
+#'   if present. If `TRUE`, we force `canonical=TRUE`, otherwise this parameter
+#'   would have no effect. Default: `FALSE`
 #' @param http The HTTP method to use, one of "get" or "post". Default: "get".
-#' Use `http="post"` with large queries. Queries with > 300 records
-#' use "post" automatically because "get" would fail
+#'   Use `http="post"` with large queries. Queries with > 300 records use "post"
+#'   automatically because "get" would fail
 #' @param names Deprecated, see `sci`
 #' @param ... Curl options passed on to [crul::HttpClient]
 #' @param cap_first (logical) For each name, fix so that the first name part is
-#' capitalized, while others are not. This web service is sensitive to
-#' capitalization, so you'll get different results depending on capitalization.
-#' First name capitalized is likely what you'll want and is the default.
-#' If `FALSE`, names are not modified. Default: `TRUE`
-#' @param fields (character) One of minimal (default) or all. Minimal gives
-#' back just four fields, whereas all gives all fields back.
+#'   capitalized, while others are not. This web service is sensitive to
+#'   capitalization, so you'll get different results depending on
+#'   capitalization. First name capitalized is likely what you'll want and is
+#'   the default. If `FALSE`, names are not modified. Default: `TRUE`
+#' @param fields (character) One of minimal (default) or all. Minimal gives back
+#'   just four fields, whereas all gives all fields back.
 #'
-#' @author Scott Chamberlain 
-#' @return A data.frame with one attribute `not_known`: a character
-#' vector of taxa unknown to the Global Names Index. Access like
-#' `attr(output, "not_known")`, or `attributes(output)$not_known`.
+#' @author Scott Chamberlain
+#' @return A data.frame with one attribute `not_known`: a character vector of
+#'   taxa unknown to the Global Names Index. Access like `attr(output,
+#'   "not_known")`, or `attributes(output)$not_known`.
 #'
-#' Columns of the output data.frame:
+#'   Columns of the output data.frame:
 #' * user_supplied_name (character) - the name you passed in to the
-#'  `names` parameter, unchanged.
+#'   `names` parameter, unchanged.
 #' * submitted_name (character) - the actual name submitted to the GNR
-#'  service
+#'   service
 #' * data_source_id (integer/numeric) - data source ID
 #' * data_source_title (character) - data source name
 #' * gni_uuid (character) - Global Names Index UUID (aka identifier)
 #' * matched_name (character) - the matched name in the GNR service
 #' * matched_name2 (character) - returned if `canonical=TRUE`, in
-#'  which case **matched_name** is not returned
+#'   which case **matched_name** is not returned
 #' * classification_path (character) - names of the taxonomic
-#'  classification tree, with names separated by pipes (`|`)
+#'   classification tree, with names separated by pipes (`|`)
 #' * classification_path_ranks (character) - ranks of the taxonomic
-#'  classification tree, with names separated by pipes (`|`)
+#'   classification tree, with names separated by pipes (`|`)
 #' * classification_path_ids (character) - identifiers of the taxonomic
-#'  classification tree, with names separated by pipes (`|`)
+#'   classification tree, with names separated by pipes (`|`)
 #' * taxon_id (character) - taxon identifier
 #' * edit_distance (integer/numeric) - edit distance
 #' * imported_at (character) - date imported
@@ -75,23 +80,20 @@
 #' * current_taxon_id (character) - current taxon id
 #' * current_name_string (character) - current name string
 #'
-#' Note that names (i.e. rows) are dropped that are NA, are zero length
-#' strings, are not character vectors, or are not found by the API.
+#'   Note that names (i.e. rows) are dropped that are NA, are zero length
+#'   strings, are not character vectors, or are not found by the API.
 #'
-#' @section Age of datasets in the Global Names Resolver:
-#' IMPORTANT: Datasets used in the Global Names Resolver vary in how recently
-#' they've been updated. See the `updated_at` field in the
-#' output of [gnr_datasources()] for dates when each dataset
-#' was last updated.
+#' @section Age of datasets in the Global Names Resolver: IMPORTANT: Datasets
+#'   used in the Global Names Resolver vary in how recently they've been
+#'   updated. See the `updated_at` field in the output of [gnr_datasources()]
+#'   for dates when each dataset was last updated.
 #'
-#' @section preferred_data_sources:
-#' If `preferred_data_sources` is used, only the preferred data
-#' is returned - if it has any results.
+#' @section preferred_data_sources: If `preferred_data_sources` is used, only
+#'   the preferred data is returned - if it has any results.
 #'
 #' @seealso [gnr_datasources()]
 #' @keywords resolve names taxonomy
-#' @references http://gnrd.globalnames.org/api
-#' http://gnrd.globalnames.org/
+#' @references http://gnrd.globalnames.org/api http://gnrd.globalnames.org/
 #' @examples \dontrun{
 #' gnr_resolve(sci = c("Helianthus annuus", "Homo sapiens"))
 #' gnr_resolve(sci = c("Asteraceae", "Plantae"))
@@ -131,6 +133,8 @@ gnr_resolve <- function(sci, data_source_ids = NULL, resolve_once = FALSE,
   with_canonical_ranks = FALSE, http = "get", cap_first = TRUE,
   fields = "minimal", names = NULL, ...) {
 
+  lifecycle::deprecate_warn(what = "taxize::gnr_resolve()", with = "taxize::gna_verifier()", when = "v0.9.103")
+
   if (!is.null(names)) {
     lifecycle::deprecate_warn(when = "v0.9.97", what = "gnr_resolve(names)", with = "gnr_resolve(sci)")
     sci <- names

diff --git a/R/iucn_id.R b/R/iucn_id.R
@@ -1,7 +1,7 @@
 #' Get an ID for a IUCN listed taxon
 #'
 #' @export
-#' @param sciname character; Scientific name. Should be cleand and in the
+#' @param sciname character; Scientific name. Should be cleaned and in the
 #' format `*<Genus> <Species>*`. One or more.
 #' @param key (character) required. you IUCN Redlist API key. See
 #' [rredlist::rredlist-package] for help on authenticating with
@@ -20,32 +20,46 @@
 #' iucn_id(c("Panthera uncia", "Lynx lynx"))
 #'
 #' # many names, some not found
-#' iucn_id(c("Panthera uncia", "Lynx lynx", "foo bar", "hello world"))
+#' iucn_id(c("Panthera uncia", "Lynx lynx", "foo bar", "Gorilla gorilla gorilla"))
 #'
 #' # a name not found
 #' iucn_id("Foo bar")
 #' }
 iucn_id <- function(sciname, key = NULL, ...) {
   out <- list()
-  for (i in seq_along(sciname)) {
-    out[[i]] <- get_iucn_id(sciname[[i]], key = key, ...)
+  parts <- strsplit(sciname, split = ' +')
+  lengths <- vapply(parts, length, FUN.VALUE = numeric(1))
+  invalid_lengths <- lengths <= 1 | lengths > 3
+  if (any(invalid_lengths)) {
+    error_list <- paste0('    ', which(invalid_lengths), ': "', sciname[invalid_lengths], '"')
+    if (length(error_list) > 100) {
+      error_list <- c(error_list[1:100], '   ...')
+    }
+    stop(
+      'The following inputs have the incorrect number of elements in their species name:\n',
+      paste0(error_list, collapse = '\n')
+    )
+  }
+
+  for (i in seq_along(parts)) {
+    out[[i]] <- get_iucn_id(parts[[i]], key = key, ...)
   }
   unlist(out)
 }
 
-get_iucn_id <- function(z, key = NULL, ...) {
-  tmp <- rredlist::rl_search(z, key = key, ...)
-  if (NROW(tmp$result) == 0) {
-    NA
-  } else if (NROW(tmp$result) > 1) {
-    iduniq <- unique(tmp$result$taxonid)
-    if (length(iduniq) == 1) {
-      iduniq
-    } else {
-      stop("> 1 result found, alter your query", call. = FALSE)
+get_iucn_id <- function(parts, key = NULL, ...) {
+  tryCatch(
+    {
+      if (length(parts) == 2) {
+        tmp <- rredlist::rl_species(genus = parts[1], species = parts[2], key = key, ...)
+      } else {
+        tmp <- rredlist::rl_species(genus = parts[1], species = parts[2], infra = parts[3], key = key, ...)
+      }
+      tmp$taxon$sis_id
+    },
+    error = function(e) {
+      NA_integer_
     }
-  } else {
-    tmp$result$taxonid
-  }
+  ) 
 }