Skip to content

Commit

Permalink
rewrite IUCN functions to match the updates to rredlist
Browse files Browse the repository at this point in the history
  • Loading branch information
zachary-foster committed Jan 29, 2025
1 parent 6a79a74 commit 243afde
Show file tree
Hide file tree
Showing 40 changed files with 24,031 additions and 398 deletions.
2 changes: 0 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ S3method(iucn_status,default)
S3method(iucn_status,iucn_summary)
S3method(iucn_summary,character)
S3method(iucn_summary,default)
S3method(iucn_summary,iucn)
S3method(lowest_common,default)
S3method(lowest_common,gbifid)
S3method(lowest_common,tolid)
Expand Down Expand Up @@ -272,7 +271,6 @@ export(iucn_getname)
export(iucn_id)
export(iucn_status)
export(iucn_summary)
export(iucn_summary_id)
export(lowest_common)
export(names_list)
export(nbn_classification)
Expand Down
100 changes: 59 additions & 41 deletions R/get_iucn.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@
#' get_iucn("Branta bernicla")
#' get_iucn("Panthera uncia")
#'
#' # as coercion
#' as.iucn(22732)
#' as.iucn("22732")
#' (res <- as.iucn(c(22679946, 22732, 22679935)))
#' data.frame(res)
#' as.iucn(data.frame(res))
#' }
get_iucn <- function(sci, messages = TRUE, key = NULL, x = NULL, ...) {

Expand All @@ -61,7 +55,37 @@ get_iucn <- function(sci, messages = TRUE, key = NULL, x = NULL, ...) {
sci <- x
}

raw_data <- get_iucn_data(sci = sci, messages = messages, key = key, ...)

out <- lapply(names(raw_data), function(input_name) {
result <- raw_data[[input_name]]
if (length(result) == 1 && is.na(result)) {
id <- NA_character_
att <- "not found"
} else {
id <- result$taxon$sis_id
direct <- tolower(result$taxon$scientific_name) == tolower(input_name)
att <- "found"
}
list(id = id, name = input_name, att = att, direct = direct)
})

ids <- structure(as.character(unlist(pluck(out, "id"))), class = "iucn",
match = pluck_un(out, "att", ""),
name = pluck_un(out, "name", ""))
add_uri(ids, get_url_templates$iucn)
}


#' Get a IUCN Redlist taxon data
#'
#' Used to get IUCN data for other functions to use.
#'
#' @param If `TRUE`, latest use [rredlist::rl_species_latest()] instead of [rredlist::rl_species()]
#'
#' @keywords internal
get_iucn_data <- function(sci, messages = TRUE, key = NULL, latest = FALSE, ...) {

if (inherits(sci, "character")) {
tstate <- taxon_state$new(class = "iucn", names = sci)
items <- sci
Expand All @@ -71,59 +95,53 @@ get_iucn <- function(sci, messages = TRUE, key = NULL, x = NULL, ...) {
sci <- tstate$taxa_remaining()
items <- c(sci, tstate$taxa_completed())
}

prog <- progressor$new(items = items, suppress = !messages)
done <- tstate$get()
for (i in seq_along(done)) prog$completed(names(done)[i], done[[i]]$att)
prog$prog_start()


if (latest) {
rl_func_to_use <- rredlist::rl_species_latest
} else {
rl_func_to_use <- rredlist::rl_species
}

for (i in seq_along(sci)) {
direct <- FALSE
mssg(messages, "\nRetrieving data for taxon '", sci[i], "'\n")
df <- rredlist::rl_search(sci[i], key = key, ...)

if (!inherits(df$result, "data.frame") || NROW(df$result) == 0) {
id <- NA_character_
parts <- strsplit(sci[i], split = ' +')[[1]]
result <- tryCatch(
{
if (length(parts) == 2) {
tmp <- rl_func_to_use(genus = parts[1], species = parts[2], key = key, ...)
} else {
tmp <- rl_func_to_use(genus = parts[1], species = parts[2], infra = parts[3], key = key, ...)
}
tmp
},
error = function(e) {
NA_integer_
}
)

if (length(result) == 1 && is.na(result)) {
att <- "not found"
} else {
df <- df$result[, c("taxonid", "scientific_name", "kingdom",
"phylum", "order", "family", "genus", "authority")]

# should return NA if species not found
if (NROW(df) == 0) {
mssg(messages, tx_msg_not_found)
id <- NA_character_
att <- "not found"
}

# check for direct match
direct <- match(tolower(df$scientific_name), tolower(sci[i]))

if (!all(is.na(direct))) {
id <- df$taxonid[!is.na(direct)]
direct <- TRUE
att <- "found"
} else {
direct <- FALSE
id <- df$taxonid
att <- "found"
}
# multiple matches not possible because no real search
att <- "found"
}
res <- list(id = id, name = sci[i], att = att, direct = direct)
prog$completed(sci[i], att)
prog$prog(att)
tstate$add(sci[i], res)
tstate$add(sci[i], result)
}
out <- tstate$get()
ids <- structure(as.character(unlist(pluck(out, "id"))), class = "iucn",
match = pluck_un(out, "att", ""),
name = pluck_un(out, "name", ""))
on.exit(prog$prog_summary(), add = TRUE)
on.exit(tstate$exit, add = TRUE)
add_uri(ids, get_url_templates$iucn)
return(out)
}



#' @export
#' @rdname get_iucn
as.iucn <- function(x, check = TRUE, key = NULL) {
Expand Down
102 changes: 53 additions & 49 deletions R/gnr_resolve.R
Original file line number Diff line number Diff line change
@@ -1,67 +1,72 @@
#' Resolve names using Global Names Resolver
#'
#' NOTE: this function is depreciated and will be removed in a future version.
#' The service this function interacts with is no longer maintained and has been
#' replaced by GNA Verifier, which can be used with the [gna_verifier()]
#' function.
#'
#' See section **Age of datasets in the Global Names Resolver**
#'
#' @export
#' @param sci character; taxonomic names to be resolved. Doesn't work for
#' vernacular/common names.
#' @param data_source_ids character; IDs to specify what data source
#' is searched. See [gnr_datasources()].
#' vernacular/common names.
#' @param data_source_ids character; IDs to specify what data source is
#' searched. See [gnr_datasources()].
#' @param resolve_once logical; Find the first available match instead of
#' matches across all data sources with all possible renderings of a name.
#' When `TRUE`, response is rapid but incomplete.
#' matches across all data sources with all possible renderings of a name.
#' When `TRUE`, response is rapid but incomplete.
#' @param with_context logical; Reduce the likelihood of matches to taxonomic
#' homonyms. When `TRUE` a common taxonomic context is calculated for
#' all supplied names from matches in data sources that have classification
#' tree paths. Names out of determined context are penalized during score
#' calculation.
#' homonyms. When `TRUE` a common taxonomic context is calculated for all
#' supplied names from matches in data sources that have classification tree
#' paths. Names out of determined context are penalized during score
#' calculation.
#' @param canonical logical; If `FALSE` (default), gives back names with
#' taxonomic authorities. If `TRUE`, returns canocial names
#' (without tax. authorities and abbreviations).
#' taxonomic authorities. If `TRUE`, returns canocial names (without tax.
#' authorities and abbreviations).
#' @param highestscore logical; Return those names with the highest score for
#' each searched name? Defunct
#' each searched name? Defunct
#' @param best_match_only (logical) If `TRUE`, best match only returned.
#' Default: `FALSE`
#' @param preferred_data_sources (character) A vector of one or more data
#' source IDs.
#' @param with_canonical_ranks (logical) Returns names with infraspecific
#' ranks, if present. If `TRUE`, we force `canonical=TRUE`, otherwise
#' this parameter would have no effect. Default: `FALSE`
#' Default: `FALSE`
#' @param preferred_data_sources (character) A vector of one or more data source
#' IDs.
#' @param with_canonical_ranks (logical) Returns names with infraspecific ranks,
#' if present. If `TRUE`, we force `canonical=TRUE`, otherwise this parameter
#' would have no effect. Default: `FALSE`
#' @param http The HTTP method to use, one of "get" or "post". Default: "get".
#' Use `http="post"` with large queries. Queries with > 300 records
#' use "post" automatically because "get" would fail
#' Use `http="post"` with large queries. Queries with > 300 records use "post"
#' automatically because "get" would fail
#' @param names Deprecated, see `sci`
#' @param ... Curl options passed on to [crul::HttpClient]
#' @param cap_first (logical) For each name, fix so that the first name part is
#' capitalized, while others are not. This web service is sensitive to
#' capitalization, so you'll get different results depending on capitalization.
#' First name capitalized is likely what you'll want and is the default.
#' If `FALSE`, names are not modified. Default: `TRUE`
#' @param fields (character) One of minimal (default) or all. Minimal gives
#' back just four fields, whereas all gives all fields back.
#' capitalized, while others are not. This web service is sensitive to
#' capitalization, so you'll get different results depending on
#' capitalization. First name capitalized is likely what you'll want and is
#' the default. If `FALSE`, names are not modified. Default: `TRUE`
#' @param fields (character) One of minimal (default) or all. Minimal gives back
#' just four fields, whereas all gives all fields back.
#'
#' @author Scott Chamberlain
#' @return A data.frame with one attribute `not_known`: a character
#' vector of taxa unknown to the Global Names Index. Access like
#' `attr(output, "not_known")`, or `attributes(output)$not_known`.
#' @author Scott Chamberlain
#' @return A data.frame with one attribute `not_known`: a character vector of
#' taxa unknown to the Global Names Index. Access like `attr(output,
#' "not_known")`, or `attributes(output)$not_known`.
#'
#' Columns of the output data.frame:
#' Columns of the output data.frame:
#' * user_supplied_name (character) - the name you passed in to the
#' `names` parameter, unchanged.
#' `names` parameter, unchanged.
#' * submitted_name (character) - the actual name submitted to the GNR
#' service
#' service
#' * data_source_id (integer/numeric) - data source ID
#' * data_source_title (character) - data source name
#' * gni_uuid (character) - Global Names Index UUID (aka identifier)
#' * matched_name (character) - the matched name in the GNR service
#' * matched_name2 (character) - returned if `canonical=TRUE`, in
#' which case **matched_name** is not returned
#' which case **matched_name** is not returned
#' * classification_path (character) - names of the taxonomic
#' classification tree, with names separated by pipes (`|`)
#' classification tree, with names separated by pipes (`|`)
#' * classification_path_ranks (character) - ranks of the taxonomic
#' classification tree, with names separated by pipes (`|`)
#' classification tree, with names separated by pipes (`|`)
#' * classification_path_ids (character) - identifiers of the taxonomic
#' classification tree, with names separated by pipes (`|`)
#' classification tree, with names separated by pipes (`|`)
#' * taxon_id (character) - taxon identifier
#' * edit_distance (integer/numeric) - edit distance
#' * imported_at (character) - date imported
Expand All @@ -75,23 +80,20 @@
#' * current_taxon_id (character) - current taxon id
#' * current_name_string (character) - current name string
#'
#' Note that names (i.e. rows) are dropped that are NA, are zero length
#' strings, are not character vectors, or are not found by the API.
#' Note that names (i.e. rows) are dropped that are NA, are zero length
#' strings, are not character vectors, or are not found by the API.
#'
#' @section Age of datasets in the Global Names Resolver:
#' IMPORTANT: Datasets used in the Global Names Resolver vary in how recently
#' they've been updated. See the `updated_at` field in the
#' output of [gnr_datasources()] for dates when each dataset
#' was last updated.
#' @section Age of datasets in the Global Names Resolver: IMPORTANT: Datasets
#' used in the Global Names Resolver vary in how recently they've been
#' updated. See the `updated_at` field in the output of [gnr_datasources()]
#' for dates when each dataset was last updated.
#'
#' @section preferred_data_sources:
#' If `preferred_data_sources` is used, only the preferred data
#' is returned - if it has any results.
#' @section preferred_data_sources: If `preferred_data_sources` is used, only
#' the preferred data is returned - if it has any results.
#'
#' @seealso [gnr_datasources()]
#' @keywords resolve names taxonomy
#' @references http://gnrd.globalnames.org/api
#' http://gnrd.globalnames.org/
#' @references http://gnrd.globalnames.org/api http://gnrd.globalnames.org/
#' @examples \dontrun{
#' gnr_resolve(sci = c("Helianthus annuus", "Homo sapiens"))
#' gnr_resolve(sci = c("Asteraceae", "Plantae"))
Expand Down Expand Up @@ -131,6 +133,8 @@ gnr_resolve <- function(sci, data_source_ids = NULL, resolve_once = FALSE,
with_canonical_ranks = FALSE, http = "get", cap_first = TRUE,
fields = "minimal", names = NULL, ...) {

lifecycle::deprecate_warn(what = "taxize::gnr_resolve()", with = "taxize::gna_verifier()", when = "v0.9.103")

if (!is.null(names)) {
lifecycle::deprecate_warn(when = "v0.9.97", what = "gnr_resolve(names)", with = "gnr_resolve(sci)")
sci <- names
Expand Down
48 changes: 31 additions & 17 deletions R/iucn_id.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Get an ID for a IUCN listed taxon
#'
#' @export
#' @param sciname character; Scientific name. Should be cleand and in the
#' @param sciname character; Scientific name. Should be cleaned and in the
#' format `*<Genus> <Species>*`. One or more.
#' @param key (character) required. you IUCN Redlist API key. See
#' [rredlist::rredlist-package] for help on authenticating with
Expand All @@ -20,32 +20,46 @@
#' iucn_id(c("Panthera uncia", "Lynx lynx"))
#'
#' # many names, some not found
#' iucn_id(c("Panthera uncia", "Lynx lynx", "foo bar", "hello world"))
#' iucn_id(c("Panthera uncia", "Lynx lynx", "foo bar", "Gorilla gorilla gorilla"))
#'
#' # a name not found
#' iucn_id("Foo bar")
#' }
iucn_id <- function(sciname, key = NULL, ...) {
out <- list()
for (i in seq_along(sciname)) {
out[[i]] <- get_iucn_id(sciname[[i]], key = key, ...)
parts <- strsplit(sciname, split = ' +')
lengths <- vapply(parts, length, FUN.VALUE = numeric(1))
invalid_lengths <- lengths <= 1 | lengths > 3
if (any(invalid_lengths)) {
error_list <- paste0(' ', which(invalid_lengths), ': "', sciname[invalid_lengths], '"')
if (length(error_list) > 100) {
error_list <- c(error_list[1:100], ' ...')
}
stop(
'The following inputs have the incorrect number of elements in their species name:\n',
paste0(error_list, collapse = '\n')
)
}

for (i in seq_along(parts)) {
out[[i]] <- get_iucn_id(parts[[i]], key = key, ...)
}
unlist(out)
}

get_iucn_id <- function(z, key = NULL, ...) {
tmp <- rredlist::rl_search(z, key = key, ...)
if (NROW(tmp$result) == 0) {
NA
} else if (NROW(tmp$result) > 1) {
iduniq <- unique(tmp$result$taxonid)
if (length(iduniq) == 1) {
iduniq
} else {
stop("> 1 result found, alter your query", call. = FALSE)
get_iucn_id <- function(parts, key = NULL, ...) {
tryCatch(
{
if (length(parts) == 2) {
tmp <- rredlist::rl_species(genus = parts[1], species = parts[2], key = key, ...)
} else {
tmp <- rredlist::rl_species(genus = parts[1], species = parts[2], infra = parts[3], key = key, ...)
}
tmp$taxon$sis_id
},
error = function(e) {
NA_integer_
}
} else {
tmp$result$taxonid
}
)
}

Loading

0 comments on commit 243afde

Please sign in to comment.