merge

Merge branch 'master' of github.com:ropensci/taxize # Conflicts: # DESCRIPTION
ropensci · Feb 4, 2025 · ae6b2fa · ae6b2fa
2 parents 2a10dd6 + 9cf4aec
commit ae6b2fa
Show file tree

Hide file tree

Showing 12 changed files with 134 additions and 166 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -11,7 +11,7 @@ Description: Interacts with a suite of web APIs for taxonomic tasks,
     Integrated Taxonomic Information System (<https://itis.gov/web_service.html>),
     and many more. Links to the API documentation for other supported APIs are
     available in the documentation for their respective functions in this package.
-Version: 0.9.102
+Version: 0.10.0
 License: MIT + file LICENSE
 URL: https://docs.ropensci.org/taxize/ (website),
     https://github.com/ropensci/taxize (devel),
@@ -71,7 +71,7 @@ Authors@R:
              role = "ctb"),
       person(given = "rOpenSci",
              role = "fnd",
-             comment = "https://ropensci.org/"))
+             comment = c(ROR = "019jywm96")))
 LazyLoad: yes
 LazyData: true
 Encoding: UTF-8
@@ -104,7 +104,6 @@ Imports:
     curl,
     stringi
 Suggests:
-    knitr,
     testthat,
     vegan,
     vcr
@@ -113,4 +112,3 @@ Roxygen: list(markdown = TRUE)
 X-schema.org-applicationCategory: Taxonomy
 X-schema.org-keywords: taxonomy, biology, nomenclature, JSON, API, web, api-client, identifiers, species, names
 X-schema.org-isPartOf: https://ropensci.org
-VignetteBuilder: knitr
diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,14 @@
+taxize 0.10.0
+=============
+
+* Replaced deprecated GNR functions with analogous GNA functions such as `gna_verifier` and `gna_parse`
+* Rewrote `scrapenames` for the new API
+* Updated use of `rredlist` to reflect new API changes
+* Many bug fixes
+
+
 taxize 0.9.101
+=============
 
 * Add `rworkflows`.
   - Update *.Rbuildignore* for `rworkflows`.

diff --git a/R/class2tree.R b/R/class2tree.R
@@ -66,10 +66,12 @@
 #' }
 #' 
 class2tree <- function(input, varstep = TRUE, check = TRUE, remove_shared = FALSE, ...) {
+
   if (any(is.na(input))) {
     message('Removed species without classification')
     input <- input[!is.na(input)]
   }
+
 
   # Check that there are more than 2 taxa
   if (length(input) < 3)
@@ -78,6 +80,9 @@ class2tree <- function(input, varstep = TRUE, check = TRUE, remove_shared = FALS
   if (length(unique(names(input))) < length(names(input)))
     stop("Input list of classifications contains duplicates")
 
+  # Convert tibbles to data.frames
+  input <- lapply(input, as.data.frame)
+
   # Get rank and ID list
   message('Get all ranks and their taxIDs')
   rankList <- dt2df(lapply(input, get_rank), idcol = FALSE)

diff --git a/R/gbif_helpers.R b/R/gbif_helpers.R
@@ -3,6 +3,7 @@ gbif_name_backbone <- function(name, rank = NULL, kingdom = NULL, phylum = NULL,
   class = NULL, order = NULL, family = NULL, genus = NULL, strict = FALSE,
   start = NULL, limit = 500, ...) {
 
+
   url = 'https://api.gbif.org/v1/species/match'
   args <- tc(list(name = name, rank = rank, kingdom = kingdom,
                   phylum = phylum, class = class, order = order, family = family,
@@ -13,7 +14,7 @@ gbif_name_backbone <- function(name, rank = NULL, kingdom = NULL, phylum = NULL,
   temp$raise_for_status()
   tt <- jsonlite::fromJSON(temp$parse("UTF-8"), FALSE)
 
-  if (all(names(tt) %in% c('confidence', 'synonym', 'matchType'))) {
+  if (all(names(tt) %in% c('confidence', 'synonym', 'matchType', "note"))) {
     data.frame(NULL)
   } else {
     dd <- data.table::setDF(

diff --git a/R/get_gbifid.R b/R/get_gbifid.R
@@ -140,6 +140,7 @@ get_gbifid <- function(sci, ask = TRUE, messages = TRUE, rows = NA,
   assert(rank, "character")
   assert(method, "character")
   assert_rows(rows)
+
   if (!is.null(sciname)) {
     lifecycle::deprecate_warn(when = "v0.9.97", what = "get_gbifid(sciname)", with = "get_gbifid(sci)")
     sci <- sciname
@@ -155,6 +156,9 @@ get_gbifid <- function(sci, ask = TRUE, messages = TRUE, rows = NA,
     items <- c(sci, tstate$taxa_completed())
   }
 
+  # Escape problematic characters
+  sci <- gsub(sci, pattern = "[^\\]?'", replacement = "\\\\'")
+
   prog <- progressor$new(items = items, suppress = !messages)
   done <- tstate$get()
   for (i in seq_along(done)) prog$completed(names(done)[i], done[[i]]$att)

diff --git a/R/gna_verifier.R b/R/gna_verifier.R
@@ -24,12 +24,12 @@
 #'   is not included in data-sources.
 #' @param main_taxon_threshold A `numeric` vector from 0.5 to 1. This sets the
 #'   minimal percentage for the main taxon discovery.
-#' @param output_type A `character` vector of length 1, one of `table`, `list`,
-#'   `json`, indicating the format of the output. The tabular output only
-#'   contains values that consistently appear in all results, so `list` or
-#'   `json` output can have additional information. For `list` and `json`
-#'   outputs, only values for unique taxon names are returned, but the `table`
-#'   output has rows that correspond 1-1 with the input data.
+#' @param output_type A `character` vector of length 1, either `table` or
+#'   `list`, indicating the format of the output. The tabular output only
+#'   contains values that consistently appear in all results, so `list` output
+#'   can have additional information. For `list` output, only values for
+#'   unique taxon names are returned, but the `table` output has rows that
+#'   correspond 1-1 with the input data.
 #' @param ... Curl options passed on to [crul::HttpClient]
 #'
 #' @return Depends on the value of the `output_type` option
@@ -52,6 +52,8 @@ gna_verifier <- function(
     output_type = 'table',
     ...
 ) {
+  batch_size <- 100 # How many names to lookup with each api call
+
   # Parse and verify input options
   data_sources <- as.character(data_sources)
   is_number <- grepl(data_sources, pattern = '[0-9]+')
@@ -79,33 +81,38 @@ gna_verifier <- function(
 
   # Convert input to unique values to avoid redundant API work
   unique_names <- unique(names)
+  name_batches <- split(unique_names, ceiling(seq_along(unique_names) / batch_size)) 
 
-  # Format the API GET request
-  base_url <- 'https://verifier.globalnames.org/'
-  args <- c(
-    data_sources = paste0(data_sources, collapse = '|'),
-    all_matches = tolower(as.character(all_matches)),
-    capitalize = tolower(as.character(capitalize)),
-    species_group = tolower(as.character(species_group)),
-    fuzzy_uninomial = tolower(as.character(fuzzy_uninomial)),
-    stats = tolower(as.character(stats))
-  )
-  formatted_args <- paste0(paste0(names(args), '=', args), collapse = '&')
-  formatted_path <- paste0(
-    'api/v1/verifications/',
-    paste0(unique_names, collapse = '|'),
-    '?', formatted_args
-  )
-
-  # Make and parse API call
-  api <- crul::HttpClient$new(base_url, headers = tx_ual, opts = list(...))
-  response <- api$get(path = formatted_path)
-  response$raise_for_status()
-  response_json <- response$parse("UTF-8")
-  if (output_type == 'json') {
-    return(response_json)
-  }
-  response_data <- jsonlite::fromJSON(response_json, FALSE)
+  batch_data <- lapply(name_batches, function(batch) {
+    # Format the API GET request
+    base_url <- 'https://verifier.globalnames.org/'
+    args <- c(
+      data_sources = paste0(data_sources, collapse = '|'),
+      all_matches = tolower(as.character(all_matches)),
+      capitalize = tolower(as.character(capitalize)),
+      species_group = tolower(as.character(species_group)),
+      fuzzy_uninomial = tolower(as.character(fuzzy_uninomial)),
+      stats = tolower(as.character(stats))
+    )
+    formatted_args <- paste0(paste0(names(args), '=', args), collapse = '&')
+    formatted_path <- paste0(
+      'api/v1/verifications/',
+      paste0(batch, collapse = '|'),
+      '?', formatted_args
+    )
+
+    # Make and parse API call
+    api <- crul::HttpClient$new(base_url, headers = tx_ual, opts = list(...))
+    response <- api$get(path = formatted_path)
+    response$raise_for_status()
+    response_json <- response$parse("UTF-8")
+    response_data <- jsonlite::fromJSON(response_json, FALSE)
+    return(response_data$names)
+  })
+
+  # Combine batch data to a single list
+  response_data <- unlist(batch_data, recursive = FALSE)
+  names(response_data) <- unique_names
   if (output_type == 'list') {
     return(response_data)
   }
@@ -117,7 +124,6 @@ gna_verifier <- function(
     'dataSourceTitleShort',
     'curation',
     'recordId',
-    'outlink',
     'entryDate', 
     'sortScore',
     'matchedNameID',
@@ -145,19 +151,25 @@ gna_verifier <- function(
     'parsingQualityScore'
   )
   convert_entry_to_row <- function(x, input_name) {
-    output <- c(input_name, unlist(x))
-    names(output)[1] <- 'submittedName'
+    if (is.null(x)) { # If there was no match
+      output <- c(input_name, rep(NA, length(used_cols)))
+      names(output) <- c('submittedName', used_cols)
+      output['matchType'] <- 'NoMatch'
+    } else {
+      output <- c(input_name, unlist(x))
+      names(output)[1] <- 'submittedName'
+    }
     parsed_names <- vapply(strsplit(names(output), split = '\\.'),
                            function(y) y[length(y)], FUN.VALUE = character(1))
     names(output) <- parsed_names
     return(as.data.frame(as.list(output[used_cols])))
   }
   if (all_matches) {
-    response_table <- do.call(rbind, lapply(response_data$names, function(x) {
+    response_table <- do.call(rbind, lapply(response_data, function(x) {
       do.call(rbind, lapply(x$results, function(y) convert_entry_to_row(y, x$name)))
     }))    
   } else {
-    response_table <- do.call(rbind, lapply(response_data$names, function(x) {
+    response_table <- do.call(rbind, lapply(response_data, function(x) {
       convert_entry_to_row(x$bestResult, x$name)
     }))
   }

diff --git a/README.md b/README.md
@@ -4,8 +4,6 @@
 [![Project Status: Active – The project has reached a stable, usable
 state and is being actively
 developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
-[![cran
-checks](https://badges.cranchecks.info/worst/taxize.svg)](https://cran.r-project.org/web/checks/check_results_taxize.html)
 [![rworkflows](https://github.com/ropensci/taxize/workflows/rworkflows/badge.svg)](https://github.com/ropensci/taxize/actions/)
 [![codecov](https://codecov.io/gh/ropensci/taxize/branch/master/graph/badge.svg)](https://app.codecov.io/gh/ropensci/taxize)
 [![rstudio mirror

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -54,15 +54,17 @@ reference:
   - title: "Name Resolution Services"
     contents:
       - gni_details
-      - gni_search
       - starts_with("gnr_")
       - iplant_resolve
       - tol_resolve
+      - gna_search
+      - gna_data_sources
   - title: "Name Parsers"
     contents:
-      - gn_parse
       - gni_parse
       - gbif_parse
+      - gna_verifier
+      - gna_parse
   - title: "NCBI Taxonomy"
     contents:
       - starts_with("ncbi_")