Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
Merge branch 'master' of github.com:ropensci/taxize

# Conflicts:
#	DESCRIPTION
  • Loading branch information
Zachary Foster committed Feb 4, 2025
2 parents 2a10dd6 + 9cf4aec commit ae6b2fa
Show file tree
Hide file tree
Showing 12 changed files with 134 additions and 166 deletions.
6 changes: 2 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Description: Interacts with a suite of web APIs for taxonomic tasks,
Integrated Taxonomic Information System (<https://itis.gov/web_service.html>),
and many more. Links to the API documentation for other supported APIs are
available in the documentation for their respective functions in this package.
Version: 0.9.102
Version: 0.10.0
License: MIT + file LICENSE
URL: https://docs.ropensci.org/taxize/ (website),
https://github.com/ropensci/taxize (devel),
Expand Down Expand Up @@ -71,7 +71,7 @@ Authors@R:
role = "ctb"),
person(given = "rOpenSci",
role = "fnd",
comment = "https://ropensci.org/"))
comment = c(ROR = "019jywm96")))
LazyLoad: yes
LazyData: true
Encoding: UTF-8
Expand Down Expand Up @@ -104,7 +104,6 @@ Imports:
curl,
stringi
Suggests:
knitr,
testthat,
vegan,
vcr
Expand All @@ -113,4 +112,3 @@ Roxygen: list(markdown = TRUE)
X-schema.org-applicationCategory: Taxonomy
X-schema.org-keywords: taxonomy, biology, nomenclature, JSON, API, web, api-client, identifiers, species, names
X-schema.org-isPartOf: https://ropensci.org
VignetteBuilder: knitr
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
taxize 0.10.0
=============

* Replaced deprecated GNR functions with analogous GNA functions such as `gna_verifier` and `gna_parse`
* Rewrote `scrapenames` for the new API
* Updated use of `rredlist` to reflect new API changes
* Many bug fixes


taxize 0.9.101
=============

* Add `rworkflows`.
- Update *.Rbuildignore* for `rworkflows`.
Expand Down
5 changes: 5 additions & 0 deletions R/class2tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,12 @@
#' }
#'
class2tree <- function(input, varstep = TRUE, check = TRUE, remove_shared = FALSE, ...) {

if (any(is.na(input))) {
message('Removed species without classification')
input <- input[!is.na(input)]
}


# Check that there are more than 2 taxa
if (length(input) < 3)
Expand All @@ -78,6 +80,9 @@ class2tree <- function(input, varstep = TRUE, check = TRUE, remove_shared = FALS
if (length(unique(names(input))) < length(names(input)))
stop("Input list of classifications contains duplicates")

# Convert tibbles to data.frames
input <- lapply(input, as.data.frame)

# Get rank and ID list
message('Get all ranks and their taxIDs')
rankList <- dt2df(lapply(input, get_rank), idcol = FALSE)
Expand Down
3 changes: 2 additions & 1 deletion R/gbif_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ gbif_name_backbone <- function(name, rank = NULL, kingdom = NULL, phylum = NULL,
class = NULL, order = NULL, family = NULL, genus = NULL, strict = FALSE,
start = NULL, limit = 500, ...) {


url = 'https://api.gbif.org/v1/species/match'
args <- tc(list(name = name, rank = rank, kingdom = kingdom,
phylum = phylum, class = class, order = order, family = family,
Expand All @@ -13,7 +14,7 @@ gbif_name_backbone <- function(name, rank = NULL, kingdom = NULL, phylum = NULL,
temp$raise_for_status()
tt <- jsonlite::fromJSON(temp$parse("UTF-8"), FALSE)

if (all(names(tt) %in% c('confidence', 'synonym', 'matchType'))) {
if (all(names(tt) %in% c('confidence', 'synonym', 'matchType', "note"))) {
data.frame(NULL)
} else {
dd <- data.table::setDF(
Expand Down
4 changes: 4 additions & 0 deletions R/get_gbifid.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ get_gbifid <- function(sci, ask = TRUE, messages = TRUE, rows = NA,
assert(rank, "character")
assert(method, "character")
assert_rows(rows)

if (!is.null(sciname)) {
lifecycle::deprecate_warn(when = "v0.9.97", what = "get_gbifid(sciname)", with = "get_gbifid(sci)")
sci <- sciname
Expand All @@ -155,6 +156,9 @@ get_gbifid <- function(sci, ask = TRUE, messages = TRUE, rows = NA,
items <- c(sci, tstate$taxa_completed())
}

# Escape problematic characters
sci <- gsub(sci, pattern = "[^\\]?'", replacement = "\\\\'")

prog <- progressor$new(items = items, suppress = !messages)
done <- tstate$get()
for (i in seq_along(done)) prog$completed(names(done)[i], done[[i]]$att)
Expand Down
86 changes: 49 additions & 37 deletions R/gna_verifier.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@
#' is not included in data-sources.
#' @param main_taxon_threshold A `numeric` vector from 0.5 to 1. This sets the
#' minimal percentage for the main taxon discovery.
#' @param output_type A `character` vector of length 1, one of `table`, `list`,
#' `json`, indicating the format of the output. The tabular output only
#' contains values that consistently appear in all results, so `list` or
#' `json` output can have additional information. For `list` and `json`
#' outputs, only values for unique taxon names are returned, but the `table`
#' output has rows that correspond 1-1 with the input data.
#' @param output_type A `character` vector of length 1, either `table` or
#' `list`, indicating the format of the output. The tabular output only
#' contains values that consistently appear in all results, so `list` output
#' can have additional information. For `list` output, only values
#' for unique taxon names are returned, but the `table` output has rows that
#' correspond 1-1 with the input data.
#' @param ... Curl options passed on to [crul::HttpClient]
#'
#' @return Depends on the value of the `output_type` option
Expand All @@ -52,6 +52,8 @@ gna_verifier <- function(
output_type = 'table',
...
) {
batch_size <- 100 # How many names to lookup with each api call

# Parse and verify input options
data_sources <- as.character(data_sources)
is_number <- grepl(data_sources, pattern = '[0-9]+')
Expand Down Expand Up @@ -79,33 +81,38 @@ gna_verifier <- function(

# Convert input to unique values to avoid redundant API work
unique_names <- unique(names)
name_batches <- split(unique_names, ceiling(seq_along(unique_names) / batch_size))

# Format the API GET request
base_url <- 'https://verifier.globalnames.org/'
args <- c(
data_sources = paste0(data_sources, collapse = '|'),
all_matches = tolower(as.character(all_matches)),
capitalize = tolower(as.character(capitalize)),
species_group = tolower(as.character(species_group)),
fuzzy_uninomial = tolower(as.character(fuzzy_uninomial)),
stats = tolower(as.character(stats))
)
formatted_args <- paste0(paste0(names(args), '=', args), collapse = '&')
formatted_path <- paste0(
'api/v1/verifications/',
paste0(unique_names, collapse = '|'),
'?', formatted_args
)

# Make and parse API call
api <- crul::HttpClient$new(base_url, headers = tx_ual, opts = list(...))
response <- api$get(path = formatted_path)
response$raise_for_status()
response_json <- response$parse("UTF-8")
if (output_type == 'json') {
return(response_json)
}
response_data <- jsonlite::fromJSON(response_json, FALSE)
batch_data <- lapply(name_batches, function(batch) {
# Format the API GET request
base_url <- 'https://verifier.globalnames.org/'
args <- c(
data_sources = paste0(data_sources, collapse = '|'),
all_matches = tolower(as.character(all_matches)),
capitalize = tolower(as.character(capitalize)),
species_group = tolower(as.character(species_group)),
fuzzy_uninomial = tolower(as.character(fuzzy_uninomial)),
stats = tolower(as.character(stats))
)
formatted_args <- paste0(paste0(names(args), '=', args), collapse = '&')
formatted_path <- paste0(
'api/v1/verifications/',
paste0(batch, collapse = '|'),
'?', formatted_args
)

# Make and parse API call
api <- crul::HttpClient$new(base_url, headers = tx_ual, opts = list(...))
response <- api$get(path = formatted_path)
response$raise_for_status()
response_json <- response$parse("UTF-8")
response_data <- jsonlite::fromJSON(response_json, FALSE)
return(response_data$names)
})

# Combine batch data to a single list
response_data <- unlist(batch_data, recursive = FALSE)
names(response_data) <- unique_names
if (output_type == 'list') {
return(response_data)
}
Expand All @@ -117,7 +124,6 @@ gna_verifier <- function(
'dataSourceTitleShort',
'curation',
'recordId',
'outlink',
'entryDate',
'sortScore',
'matchedNameID',
Expand Down Expand Up @@ -145,19 +151,25 @@ gna_verifier <- function(
'parsingQualityScore'
)
convert_entry_to_row <- function(x, input_name) {
output <- c(input_name, unlist(x))
names(output)[1] <- 'submittedName'
if (is.null(x)) { # If there was no match
output <- c(input_name, rep(NA, length(used_cols)))
names(output) <- c('submittedName', used_cols)
output['matchType'] <- 'NoMatch'
} else {
output <- c(input_name, unlist(x))
names(output)[1] <- 'submittedName'
}
parsed_names <- vapply(strsplit(names(output), split = '\\.'),
function(y) y[length(y)], FUN.VALUE = character(1))
names(output) <- parsed_names
return(as.data.frame(as.list(output[used_cols])))
}
if (all_matches) {
response_table <- do.call(rbind, lapply(response_data$names, function(x) {
response_table <- do.call(rbind, lapply(response_data, function(x) {
do.call(rbind, lapply(x$results, function(y) convert_entry_to_row(y, x$name)))
}))
} else {
response_table <- do.call(rbind, lapply(response_data$names, function(x) {
response_table <- do.call(rbind, lapply(response_data, function(x) {
convert_entry_to_row(x$bestResult, x$name)
}))
}
Expand Down
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
[![Project Status: Active – The project has reached a stable, usable
state and is being actively
developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
[![cran
checks](https://badges.cranchecks.info/worst/taxize.svg)](https://cran.r-project.org/web/checks/check_results_taxize.html)
[![rworkflows](https://github.com/ropensci/taxize/workflows/rworkflows/badge.svg)](https://github.com/ropensci/taxize/actions/)
[![codecov](https://codecov.io/gh/ropensci/taxize/branch/master/graph/badge.svg)](https://app.codecov.io/gh/ropensci/taxize)
[![rstudio mirror
Expand Down
6 changes: 4 additions & 2 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,17 @@ reference:
- title: "Name Resolution Services"
contents:
- gni_details
- gni_search
- starts_with("gnr_")
- iplant_resolve
- tol_resolve
- gna_search
- gna_data_sources
- title: "Name Parsers"
contents:
- gn_parse
- gni_parse
- gbif_parse
- gna_verifier
- gna_parse
- title: "NCBI Taxonomy"
contents:
- starts_with("ncbi_")
Expand Down
Loading

0 comments on commit ae6b2fa

Please sign in to comment.