working on fixing functions using GNR
Zachary Foster committed Oct 23, 2024
1 parent 78d0d72 commit 5d2b66c
Showing 43 changed files with 75,585 additions and 505 deletions.
88 changes: 39 additions & 49 deletions R/gni_parse.R
@@ -1,55 +1,45 @@
#' Parse scientific names using EOL's name parser.
#'
#' THIS FUNCTION IS DEFUNCT.
#'
#' @export
#' @param names A vector of length 1 or more of taxonomic names
#' @param ... Curl options passed on to [crul::verb-GET]
#' @return A data.frame with results, the submitted names, and the
#' parsed names with additional information.
#' @seealso [gbif_parse()], [gn_parse()]
#' @references http://gni.globalnames.org/
#' @examples \dontrun{
#' gni_parse("Cyanistes caeruleus")
#' gni_parse("Plantago minor")
#' gni_parse("Plantago minor minor")
#' gni_parse(c("Plantago minor minor","Helianthus annuus texanus"))
#'
#' # pass on curl options
#' gni_parse("Cyanistes caeruleus", verbose = TRUE)
#' }
#' @keywords internal
gni_parse <- function(names, ...) {
names <- paste0(names, collapse = "|")
cli <- crul::HttpClient$new(paste0(gni_base(), "parsers.json"),
headers = tx_ual, opts = list(...))
tt <- cli$get(query = list(names = names))
tt$raise_for_status()
out <- jsonlite::fromJSON(tt$parse("UTF-8"), FALSE)
dt2df(lapply(out, gni_parser), idcol = FALSE)
.Defunct("ncbi_searcher", "traits",
msg = "This function is defunct. See gn_parse()")
# names <- paste0(names, collapse = "|")
# url <- paste0("https://parser.globalnames.org/api/v1/", names)
# cli <- crul::HttpClient$new(url, headers = tx_ual, opts = list(...))
# tt <- cli$get(query = list(cultivars = cultivars, csv = TRUE))
# tt$raise_for_status()
# out <- jsonlite::fromJSON(tt$parse("UTF-8"), FALSE)
# dt2df(lapply(out, gni_parser), idcol = FALSE)
}

gni_parser <- function(x) {
positions_names <- vapply(x$scientificName$positions, function(y)
paste("position_", y[[1]], sep = ""), "", USE.NAMES = FALSE)
nums <- vapply(x$scientificName$positions, function(y) y[[2]], 1,
USE.NAMES = FALSE)
pv <- data.frame(as.list(setNames(nums, positions_names)),
stringsAsFactors = FALSE)

nmz <- c("verbatim","canonical", "normalized","hybrid","parsed")
singles <- data.frame(x$scientificName[names(x$scientificName) %in% nmz],
stringsAsFactors = FALSE)

details2 <- data.frame()
if (x$scientificName$parsed) {
details_ <- x$scientificName$details[[1]]
details_ <- details_[!names(details_) %in% 'status']
details <- dt2df(Map(function(x, y) data.frame(y, x,
stringsAsFactors = FALSE), details_, names(details_)),
idcol = FALSE)[,-3]
details2 <- as.data.frame(t(data.frame(details[,2])))
names(details2) <- details[,1]
row.names(details2) <- NULL
}

data.frame(Filter(NROW, list(details2, singles, pv)),
stringsAsFactors = FALSE)
}
# gni_parser <- function(x) {
# positions_names <- vapply(x$scientificName$positions, function(y)
# paste("position_", y[[1]], sep = ""), "", USE.NAMES = FALSE)
# nums <- vapply(x$scientificName$positions, function(y) y[[2]], 1,
# USE.NAMES = FALSE)
# pv <- data.frame(as.list(setNames(nums, positions_names)),
# stringsAsFactors = FALSE)
#
# nmz <- c("verbatim","canonical", "normalized","hybrid","parsed")
# singles <- data.frame(x$scientificName[names(x$scientificName) %in% nmz],
# stringsAsFactors = FALSE)
#
# details2 <- data.frame()
# if (x$scientificName$parsed) {
# details_ <- x$scientificName$details[[1]]
# details_ <- details_[!names(details_) %in% 'status']
# details <- dt2df(Map(function(x, y) data.frame(y, x,
# stringsAsFactors = FALSE), details_, names(details_)),
# idcol = FALSE)[,-3]
# details2 <- as.data.frame(t(data.frame(details[,2])))
# names(details2) <- details[,1]
# row.names(details2) <- NULL
# }
#
# data.frame(Filter(NROW, list(details2, singles, pv)),
# stringsAsFactors = FALSE)
# }
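
For reference, a minimal sketch of hitting the new parser service directly, following the commented-out draft above. The endpoint path, the percent-encoding, and the response shape are assumptions and have not been tested against the live service:

```r
# Sketch: call the new Global Names parser with pipe-separated names in
# the URL path, as the commented-out draft above suggests. Untested; the
# endpoint and response shape are assumptions.
library(crul)
library(jsonlite)

gn_parse_sketch <- function(names, ...) {
  names <- paste0(names, collapse = "|")
  url <- paste0("https://parser.globalnames.org/api/v1/",
    utils::URLencode(names))  # percent-encodes spaces and "|"
  cli <- crul::HttpClient$new(url, opts = list(...))
  res <- cli$get()
  res$raise_for_status()
  jsonlite::fromJSON(res$parse("UTF-8"), simplifyVector = FALSE)
}

# gn_parse_sketch(c("Plantago minor minor", "Helianthus annuus texanus"))
```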
114 changes: 75 additions & 39 deletions R/scrapenames.r
@@ -1,38 +1,41 @@
#' @title Resolve names using Global Names Recognition and Discovery.
#'
#' @description Uses the Global Names Recognition and Discovery service, see
#' http://gnrd.globalnames.org/
#'
#' Note: this function sometimes returns data and sometimes does not. The API
#' that this function uses is extremely buggy.
#'
#' @export
#' @param url An encoded URL for a web page, PDF, Microsoft Office document, or
#' image file, see examples
#' @param file When using multipart/form-data as the content-type, a file may
#' be sent. This should be a path to your file on your machine.
#' @param text Type: string. Text content; best used with a POST request, see
#' examples
#' @param engine (optional) (integer) Default: 0. Either 1 for TaxonFinder,
#' 2 for NetiNeti, or 0 for both. If absent, both engines are used.
#' @param unique (optional) (logical) If `TRUE` (default), response has
#' unique names without offsets.
#' @param verbatim (optional) Type: boolean, If `TRUE` (default to
#' `FALSE`), response excludes verbatim strings.
#' @param detect_language (optional) Type: boolean, When `TRUE` (default),
#' NetiNeti is not used if the language of incoming text is determined not to
#' be English. When `FALSE`, NetiNeti will be used if requested.
#' @param all_data_sources (optional) Type: boolean. Resolve found names
#' against all available Data Sources.
#' @param data_source_ids (optional) Type: string. Pipe separated list of
#' data source ids to resolve found names against. See list of Data Sources
#' http://resolver.globalnames.org/data_sources
#' @param return_content (logical) return OCR'ed text. returns text
#' string in `x$meta$content` slot. Default: `FALSE`
#' @param url Defunct. Use the `text` input for URLs as well as text strings.
#' @param file When using multipart/form-data as the content-type, a file may be
#' sent. This should be a path to your file on your machine.
#' @param text A text (or URL pointing to a text) for name detection.
#' @param engine (optional) (integer) Defunct. The API used no longer supports
#' this option.
#' @param unique Defunct. See the `unique_names` option.
#' @param unique_names (optional) (logical) If `TRUE` (the default), the output
#' returns unique names, instead of all name occurrences, without position
#' information of a name in the text.
#' @param verbatim (optional) Defunct. The API used no longer supports this
#' option.
#' @param detect_language (optional) Defunct. See the `language` option.
#' @param language The language of the text. Language value is used for
#' calculation of Bayesian odds. If this parameter is not given, eng is used
#' by default. Currently only English and German languages are supported.
#' Valid values are: `eng`, `deu`, `detect`.
#' @param all_data_sources (optional) Defunct. The API used no longer supports
#' this option.
#' @param data_source_ids (optional) Defunct. See the `sources` option.
#' @param sources (optional) Vector of data source ids to resolve found names
#'   against; the ids are collapsed into a pipe-separated list for the API. See
#'   the list of Data Sources: http://resolver.globalnames.org/data_sources
#' @param return_content (logical) return OCR'ed text. returns text string in
#' `x$meta$content` slot. Default: `FALSE`
#' @param ... Further args passed to [crul::verb-GET]
#' @author Scott Chamberlain
#' @return A list of length two, first is metadata, second is the data as a
#'   data.frame.
#' @details One of file or text must be specified - and only one of them.
#' @examples \dontrun{
#' # Get data from a website using its URL
@@ -49,7 +52,7 @@
#'
#' # With arguments
#' scrapenames(text = 'https://www.mapress.com/zootaxa/2012/f/z03372p265f.pdf',
#'   unique_names=TRUE)
#' scrapenames(text = 'https://en.wikipedia.org/wiki/Spider',
#'   sources=c(1, 169))
#'
@@ -69,24 +72,56 @@
#' scrapenames(text='https://www.mapress.com/zootaxa/2012/f/z03372p265f.pdf',
#' return_content = TRUE)
#' }
scrapenames <- function(url = NULL, file = NULL, text = NULL, engine = NULL,
unique = NULL, verbatim = NULL, detect_language = NULL,
all_data_sources = NULL, data_source_ids = NULL,
return_content = FALSE, ...) {

scrapenames <- function(
url = NULL,
file = NULL,
text = NULL,
engine = NULL,
unique = NULL,
unique_names = NULL,
verbatim = NULL,
detect_language = NULL,
language = NULL,
all_data_sources = NULL,
data_source_ids = NULL,
sources = NULL,
return_content = FALSE,
...
) {

# Error if defunct parameters are used.
if (!is.null(url)) {
stop(call. = FALSE, 'The `url` option is defunct. Use the `text` option for URLs as well as text strings.')
}
if (!is.null(unique)) {
stop(call. = FALSE, 'The `unique` option is defunct. See the `unique_names` option.')
}
if (!is.null(engine)) {
stop(call. = FALSE, 'The `engine` option is defunct. The API no longer supports this option.')
}
if (!is.null(verbatim)) {
stop(call. = FALSE, 'The `verbatim` option is defunct. The API no longer supports this option.')
}
if (!is.null(detect_language)) {
stop(call. = FALSE, 'The `detect_language` option is defunct. See the `language` option.')
}
if (!is.null(all_data_sources)) {
stop(call. = FALSE, 'The `all_data_sources` option is defunct. The API no longer supports this option.')
}
if (!is.null(data_source_ids)) {
stop(call. = FALSE, 'The `data_source_ids` option is defunct. See the `sources` option.')
}

method <- tc(list(url = url, file = file, text = text))
if (length(method) > 1) {
stop("Only one of url, file, or text can be used", call. = FALSE)
}

base <- "http://gnrd.globalnames.org/name_finder.json"
base <- "http://gnrd.globalnames.org/api/v1/find"
if (!is.null(sources))
sources <- paste0(sources, collapse = "|")
args <- tc(list(url = url, text = text, engine = engine, unique = unique,
verbatim = verbatim, detect_language = detect_language,
all_data_sources = all_data_sources,
data_source_ids = data_source_ids,
return_content = as_l(return_content)))
args <- tc(list(
text = text,
unique_names = unique_names,
language = language,
sources = sources,
return_content = as_l(return_content)
))
cli <- crul::HttpClient$new(base, headers = tx_ual, opts = list(...))
if (names(method) == 'text') {
tt <- cli$get(query = args)
@@ -116,3 +151,4 @@ scrapenames <- function(url = NULL, file = NULL, text = NULL, engine = NULL,
meta <- datout[!names(datout) %in% c("names")]
list(meta = meta, data = nmslwr(datout$names))
}
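
To sanity-check the new request format while the function is untested, the call it builds can be reproduced directly. This mirrors the GET-with-query pattern in the code above; the response structure is an assumption until the function is tested against the live service:

```r
# Reproduce the request the updated scrapenames() sends to the new
# GNRD/GNfinder endpoint. Mirrors the in-progress code above; the
# response structure is an assumption until the function is tested.
library(crul)
library(jsonlite)

cli <- crul::HttpClient$new("http://gnrd.globalnames.org/api/v1/find")
res <- cli$get(query = list(
  text = "The blue tit (Cyanistes caeruleus) is a small passerine bird.",
  unique_names = TRUE,
  language = "eng"
))
res$raise_for_status()
out <- jsonlite::fromJSON(res$parse("UTF-8"))
str(out, max.level = 1)
```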

13 changes: 13 additions & 0 deletions log.Rmd
@@ -10,6 +10,19 @@ clipr::write_clip(paste0('## ', format(Sys.time(), "%F (%A %B %e)\n\n")))
# Date and time:
clipr::write_clip(paste0('## ', format(Sys.time(), "%F %X %Z (%A %B %e)\n\n")))
```
## 2024-10-16 (Wednesday October 16)

Working on updating `scrapenames`.
The options have been updated, but it has not been tested.

## 2024-10-04 (Friday October 4)

It seems some of the functions that use the Global Names APIs no longer work.
There seem to be multiple APIs (and corresponding command line tools) with multiple endpoints, so it's hard to tell which are the new versions of the presumably deprecated APIs.
I will look into each of the APIs/tools.

* GNFinder: searches for Latin names in text. Seems intended to provide metadata about species mentioned in old publications. Results can be passed to GNVerifier.
* GNVerifier: Used to provide the currently accepted name for a species. A rough sketch of chaining the two is below.
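
A rough sketch of chaining the two services from R, to record the intended pipeline. The endpoint URLs and JSON field names here are assumptions based on the services' docs and are untested:

```r
# Find names in text with GNfinder, then verify them with GNverifier.
# Endpoints and payload field names are assumptions, untested here.
library(crul)
library(jsonlite)

txt <- "Both Cyanistes caeruleus and Parus major were recorded."

# 1. Detect candidate scientific names in the text
finder <- crul::HttpClient$new("https://finder.globalnames.org")
res1 <- finder$post("api/v1/find", body = list(text = txt), encode = "json")
res1$raise_for_status()
found <- jsonlite::fromJSON(res1$parse("UTF-8"))

# 2. Verify the detected names against name data sources
verifier <- crul::HttpClient$new("https://verifier.globalnames.org")
res2 <- verifier$post("api/v1/verifications",
  body = list(nameStrings = found$names$name), encode = "json")
res2$raise_for_status()
verified <- jsonlite::fromJSON(res2$parse("UTF-8"))
str(verified, max.level = 1)
```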

## 2021-10-13 (Wednesday October 13)
