From a9eb7ae203d7687982b35b7925c5571be2cb339d Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Wed, 20 Nov 2024 09:20:22 -0700 Subject: [PATCH 01/16] add functions for aggregating data on published data packages: get_reference_list, get_ref_info, summarize_packages --- R/meta_analyses.R | 244 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 R/meta_analyses.R diff --git a/R/meta_analyses.R b/R/meta_analyses.R new file mode 100644 index 0000000..4e4089b --- /dev/null +++ b/R/meta_analyses.R @@ -0,0 +1,244 @@ +#' Get a list of reference codes from DataStore +#' +#' `get_reference_list` will return a list of the DataStore reference codes associated with a given reference type. Where "All" might be a bit generous: I would not expect more than the number given by "no_of_entries" as that is technically the number of entries per page and the function defaults to returning just one page (not entirely sure what a "page" is in this context). +#' +#' +#' @param reference_type String. The reference type to to query data store for. Defaults to data package ("dataPackage"). +#' @param no_of_entries Integer. The number of entries to return per page (where only one "page" of results is returned by default). Defaults to 500. +#' @param secure Logical. Defaults to FALSE for external users. Setting secure = TRUE will, with the proper credentials, return DataStore references with visibility set to both Public and Restricted. 
+#' +#' @return A List of reference IDs +#' @export +#' +#' @examples +#' \dontrun{ +#' get_reference_list() +#' } +get_references_list <- function (reference_type = "dataPackage", + no_of_entries = 500, + secure = FALSE) { + server <- NULL + if (secure == TRUE) { + server <- "https://irmaservices.nps.gov/datastore-secure/v7/rest/" + } + if (secure == FALSE) { + server <- "https://irmaservices.nps.gov/datastore/v7/rest/" + } + + url <- paste0(server, + "ReferenceTypeSearch/", + reference_type, + "?top=", + no_of_entries, + "&page=1") + ref_list <- httr::content(httr::GET(url, + httr::authenticate(":", ":", "ntlm"))) + DS_reference_list <- NULL + + for (i in 1:length(seq_along(ref_list[[1]]))) { + DS_reference_list <- append(DS_reference_list, + ref_list[[1]][[i]][["referenceId"]]) + } + return(DS_reference_list) +} + +#' Return Basic information about a list of DataStore References +#' +#' The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReerence). 
+#' +#' @inheritParams get_reference_list +#'' +#' @return a data frame +#' @export +#' +#' @examples +#' \dontrun{ +#' get_ref_info() +#' } +#' +get_ref_info <- function (reference_type = "dataPackage", + no_of_entries = 500, + secure = FALSE) { + server <- NULL + if (secure == TRUE) { + server <- "https://irmaservices.nps.gov/datastore-secure/v7/rest/" + } + if (secure == FALSE) { + server <- "https://irmaservices.nps.gov/datastore/v7/rest/" + } + + url <- paste0(server, + "ReferenceTypeSearch/", + reference_type, + "?top=", + no_of_entries, + "&page=1") + ref_list <- httr::content(httr::GET(url, + httr::authenticate(":", ":", "ntlm"))) + DS_reference_list <- data.frame(referenceId =integer(), + referenceType = character(), + dateOfIssue = as.Date(character()), + visibility = factor(), + fileCount = integer(), + fileAccess = character(), + title = character(), + citation = character(), + referenceUrl = character(), + referenceGroupType = character(), + typeName = character(), + isDOI = logical(), + newVersion = character(), + mostRecentVersion = character() + ) + + for (i in 1:length(seq_along(ref_list[[1]]))) { + + if (is.null(ref_list[[1]][[i]][["newVersion"]])) { + newVersion <- NA + } else { + newVersion <- ref_list[[1]][[i]][["newVersion"]] + } + + if (is.null(ref_list[[1]][[i]][["mostRecentVersion"]])) { + mostRecentVersion <- NA + } else { + mostRecentVersion <- ref_list[[1]][[i]][["mostRecentVersion"]] + } + + ref <- c(ref_list[[1]][[i]][["referenceId"]], + ref_list[[1]][[i]][["referenceType"]], + ref_list[[1]][[i]][["dateOfIssue"]], + ref_list[[1]][[i]][["visibility"]], + ref_list[[1]][[i]][["fileCount"]], + ref_list[[1]][[i]][["fileAccess"]], + ref_list[[1]][[i]][["title"]], + ref_list[[1]][[i]][["citation"]], + ref_list[[1]][[i]][["referenceUrl"]], + ref_list[[1]][[i]][["referenceGroupType"]], + ref_list[[1]][[i]][["typeName"]], + ref_list[[1]][[i]][["isDOI"]], + newVersion, + mostRecentVersion + ) + + ref <- t(ref) + colnames(ref) <- 
colnames(DS_reference_list) + + DS_reference_list <- rbind(DS_reference_list, ref) + + } + return(DS_reference_list) +} + +#' Collect summary statistics on data packages +#' +#' Given a list of data package references from DataStore the function will download the indicated data packages (using creating the folders /data/reference for each data package; see `get_data_packages` for details), load them into R, and then collect some summary statistics on the data packages. +#' +#' If a data package fails to download (or load) into R, the function will return NAs instead of summary data about the data package as well as a message about the package status ("Loads", "Error") in the dataframe that the function returns. The function will ignore files that fall outside the data package specifications (one or more .csv files and a single .xml file ending in *_metadata.xml). +#' +#' When `check_metadata` is set to the default `FALSE`, the function will attempt to and load any .csv, regardless of the contents. Data packages with restricted access can produce false positives if you do not have the appropriate permissions to download the data as the function will still download the files, but they will be populated with unhelpful hmtl rather than the intended data. Functions that fail to load into R likely violate the data package specifications in some fundamental way (e.g. .CSV file instead of .csv or no .csv files at all). +#' +#' When `check_metadata` is set to `TRUE`, additional checks and tests are run on the data package and load errors may occur for all of the above reasons and also if there are multiple .xml files, if the metadata file name does not end in "*_metadata.xml", if there is no metadata file, or if the metadata file is EML schema-invalid. +#' +#' If you have access to restricted DataStore references (e.g. in an NPS office or logged in to an NPS VPN), you can set secure = TRUE. 
This will give you access to restricted (internal to NPS) references but if a reference is restricted to a named list of individuals you must be on that named list to access the reference. +#' +#' @param ref_list list or string of data package reference IDs from DataStore (potentially generated via `get_references_list`. +#' @param secure logical. Defaults to TRUE to access secure DataStore server and restricted data packages. Set to FALSE to to access only public references. +#' @param check_metadata Logical. Defaults to FALSE. In this case, metadata will not be checked or loaded. Any load errors will occur due to problems with .csv files (for instance if they don't exist). To test whether the metadata meets minimal requirements (is schema-valid), set check_metadata = TRUE. +#' +#' @return data frame +#' @export +#' +#' @examples +summarize_packages <- function(ref_list, + secure = TRUE, + check_metadata = FALSE) { + #setup a dataframe to return data to: + df <- data.frame(pkgid = character(), + status = character(), + fileNumber = integer(), + colNumber = integer(), + cellNumber = integer(), + fileSize = integer()) + + #get data from each data package + for (i in 1:length(seq_along(ref_list))) { + # places to add data to for each package + file_number <- 0 + col_number <- 0 + cell_number <- 0 + file_size <- 0 + + #This is where the data package will be downloaded to: + destination_dir <- paste("data/", ref_list[i], sep = "") + + #only download if the file/directory does not already exist + #caution: partially downloaded data packages WILL cause issues here! 
+ if (!file.exists(destination_dir)) { + pkg_download <- tryCatch( + NPSutils::get_data_package(ref_list[i], + secure = secure, + force = TRUE), + error = function(e) e) + + if(inherits(pkg_download, "error")) { + dat <- data.frame(ref_list[i], "Error", NA, NA, NA, NA) + colnames(dat) <- colnames(df) + df <- rbind(df, dat) + next + } + } + + #tryCatch to load the package + pkg <- tryCatch( + if (check_metadata == FALSE) { + NPSutils::load_data_package(ref_list[i]) + } else { + NPSutils::load_data_package(ref_list[i], assign_attributes = TRUE) + + }, + error = function(e) e) + #if loading fails, put in a bunch of NAs instead of data: + if (inherits(pkg, "error")) { + dat <- data.frame(ref_list[i], "Error", NA, NA, NA, NA) + colnames(dat) <- colnames(df) + df <- rbind(df, dat) + } else { + #if loading is successful, get some basic info about the data package: + #number of data files: + file_number <- length(seq_along(pkg)) + + #number of columns of data + for (j in 1:length(seq_along(pkg))) { + col_number <- col_number + ncol(pkg[[j]]) + } + + #number of cells of data: + for(j in 1:length(seq_along(pkg))) { + cell_number <- + cell_number + ncol(pkg[[j]])*nrow(pkg[[j]]) + } + #total data (not metadata) file size of the data package + for (j in 1:file_number) { + file_size <- file_size + file.size( + list.files(here::here("data", + ref_list[i]), + full.names = TRUE)[j]) + } + #put all the info into a dataframe + dat <- data.frame(ref_list[i], + "Loads", + file_number, + col_number, + cell_number, + file_size) + colnames(dat) <- colnames(df) + #append the package-specific info to the overall dataframe: + df <- rbind(df, dat) + } + #remove the package specific data frame so that it can be re-written + rm(dat) + } + #return the dataframe for the entire set of data packages: + return(df) +} \ No newline at end of file From eaf2b9e859451c5b0e1d4cbcca1f930d19e8b8b8 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Wed, 20 Nov 2024 09:20:48 -0700 Subject: [PATCH 02/16] add 
newline at end of file --- R/load_data_packages.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/load_data_packages.R b/R/load_data_packages.R index 308150c..803267f 100644 --- a/R/load_data_packages.R +++ b/R/load_data_packages.R @@ -197,4 +197,4 @@ extract_tbl <- function(x) { if (!is.list(x)) return(NULL) unlist(lapply(x, extract_tbl), FALSE) -} \ No newline at end of file +} From fca3df0f33d957a4f02639eab2c7afa358a4a5fb Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Wed, 20 Nov 2024 09:21:51 -0700 Subject: [PATCH 03/16] get_reference_list renamed to get_ref_list --- R/meta_analyses.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/meta_analyses.R b/R/meta_analyses.R index 4e4089b..58ec5c1 100644 --- a/R/meta_analyses.R +++ b/R/meta_analyses.R @@ -1,6 +1,6 @@ #' Get a list of reference codes from DataStore #' -#' `get_reference_list` will return a list of the DataStore reference codes associated with a given reference type. Where "All" might be a bit generous: I would not expect more than the number given by "no_of_entries" as that is technically the number of entries per page and the function defaults to returning just one page (not entirely sure what a "page" is in this context). +#' `get_ref_list` will return a list of the DataStore reference codes associated with a given reference type. Where "All" might be a bit generous: I would not expect more than the number given by "no_of_entries" as that is technically the number of entries per page and the function defaults to returning just one page (not entirely sure what a "page" is in this context). #' #' #' @param reference_type String. The reference type to to query data store for. Defaults to data package ("dataPackage"). @@ -46,7 +46,7 @@ get_references_list <- function (reference_type = "dataPackage", #' #' The function will return a data frame containing information about a given number of references within a reference type. 
The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReerence). #' -#' @inheritParams get_reference_list +#' @inheritParams get_ref_list #'' #' @return a data frame #' @export From 6decd6cc07d1768a218fd8796226d5d037d4d48b Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Wed, 20 Nov 2024 09:22:18 -0700 Subject: [PATCH 04/16] updat example for load_data_package_deprecated --- R/load_data_package.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/load_data_package.R b/R/load_data_package.R index 5ae4073..6affde1 100644 --- a/R/load_data_package.R +++ b/R/load_data_package.R @@ -11,7 +11,7 @@ #' #' @examples #' \dontrun{ -#' load_data_package(2272461) +#' load_data_package_deprecated(2272461) #' } load_data_package_deprecated <- function(reference_id) { data_package_directory <- paste("data/", reference_id, sep = "") From 71c213ffeefc0de74a4497630a88195c01ec2c97 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Wed, 20 Nov 2024 09:31:57 -0700 Subject: [PATCH 05/16] fix typos for roxygen documentation --- R/meta_analyses.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/R/meta_analyses.R b/R/meta_analyses.R index 58ec5c1..379ce0b 100644 --- a/R/meta_analyses.R +++ b/R/meta_analyses.R @@ -12,9 +12,9 @@ #' #' @examples #' \dontrun{ -#' get_reference_list() +#' get_ref_list() #' } -get_references_list <- function (reference_type 
= "dataPackage", +get_ref_list <- function (reference_type = "dataPackage", no_of_entries = 500, secure = FALSE) { server <- NULL @@ -47,7 +47,7 @@ get_references_list <- function (reference_type = "dataPackage", #' The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReerence). #' #' @inheritParams get_ref_list -#'' +#' #' @return a data frame #' @export #' @@ -150,6 +150,10 @@ get_ref_info <- function (reference_type = "dataPackage", #' @export #' #' @examples +#' #' \dontrun{ +#' x <- get_ref_list() +#' get_ref_info(x[[1]]) +#' } summarize_packages <- function(ref_list, secure = TRUE, check_metadata = FALSE) { From ece57338fd8d9051b84fb47104d6f26a08a58fcc Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Thu, 19 Dec 2024 15:08:14 -0700 Subject: [PATCH 06/16] add updated info about load_pkg_metadata. 
Added info about get_ref_list, get_ref_info, and summarize_packages --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 84ba16e..55b1f7e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # NPSutils 0.3.3 (under development) +## 2024-12-19 + * updated `load_pkg_metadata` to be simpler and essentially call `DPchecker::load_metadata` but with a preset default directory structure that works well with the default settings for `get_data_package`. + * Add meta-analysis functions for finding and producing summary statistics for multiple data packages including `get_ref_list`, `get_ref_info()`, and `summarize_packages`. ## 2024-10-24 * fix how `get_data_package` aliases `get_data_packages`, specifically now allows users to adjust parameters to non-default settings. ## 2024-10-21 From f42248fe7d4bdf06da3bed1248e6b2177873dea6 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Thu, 19 Dec 2024 15:08:41 -0700 Subject: [PATCH 07/16] reconstruct function as a wrapper for DPchecker::load_metadata; update documentation to reflect the changes. --- R/load_pgk_metadata.R | 118 ++++-------------------------------------- 1 file changed, 10 insertions(+), 108 deletions(-) diff --git a/R/load_pgk_metadata.R b/R/load_pgk_metadata.R index 0ad5c0e..531b1a4 100644 --- a/R/load_pgk_metadata.R +++ b/R/load_pgk_metadata.R @@ -1,12 +1,9 @@ -#' Read contents of data package file and construct a data frame based on the -#' metadata file summarizing the fields and their types/definitions. +#' Loads EML-formatted metadata into R for inspection and/or editing +#' +#' @description `load_pkg_metadata()` is essentially a wrapper around `DPchecker::load_metadata` with the directory structure pre-set to work well with the default location where `get_data_package` stores downloaded data packages. 
If you did not use the default settings for `get_data_package` (or downloaded a data package manually) you may find it easier to adjust the directory structure pointing to your data package and load the metadata using `DPchecker::load_metadata()`. Much like `load_metadata`, `load_pkg_metadata` requires that there be a single .xml file in the data package directory, that the metadata file name end in *_metadata.xml, and that the file contain schema-valid EML metadata. #' -#' @description `load_pkg_metadata()` reads the metadata file from a previously -#' downloaded package and loads a list of fields and their attributes into a -#' dataframe. -#' -#' @param holding_id is a 6-7 digit number corresponding to the holding ID of the data package zip file. -#' @param directory String. Path to the data package +#' @param holding_id is a 6-7 digit number corresponding to the holding ID of the data package zip file. Your data should be in a directory that has the holding ID as its name. +#' @param directory String. Path to the data package directory, defaults to "data". #' #' @return one data frame to the global environment. #' @@ -16,105 +13,10 @@ #' \dontrun{ #' load_pgk_metadata(2266200) #' } -load_pkg_metadata <- function(holding_id, directory = here::here("data")) { - data_package_directory <- paste(directory, "/", holding_id, sep = "") - - metadata_file <- list.files( - path = data_package_directory, - pattern = "metadata.xml" - ) - - # Look for a metadatafile and let the user know about the results of the search. 
- if (length(metadata_file) == 0) { - cli::cli_abort(c( - "No metadata file found in: {.path {data_package_directory}}.", - "i" = "The filename must end in _metadata.xml")) - return(invisible()) - } - if (length(metadata_file) > 1) { - cli::cli_abort(c( - "Multiple metadata files found.", - "i" = "{.path {data_package_directory}} can contain only one - {.file *_metadata.xml}.")) - return(invisible()) - } +load_pkg_metadata <- function(holding_id, directory = "data") { - meta_location <- paste0(data_package_directory, "/", metadata_file) - if (!file.exists(meta_location)) { - cli::cli_abort(c( - "The data package for: {.var {holding_id}} was not found.", - "i" = "Make sure {.path {data_package_directory}} is the correct location", - "i" = "Make sure you downloaded the correct data package using {.fn get_data_package}." - )) - return(invisible()) - } - - #load metadata - eml_object <- EML::read_eml(meta_location, from = "xml") - #attributeList <- EML::get_attributes(eml_object) - attribute_list <- eml_object$dataset$dataTable$attributeList - attributes <- attribute_list$attributes - factors <- attribute_list$factors - - # Figure out column classes based on attribute table (character, numeric, integer, logical, or complex) - attributes$columnclass <- "character" - if (!"numberType" %in% colnames(attributes)) { - attributes$numberType <- as.character(NA) - } - if (!"formatString" %in% colnames(attributes)) { - attributes$formatString <- as.character(NA) - } - attributes$columnclass <- ifelse(attributes$storageType == "float" & attributes$numberType == "natural", "integer", attributes$columnclass) - attributes$columnclass <- ifelse(attributes$storageType == "float" & attributes$numberType == "whole", "integer", attributes$columnclass) - attributes$columnclass <- ifelse(attributes$storageType == "float" & attributes$numberType == "integer", "integer", attributes$columnclass) - attributes$columnclass <- ifelse(attributes$storageType == "float" & attributes$numberType == 
"real", "numeric", attributes$columnclass) - attributes$columnclass <- ifelse(attributes$storageType == "date" & attributes$formatString == "YYYY-MM-DD", "Date", attributes$columnclass) - - # return the field table to the workspace. - return(attributes) - -if (metaformat == "fgdc") { - # xmlFilename <- metalocation - workingXMLfile <- EML::read_eml(metalocation, from = "xml") - - # Build attributes table from the xml file - attributes <- data.frame( - id = numeric(), - attribute = character(), - attributeDefinition = character(), - attributeType = character(), - attributeFactors = numeric(), - stringsAsFactors = FALSE - ) - for (i in 1:length(workingXMLfile$ea$detailed$attr)) { - attributes <- rbind( - attributes, - cbind( - id = i, - attribute = workingXMLfile$ea$detailed$attr[[i]]$attrlabl, - attributeDefinition = workingXMLfile$ea$detailed$attr[[i]]$attrdef, - attributeType = workingXMLfile$ea$detailed$attr[[i]]$attrtype, - attributeFactors = length(workingXMLfile$ea$detailed$attr[[i]]$attrdomv) - ) - ) - } - - attributes$id <- as.integer(as.character(attributes$id)) - attributes$attribute <- as.character(attributes$attribute) - attributes$attributeDefinition <- as.character(attributes$attributeDefinition) - # attributes$attributeType<-as.character(attributes$attributeType) - attributes$attributeFactors <- as.integer(as.character(attributes$attributeFactors)) - - attributes$columnclass <- "character" - # attributes$columnclass<-ifelse(attributes$attributeType=="OID","integer",attributes$columnclass) - # attributes$columnclass<-ifelse(attributes$attributeType=="Date","Date",attributes$columnclass) - # attributes$columnclass<-ifelse(attributes$attributeType=="Double","numeric",attributes$columnclass) - - cat("Found ", crayon::blue$bold(nrow(attributes)), " fields.", sep = "") + meta <- DPchecker::load_metadata(directory = here::here("data", holding_id)) + + return(invisible(meta)) +} - # return the field table to the workspace. 
- return(attributes) - } else { - print("data/metadata format combination not supported") - } -} From cf7b13d3ff5db5a2ec64f8a3b69069b32d766347 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Thu, 19 Dec 2024 15:09:07 -0700 Subject: [PATCH 08/16] Fix examples in summarize_packages --- R/meta_analyses.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/meta_analyses.R b/R/meta_analyses.R index 379ce0b..d1bbf5a 100644 --- a/R/meta_analyses.R +++ b/R/meta_analyses.R @@ -150,7 +150,7 @@ get_ref_info <- function (reference_type = "dataPackage", #' @export #' #' @examples -#' #' \dontrun{ +#' \dontrun{ #' x <- get_ref_list() #' get_ref_info(x[[1]]) #' } From a788a36be0c19253bd1e74e06943a01fabb09be6 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Thu, 19 Dec 2024 15:09:47 -0700 Subject: [PATCH 09/16] auto update via devtools::document and pkgdown. --- NAMESPACE | 2 ++ docs/news/index.html | 6 ++-- docs/pkgdown.yml | 2 +- docs/reference/get_ref_info.html | 40 ++++++++++++++++++++++++--- docs/reference/index.html | 10 ++++++- docs/reference/load_pkg_metadata.html | 16 ++++------- docs/sitemap.xml | 2 ++ man/get_ref_info.Rd | 29 +++++++++++++++++-- man/get_ref_list.Rd | 30 ++++++++++++++++++++ man/load_data_package_deprecated.Rd | 2 +- man/load_pkg_metadata.Rd | 13 ++++----- man/summarize_packages.Rd | 36 ++++++++++++++++++++++++ 12 files changed, 159 insertions(+), 29 deletions(-) create mode 100644 man/get_ref_list.Rd create mode 100644 man/summarize_packages.Rd diff --git a/NAMESPACE b/NAMESPACE index 683736c..b3d312a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,7 @@ export(get_park_taxon_citations) export(get_park_taxon_refs) export(get_park_taxon_url) export(get_ref_info) +export(get_ref_list) export(get_unit_code) export(get_unit_code_info) export(get_unit_info) @@ -22,6 +23,7 @@ export(load_domains) export(load_pkg_metadata) export(map_wkt) export(rm_local_packages) +export(summarize_packages) export(validate_data_package) 
importFrom(lifecycle,deprecated) importFrom(magrittr,"%>%") diff --git a/docs/news/index.html b/docs/news/index.html index e149388..993793f 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -53,8 +53,10 @@

Changelog

-

2024-10-24

-
  • fix how get_data_package aliases get_data_packages, specifically now allows users to adjust parameters to non-default settings. ## 2024-10-21
  • +

    2024-12-19

    +
    • updated load_pkg_metadata to be simpler and essentially call DPchecker::load_metadata but with a preset default directory structure that works well with the default settings for get_data_package.
    • +
    • Add meta-analysis functions for finding and producing summary statistics for multiple data packages including get_ref_list, get_ref_info(), and summarize_packages. ## 2024-10-24
    • +
    • fix how get_data_package aliases get_data_packages, specifically now allows users to adjust parameters to non-default settings. ## 2024-10-21
    • Bug fixes to load_data_package()
    • Bug fixes to .get_authors() diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index e9d715c..fe9867d 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -3,4 +3,4 @@ pkgdown: 2.1.0 pkgdown_sha: ~ articles: NPSutils: NPSutils.html -last_built: 2024-10-24T20:56Z +last_built: 2024-12-19T22:02Z diff --git a/docs/reference/get_ref_info.html b/docs/reference/get_ref_info.html index 4fcdb79..c2d107b 100644 --- a/docs/reference/get_ref_info.html +++ b/docs/reference/get_ref_info.html @@ -1,6 +1,7 @@ Get citation for Data Store holding info by HoldingID — get_ref_info • NPSutils @@ -48,24 +49,47 @@

      get_ref_info returns a character string or a vector with information from one of the metadata fields in a Data Store reference's associated xml file.

      +

      The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the reference was activated on DataStore (dateOfIssue), the reference's visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether there is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentVersion).

      -
      get_ref_info(holding_id, field)
      +
      get_ref_info(
      +  reference_type = "dataPackage",
      +  no_of_entries = 500,
      +  secure = FALSE
      +)
      +
      +get_ref_info(
      +  reference_type = "dataPackage",
      +  no_of_entries = 500,
      +  secure = FALSE
      +)

      Arguments

      -
      holding_id
      +
      reference_type
      +

      String. The reference type to query DataStore for. Defaults to data package ("dataPackage").

      + + +
      no_of_entries
      +

      Integer. The number of entries to return per page (where only one "page" of results is returned by default). Defaults to 500.

      + + +
      secure
      +

      Logical. Defaults to FALSE for external users. Setting secure = TRUE will, with the proper credentials, return DataStore references with visibility set to both Public and Restricted.

      + + +
      holding_id

      The six-seven digit reference / holding ID number unique to the data store record.

      @@ -75,12 +99,20 @@

      Arguments

      all keywords as character values.

      +
      +

      Value

      +

      a data frame

      +

      Examples

      if (FALSE) { # \dontrun{
       get_ref_info(2266196, "Title")
       } # }
      +if (FALSE) { # \dontrun{
      +get_ref_info()
      +} # }
      +
       
      diff --git a/docs/reference/index.html b/docs/reference/index.html index a7e1335..814014e 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -93,6 +93,10 @@

      All functions get_ref_info()

      Get citation for Data Store holding info by HoldingID

      + +

      get_ref_list()

      + +

      Get a list of reference codes from DataStore

      get_unit_code()

      @@ -124,7 +128,7 @@

      All functions

      load_pkg_metadata()

      -

      Read contents of data package file and construct a data frame based on the metadata file summarizing the fields and their types/definitions.

      +

      Loads EML-formatted metadata into R for inspection and/or editing

      map_wkt()

      @@ -133,6 +137,10 @@

      All functions rm_local_packages()

      Delete data packages from your local machine

      + +

      summarize_packages()

      + +

      Collect summary statistics on data packages

      validate_data_package()

      diff --git a/docs/reference/load_pkg_metadata.html b/docs/reference/load_pkg_metadata.html index d199c11..831520f 100644 --- a/docs/reference/load_pkg_metadata.html +++ b/docs/reference/load_pkg_metadata.html @@ -1,7 +1,5 @@ -Read contents of data package file and construct a data frame based on the metadata file summarizing the fields and their types/definitions. — load_pkg_metadata • NPSutilsLoads EML-formatted metadata into R for inspection and/or editing — load_pkg_metadata • NPSutils @@ -48,19 +46,17 @@
      -

      `load_pkg_metadata()` reads the metadata file from a previously -downloaded package and loads a list of fields and their attributes into a -dataframe.

      +

      `load_pkg_metadata()` is essentially a wrapper around `DPchecker::load_metadata` with the directory structure pre-set to work well with the default location where `get_data_package` stores downloaded data packages. If you did not use the default settings for `get_data_package` (or downloaded a data package manually) you may find it easier to adjust the directory structure pointing to your data package and load the metadata using `DPchecker::load_metadata()`. Much like `load_metadata`, `load_pkg_metadata` requires that there be a single .xml file in the data package directory, that the metadata file name end in *_metadata.xml, and that the file contain schema-valid EML metadata.

      -
      load_pkg_metadata(holding_id, directory = here::here("data"))
      +
      load_pkg_metadata(holding_id, directory = "data")
      @@ -68,11 +64,11 @@

      Arguments

      holding_id
      -

      is a 6-7 digit number corresponding to the holding ID of the data package zip file.

      +

      is a 6-7 digit number corresponding to the holding ID of the data package zip file. Your data should be in a directory that has the holding ID as its name.

      directory
      -

      String. Path to the data package

      +

      String. Path to the data package directory, defaults to "data".

      diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 80cfae3..4e694c4 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -17,6 +17,7 @@ /reference/get_park_taxon_refs.html /reference/get_park_taxon_url.html /reference/get_ref_info.html +/reference/get_ref_list.html /reference/get_unit_code.html /reference/get_unit_code_info.html /reference/get_unit_info.html @@ -29,6 +30,7 @@ /reference/map_wkt.html /reference/NPSutils-package.html /reference/rm_local_packages.html +/reference/summarize_packages.html /reference/validate_data_package.html diff --git a/man/get_ref_info.Rd b/man/get_ref_info.Rd index 9fe2e8a..cebe0f7 100644 --- a/man/get_ref_info.Rd +++ b/man/get_ref_info.Rd @@ -1,24 +1,49 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getReferenceInfo.R +% Please edit documentation in R/getReferenceInfo.R, R/meta_analyses.R \name{get_ref_info} \alias{get_ref_info} \title{Get citation for Data Store holding info by HoldingID} \usage{ -get_ref_info(holding_id, field) +get_ref_info( + reference_type = "dataPackage", + no_of_entries = 500, + secure = FALSE +) + +get_ref_info( + reference_type = "dataPackage", + no_of_entries = 500, + secure = FALSE +) } \arguments{ +\item{reference_type}{String. The reference type to to query data store for. Defaults to data package ("dataPackage").} + +\item{no_of_entries}{Integer. The number of entries to return per page (where only one "page" of results is returned by default). Defaults to 500.} + +\item{secure}{Logical. Defaults to FALSE for external users. 
Setting secure = TRUE will, with the proper credentials, return DataStore references with visibility set to both Public and Restricted.} + \item{holding_id}{The six-seven digit reference / holding ID number unique to the data store record.} \item{field}{is one of the following: "Title" returns the title of the data store reference as a string value; "Abstract" returns the abstract as a string value; "Citation" returns the citation as a string value, and "Keywords" returns a vector containing all keywords as character values.} } +\value{ +a data frame +} \description{ \code{get_ref_info} returns a character string or a vector with information from one of the metadata fields in a Data Store reference's associated xml file. + +The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReerence). 
} \examples{ \dontrun{ get_ref_info(2266196, "Title") } +\dontrun{ +get_ref_info() +} + } diff --git a/man/get_ref_list.Rd b/man/get_ref_list.Rd new file mode 100644 index 0000000..afc6288 --- /dev/null +++ b/man/get_ref_list.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meta_analyses.R +\name{get_ref_list} +\alias{get_ref_list} +\title{Get a list of reference codes from DataStore} +\usage{ +get_ref_list( + reference_type = "dataPackage", + no_of_entries = 500, + secure = FALSE +) +} +\arguments{ +\item{reference_type}{String. The reference type to to query data store for. Defaults to data package ("dataPackage").} + +\item{no_of_entries}{Integer. The number of entries to return per page (where only one "page" of results is returned by default). Defaults to 500.} + +\item{secure}{Logical. Defaults to FALSE for external users. Setting secure = TRUE will, with the proper credentials, return DataStore references with visibility set to both Public and Restricted.} +} +\value{ +A List of reference IDs +} +\description{ +`get_ref_list` will return a list of the DataStore reference codes associated with a given reference type. Where "All" might be a bit generous: I would not expect more than the number given by "no_of_entries" as that is technically the number of entries per page and the function defaults to returning just one page (not entirely sure what a "page" is in this context). 
+} +\examples{ +\dontrun{ +get_ref_list() +} +} diff --git a/man/load_data_package_deprecated.Rd b/man/load_data_package_deprecated.Rd index 55fefad..9ba20ec 100644 --- a/man/load_data_package_deprecated.Rd +++ b/man/load_data_package_deprecated.Rd @@ -20,6 +20,6 @@ a list of one or more tibbles contained within the data package to the global en } \examples{ \dontrun{ -load_data_package(2272461) +load_data_package_deprecated(2272461) } } diff --git a/man/load_pkg_metadata.Rd b/man/load_pkg_metadata.Rd index 0d9c4a4..b8da639 100644 --- a/man/load_pkg_metadata.Rd +++ b/man/load_pkg_metadata.Rd @@ -2,23 +2,20 @@ % Please edit documentation in R/load_pgk_metadata.R \name{load_pkg_metadata} \alias{load_pkg_metadata} -\title{Read contents of data package file and construct a data frame based on the -metadata file summarizing the fields and their types/definitions.} +\title{Loads EML-formatted metadata into R for inspection and/or editing} \usage{ -load_pkg_metadata(holding_id, directory = here::here("data")) +load_pkg_metadata(holding_id, directory = "data") } \arguments{ -\item{holding_id}{is a 6-7 digit number corresponding to the holding ID of the data package zip file.} +\item{holding_id}{is a 6-7 digit number corresponding to the holding ID of the data package zip file. Your data should be in a directory that that has the holding ID as its name.} -\item{directory}{String. Path to the data package} +\item{directory}{String. Path to the data package directory, defaults to "data".} } \value{ one data frame to the global environment. } \description{ -`load_pkg_metadata()` reads the metadata file from a previously -downloaded package and loads a list of fields and their attributes into a -dataframe. +`load_pkg_metadata()` is essentially a wrapper around `DPchecker::load_metadata` with the directory structure pre-set to work well the default location that `get_data_package` stores downloaded data packages. 
If you did not use the default settings for `get_data_package` (or downloaded a data package manually) you may find it easier to adjust the directory structure pointing to your data package and load the metadata using `DPchecker::load_metadata()`. Much like `load_metadata`, `load_pkg_metadata` requires that there be a single .xml file in the data package directory, that the metadata file name end in *_metadata.xml, and that the file contain schema-valid EML metadata. } \examples{ \dontrun{ diff --git a/man/summarize_packages.Rd b/man/summarize_packages.Rd new file mode 100644 index 0000000..3063592 --- /dev/null +++ b/man/summarize_packages.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meta_analyses.R +\name{summarize_packages} +\alias{summarize_packages} +\title{Collect summary statistics on data packages} +\usage{ +summarize_packages(ref_list, secure = TRUE, check_metadata = FALSE) +} +\arguments{ +\item{ref_list}{list or string of data package reference IDs from DataStore (potentially generated via `get_references_list`.} + +\item{secure}{logical. Defaults to TRUE to access secure DataStore server and restricted data packages. Set to FALSE to to access only public references.} + +\item{check_metadata}{Logical. Defaults to FALSE. In this case, metadata will not be checked or loaded. Any load errors will occur due to problems with .csv files (for instance if they don't exist). To test whether the metadata meets minimal requirements (is schema-valid), set check_metadata = TRUE.} +} +\value{ +data frame +} +\description{ +Given a list of data package references from DataStore the function will download the indicated data packages (using creating the folders /data/reference for each data package; see `get_data_packages` for details), load them into R, and then collect some summary statistics on the data packages. 
+} +\details{ +If a data package fails to download (or load) into R, the function will return NAs instead of summary data about the data package as well as a message about the package status ("Loads", "Error") in the dataframe that the function returns. The function will ignore files that fall outside the data package specifications (one or more .csv files and a single .xml file ending in *_metadata.xml). + +When `check_metadata` is set to the default `FALSE`, the function will attempt to and load any .csv, regardless of the contents. Data packages with restricted access can produce false positives if you do not have the appropriate permissions to download the data as the function will still download the files, but they will be populated with unhelpful hmtl rather than the intended data. Functions that fail to load into R likely violate the data package specifications in some fundamental way (e.g. .CSV file instead of .csv or no .csv files at all). + +When `check_metadata` is set to `TRUE`, additional checks and tests are run on the data package and load errors may occur for all of the above reasons and also if there are multiple .xml files, if the metadata file name does not end in "*_metadata.xml", if there is no metadata file, or if the metadata file is EML schema-invalid. + +If you have access to restricted DataStore references (e.g. in an NPS office or logged in to an NPS VPN), you can set secure = TRUE. This will give you access to restricted (internal to NPS) references but if a reference is restricted to a named list of individuals you must be on that named list to access the reference. 
+} +\examples{ +\dontrun{ +x <- get_ref_list() +get_ref_info(x[[1]]) +} +} From 41366eb1c5ff921ed64394cafdda6b7102e2352b Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Thu, 19 Dec 2024 20:35:23 -0700 Subject: [PATCH 10/16] update @params for get_ref_info --- R/meta_analyses.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/meta_analyses.R b/R/meta_analyses.R index d1bbf5a..a56f2bc 100644 --- a/R/meta_analyses.R +++ b/R/meta_analyses.R @@ -46,7 +46,9 @@ get_ref_list <- function (reference_type = "dataPackage", #' #' The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReerence). #' -#' @inheritParams get_ref_list +#' @param reference_type String. Defaults to "dataPackage". The reference type to query DataStore for. +#' @param no_of_entries Integer. Defaults to 500. The number of entries to return. +#' @param secure Logical. Defaults to FALSE. Should the function use a secure API or the pubic API? 
#' #' @return a data frame #' @export From 36b1a18fe13bc84b75158d3991dbf6207bcb5777 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Thu, 19 Dec 2024 20:35:40 -0700 Subject: [PATCH 11/16] auto update via pkgdown and devtools::document --- docs/pkgdown.yml | 2 +- docs/reference/get_ref_info.html | 6 +++--- man/get_ref_info.Rd | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index fe9867d..dd34cd1 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -3,4 +3,4 @@ pkgdown: 2.1.0 pkgdown_sha: ~ articles: NPSutils: NPSutils.html -last_built: 2024-12-19T22:02Z +last_built: 2024-12-20T03:31Z diff --git a/docs/reference/get_ref_info.html b/docs/reference/get_ref_info.html index c2d107b..715113b 100644 --- a/docs/reference/get_ref_info.html +++ b/docs/reference/get_ref_info.html @@ -78,15 +78,15 @@

      Arguments

      reference_type
      -

      String. The reference type to to query data store for. Defaults to data package ("dataPackage").

      +

      String. Defaults to "dataPackage". The reference type to query DataStore for.

      no_of_entries
      -

      Integer. The number of entries to return per page (where only one "page" of results is returned by default). Defaults to 500.

      +

      Integer. Defaults to 500. The number of entries to return.

      secure
      -

      Logical. Defaults to FALSE for external users. Setting secure = TRUE will, with the proper credentials, return DataStore references with visibility set to both Public and Restricted.

      +

      Logical. Defaults to FALSE. Should the function use a secure API or the pubic API?

      holding_id
      diff --git a/man/get_ref_info.Rd b/man/get_ref_info.Rd index cebe0f7..2109b3c 100644 --- a/man/get_ref_info.Rd +++ b/man/get_ref_info.Rd @@ -17,11 +17,11 @@ get_ref_info( ) } \arguments{ -\item{reference_type}{String. The reference type to to query data store for. Defaults to data package ("dataPackage").} +\item{reference_type}{String. Defaults to "dataPackage". The reference type to query DataStore for.} -\item{no_of_entries}{Integer. The number of entries to return per page (where only one "page" of results is returned by default). Defaults to 500.} +\item{no_of_entries}{Integer. Defaults to 500. The number of entries to return.} -\item{secure}{Logical. Defaults to FALSE for external users. Setting secure = TRUE will, with the proper credentials, return DataStore references with visibility set to both Public and Restricted.} +\item{secure}{Logical. Defaults to FALSE. Should the function use a secure API or the pubic API?} \item{holding_id}{The six-seven digit reference / holding ID number unique to the data store record.} From 39f9c379f541f8c8c6eb88575f3107b0da28f31d Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Thu, 19 Dec 2024 20:39:04 -0700 Subject: [PATCH 12/16] fix typo in documentation --- R/meta_analyses.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/meta_analyses.R b/R/meta_analyses.R index a56f2bc..dc9fd36 100644 --- a/R/meta_analyses.R +++ b/R/meta_analyses.R @@ -44,7 +44,7 @@ get_ref_list <- function (reference_type = "dataPackage", #' Return Basic information about a list of DataStore References #' -#' The function will return a data frame containing information about a given number of references within a reference type. 
The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReerence). +#' The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReference). #' #' @param reference_type String. Defaults to "dataPackage". The reference type to query DataStore for. #' @param no_of_entries Integer. Defaults to 500. The number of entries to return. 
From e82389b80825c2b5d98538df3e2d80bfef98cdf2 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Thu, 19 Dec 2024 20:39:22 -0700 Subject: [PATCH 13/16] updated via devtools::document and pkgdown::build_site_github_pages --- docs/pkgdown.yml | 2 +- docs/reference/get_ref_info.html | 4 ++-- man/get_ref_info.Rd | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index dd34cd1..2979297 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -3,4 +3,4 @@ pkgdown: 2.1.0 pkgdown_sha: ~ articles: NPSutils: NPSutils.html -last_built: 2024-12-20T03:31Z +last_built: 2024-12-20T03:36Z diff --git a/docs/reference/get_ref_info.html b/docs/reference/get_ref_info.html index 715113b..f3dba63 100644 --- a/docs/reference/get_ref_info.html +++ b/docs/reference/get_ref_info.html @@ -1,7 +1,7 @@ Get citation for Data Store holding info by HoldingID — get_ref_info • NPSutils @@ -56,7 +56,7 @@

      Get citation for Data Store holding info by HoldingID

      get_ref_info returns a character string or a vector with information from one of the metadata fields in a Data Store reference's associated xml file.

      -

      The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReerence).

      +

      The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReference).

      diff --git a/man/get_ref_info.Rd b/man/get_ref_info.Rd index 2109b3c..b19f4aa 100644 --- a/man/get_ref_info.Rd +++ b/man/get_ref_info.Rd @@ -36,7 +36,7 @@ a data frame \code{get_ref_info} returns a character string or a vector with information from one of the metadata fields in a Data Store reference's associated xml file. -The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReerence). +The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReference). 
} \examples{ \dontrun{ From 7f96e8cf4c0a1cfc512efad7a770229294a1f000 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Fri, 20 Dec 2024 11:47:53 -0700 Subject: [PATCH 14/16] rename meta analysis get_ref_info to get_refs_info to avoid duplication of function names. --- R/meta_analyses.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/meta_analyses.R b/R/meta_analyses.R index dc9fd36..2cf901e 100644 --- a/R/meta_analyses.R +++ b/R/meta_analyses.R @@ -46,9 +46,9 @@ get_ref_list <- function (reference_type = "dataPackage", #' #' The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReference). #' -#' @param reference_type String. Defaults to "dataPackage". The reference type to query DataStore for. -#' @param no_of_entries Integer. Defaults to 500. The number of entries to return. -#' @param secure Logical. Defaults to FALSE. Should the function use a secure API or the pubic API? +#' @param reference_type String. The reference type to to query data store for. Defaults to data package ("dataPackage"). +#' @param no_of_entries Integer. The number of entries to return per page (where only one "page" of results is returned by default). Defaults to 500. +#' @param secure Logical. Defaults to FALSE for external users. 
Setting secure = TRUE will, with the proper credentials, return DataStore references with visibility set to both Public and Restricted. #' #' @return a data frame #' @export @@ -58,7 +58,7 @@ get_ref_list <- function (reference_type = "dataPackage", #' get_ref_info() #' } #' -get_ref_info <- function (reference_type = "dataPackage", +get_refs_info <- function (reference_type = "dataPackage", no_of_entries = 500, secure = FALSE) { server <- NULL From ef000395d8206851c8489e87f0ca0bbb18a9f7f0 Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Fri, 20 Dec 2024 11:48:11 -0700 Subject: [PATCH 15/16] update comments about get_ref_info to the new name get_refs_info --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 55b1f7e..4cc1136 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ ## 2024-12-19 * updated `load_pkg_metadata` to be simpler and essentially call `DPchecker::load_metadata` but with a preset default directory structure that works well with the default settings for `get_data_package`. - * Add meta-analysis functions for finding and producing summary statistics multiple data packages including `get_ref_list`, `get_ref_info()`, and `summarize_packages`. + * Add meta-analysis functions for finding and producing summary statistics multiple data packages including `get_ref_list`, `get_refs_info()`, and `summarize_packages`. ## 2024-10-24 * fix how `get_data_package` aliases `get_data_packages`, specifically now allows users to adjust parameters to non-default settings. 
## 2024-10-21 From dc23032fb14f82d1ffd4335706273a18653f2aba Mon Sep 17 00:00:00 2001 From: Rob Baker Date: Fri, 20 Dec 2024 11:48:29 -0700 Subject: [PATCH 16/16] devtools::document and pkgdown::build_site_github_pages --- NAMESPACE | 1 + docs/news/index.html | 2 +- docs/pkgdown.yml | 2 +- docs/reference/get_ref_info.html | 40 ++++---------------------------- docs/reference/index.html | 4 ++++ docs/sitemap.xml | 1 + man/get_ref_info.Rd | 29 ++--------------------- man/get_refs_info.Rd | 31 +++++++++++++++++++++++++ 8 files changed, 45 insertions(+), 65 deletions(-) create mode 100644 man/get_refs_info.Rd diff --git a/NAMESPACE b/NAMESPACE index b3d312a..1ab94b9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(get_park_taxon_refs) export(get_park_taxon_url) export(get_ref_info) export(get_ref_list) +export(get_refs_info) export(get_unit_code) export(get_unit_code_info) export(get_unit_info) diff --git a/docs/news/index.html b/docs/news/index.html index 993793f..fcef3ea 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -55,7 +55,7 @@

      2024-12-19

      • updated load_pkg_metadata to be simpler and essentially call DPchecker::load_metadata but with a preset default directory structure that works well with the default settings for get_data_package.
      • -
      • Add meta-analysis functions for finding and producing summary statistics multiple data packages including get_ref_list, get_ref_info(), and summarize_packages. ## 2024-10-24
      • +
      • Add meta-analysis functions for finding and producing summary statistics for multiple data packages including get_ref_list, get_refs_info(), and summarize_packages. ## 2024-10-24
      • fix how get_data_package aliases get_data_packages, specifically now allows users to adjust parameters to non-default settings. ## 2024-10-21
      • Bug fixes to load_data_package()
      • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 2979297..2d80c3d 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -3,4 +3,4 @@ pkgdown: 2.1.0 pkgdown_sha: ~ articles: NPSutils: NPSutils.html -last_built: 2024-12-20T03:36Z +last_built: 2024-12-20T18:46Z diff --git a/docs/reference/get_ref_info.html b/docs/reference/get_ref_info.html index f3dba63..4fcdb79 100644 --- a/docs/reference/get_ref_info.html +++ b/docs/reference/get_ref_info.html @@ -1,7 +1,6 @@ Get citation for Data Store holding info by HoldingID — get_ref_info • NPSutils @@ -49,47 +48,24 @@

        get_ref_info returns a character string or a vector with information from one of the metadata fields in a Data Store reference's associated xml file.

        -

        The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReference).

        -
        get_ref_info(
        -  reference_type = "dataPackage",
        -  no_of_entries = 500,
        -  secure = FALSE
        -)
        -
        -get_ref_info(
        -  reference_type = "dataPackage",
        -  no_of_entries = 500,
        -  secure = FALSE
        -)
        +
        get_ref_info(holding_id, field)

        Arguments

        -
        reference_type
        -

        String. Defaults to "dataPackage". The reference type to query DataStore for.

        - - -
        no_of_entries
        -

        Integer. Defaults to 500. The number of entries to return.

        - - -
        secure
        -

        Logical. Defaults to FALSE. Should the function use a secure API or the pubic API?

        - - -
        holding_id
        +
        holding_id

        The six-seven digit reference / holding ID number unique to the data store record.

        @@ -99,20 +75,12 @@

        Arguments

        all keywords as character values.

        -
        -

        Value

        -

        a data frame

        -

        Examples

        if (FALSE) { # \dontrun{
         get_ref_info(2266196, "Title")
         } # }
        -if (FALSE) { # \dontrun{
        -get_ref_info()
        -} # }
        -
         
        diff --git a/docs/reference/index.html b/docs/reference/index.html index 814014e..625e151 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -89,6 +89,10 @@

        All functions get_park_taxon_url()

        Get URL for references for a park-species combination

        + +

        get_refs_info()

        + +

        Return Basic information about a list of DataStore References

        get_ref_info()

        diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 4e694c4..40dc0fa 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -16,6 +16,7 @@ /reference/get_park_taxon_citations.html /reference/get_park_taxon_refs.html /reference/get_park_taxon_url.html +/reference/get_refs_info.html /reference/get_ref_info.html /reference/get_ref_list.html /reference/get_unit_code.html diff --git a/man/get_ref_info.Rd b/man/get_ref_info.Rd index b19f4aa..9fe2e8a 100644 --- a/man/get_ref_info.Rd +++ b/man/get_ref_info.Rd @@ -1,49 +1,24 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getReferenceInfo.R, R/meta_analyses.R +% Please edit documentation in R/getReferenceInfo.R \name{get_ref_info} \alias{get_ref_info} \title{Get citation for Data Store holding info by HoldingID} \usage{ -get_ref_info( - reference_type = "dataPackage", - no_of_entries = 500, - secure = FALSE -) - -get_ref_info( - reference_type = "dataPackage", - no_of_entries = 500, - secure = FALSE -) +get_ref_info(holding_id, field) } \arguments{ -\item{reference_type}{String. Defaults to "dataPackage". The reference type to query DataStore for.} - -\item{no_of_entries}{Integer. Defaults to 500. The number of entries to return.} - -\item{secure}{Logical. Defaults to FALSE. Should the function use a secure API or the pubic API?} - \item{holding_id}{The six-seven digit reference / holding ID number unique to the data store record.} \item{field}{is one of the following: "Title" returns the title of the data store reference as a string value; "Abstract" returns the abstract as a string value; "Citation" returns the citation as a string value, and "Keywords" returns a vector containing all keywords as character values.} } -\value{ -a data frame -} \description{ \code{get_ref_info} returns a character string or a vector with information from one of the metadata fields in a Data Store reference's associated xml file. 
- -The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the references was activated on DataStore (dateOfIssue), the references visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether their is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReference). } \examples{ \dontrun{ get_ref_info(2266196, "Title") } -\dontrun{ -get_ref_info() -} - } diff --git a/man/get_refs_info.Rd b/man/get_refs_info.Rd new file mode 100644 index 0000000..51f2672 --- /dev/null +++ b/man/get_refs_info.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meta_analyses.R +\name{get_refs_info} +\alias{get_refs_info} +\title{Return Basic information about a list of DataStore References} +\usage{ +get_refs_info( + reference_type = "dataPackage", + no_of_entries = 500, + secure = FALSE +) +} +\arguments{ +\item{reference_type}{String. The reference type to query data store for. Defaults to data package ("dataPackage").} + +\item{no_of_entries}{Integer. The number of entries to return per page (where only one "page" of results is returned by default). Defaults to 500.} + +\item{secure}{Logical. Defaults to FALSE for external users. 
Setting secure = TRUE will, with the proper credentials, return DataStore references with visibility set to both Public and Restricted.} +} +\value{ +a data frame +} +\description{ +The function will return a data frame containing information about a given number of references within a reference type. The data returned includes the reference ID (referenceId), the date the reference was activated on DataStore (dateOfIssue), the reference's visibility (visibility), the number of files associated with the reference (fileCount), the access level of the files (fileAccess), the reference title (title), the abbreviated citation (citation), the URL for the DataStore reference (referenceUrl), the group-type for the reference (referenceGroupType), the type of reference (typeName), whether the reference has a DOI associated with it (isDOI), whether there is a newer version of the reference (newVersion) and what the most recent version of the reference is (mostRecentReference). +} +\examples{ +\dontrun{ +get_refs_info() +} + +}