Skip to content

Commit

Permalink
handle big retrieval
Browse files Browse the repository at this point in the history
  • Loading branch information
ake123 committed Sep 18, 2024
1 parent e9ab622 commit 83c6185
Show file tree
Hide file tree
Showing 9 changed files with 185 additions and 50 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
YEAR: 2024
COPYRIGHT HOLDER: Akewak Jeba
COPYRIGHT HOLDER: Akewak Jeba, Leo Lahti
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Generated by roxygen2: do not edit by hand

export(check_api_access)
export(get_all_finna_data)
export(get_finna)
export(get_finna_records)
export(search_finna)
importFrom(curl,curl_download)
Expand Down
10 changes: 8 additions & 2 deletions R/get_record.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
#'
#' This function retrieves multiple Finna records based on a vector of record IDs. You can specify
#' which fields to return, the language, and the pagination options.
#'
#' @name get_finna_records
#' @param ids A vector of record IDs to retrieve.
#' @param field A vector of fields to return. Defaults to NULL, which returns all default fields.
#' @param prettyPrint Logical; whether to pretty-print the response. Defaults to FALSE.
#' @param lng Language for returned translated strings. Defaults to "fi".
#' @param page The page number to retrieve. Defaults to 1.
#' @param limit The number of records to return per page. Defaults to 20.
#' @return A list containing the retrieved records data.
#' @return A tibble containing the retrieved records data with provenance information.
#' @examples
#' records <- get_finna_records("fikka.3405646", field = "title", prettyPrint = TRUE, lng = "en-gb")
#' print(records)
Expand Down Expand Up @@ -113,6 +113,12 @@ get_finna_records <- function(ids, field = NULL, prettyPrint = FALSE, lng = "fi"

# Convert the list to a tibble
tibble_results <- tibble::as_tibble(do.call(rbind, lapply(data, function(x) unlist(x, recursive = FALSE))))
# Add provenance and citation information
tibble_results$provenance <- "Finna API (https://www.finna.fi)"
tibble_results$data_license <- "CC0 for metadata (https://creativecommons.org/publicdomain/zero/1.0/), images and other linked resources may have different licenses."

# Attach the citation as an attribute
attr(tibble_results, "citation") <- "Data retrieved from Finna API (https://www.finna.fi) - metadata licensed under CC0."
return(tibble_results)

} else {
Expand Down
100 changes: 60 additions & 40 deletions R/search_finna.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' Finna Index Search with Advanced Options
#' Finna Index Search with Pagination and Advanced Options
#'
#' This function performs a search on the Finna index with extended options, allowing for a wide range of search types, filters, facets, and sorting methods.
#' It retrieves all available data by paginating through the API results.
#'
#' @name search_finna
#' @param lookfor A string containing the search terms. Boolean operators (AND, OR, NOT) can be included.
Expand All @@ -20,11 +21,10 @@
#' \item "author,id asc" (Author)
#' \item "title,id asc" (Title)
#' }
#' @param page An integer indicating the page number of results to retrieve. Defaults to 1.
#' @param limit An integer specifying the number of records to return per page. Defaults to 20.
#' @param limit An integer specifying the number of records to return per page. Defaults to 100 (maximum).
#' @param lng A string for the language of returned translated strings. Options are "fi", "en-gb", "sv", "se". Defaults to "fi".
#' @param prettyPrint A logical value indicating whether to pretty-print the JSON response. Useful for debugging. Defaults to FALSE.
#' @return A tibble containing the search results with relevant fields extracted. Returns NULL if the search fails or if the input is invalid.
#' @return A tibble containing all search results with relevant fields extracted and provenance information.
#' @examples
#' search_results <- search_finna("sibelius", sort = "main_date_str desc")
#' print(search_results)
Expand All @@ -36,7 +36,6 @@ search_finna <- function(lookfor,
facets = NULL,
facetFilters = NULL,
sort = "relevance,id asc",
page = 1,
limit = 100,
lng = "fi",
prettyPrint = FALSE) {
Expand All @@ -50,37 +49,53 @@ search_finna <- function(lookfor,
# Define the base URL for the search API
base_url <- "https://api.finna.fi/v1/search"

# Construct the query parameters
query_params <- list(
lookfor = lookfor,
type = type,
`field[]` = fields,
`filter[]` = filters,
`facet[]` = facets,
`facetFilter[]` = facetFilters,
sort = sort,
page = page,
limit = limit,
lng = lng,
prettyPrint = prettyPrint
)
# Initialize variables for pagination
all_data <- list() # Store all pages of data
page <- 1 # Start from the first page

# Execute the GET request and handle potential errors
response <- tryCatch(
httr::GET(base_url, query = query_params),
error = function(e) {
warning("Error: Failed to make the request.")
return(NULL)
repeat {
# Construct the query parameters for each page
query_params <- list(
lookfor = lookfor,
type = type,
`field[]` = fields,
`filter[]` = filters,
`facet[]` = facets,
`facetFilter[]` = facetFilters,
sort = sort,
page = page,
limit = limit,
lng = lng,
prettyPrint = prettyPrint
)

# Execute the GET request and handle potential errors
response <- tryCatch(
httr::GET(base_url, query = query_params),
error = function(e) {
warning("Error: Failed to make the request.")
return(NULL)
}
)

# Check if the response is valid
if (is.null(response) || httr::status_code(response) != 200) {
error_message <- sprintf("Failed to perform the search. Status code: %d - Response: %s",
httr::status_code(response), httr::content(response, "text"))
warning(error_message)
break
}
)

# Process the response based on the status code
if (httr::status_code(response) == 200) {
# Parse the JSON content of the response
search_results <- httr::content(response, "parsed")

# Extract and structure relevant data from the search results
records <- search_results$records
if (length(records) == 0) {
message("No more records found. Stopping pagination.")
break
}

data <- lapply(records, function(record) {
list(
Title = record$title %||% NA,
Expand Down Expand Up @@ -126,19 +141,24 @@ search_finna <- function(lookfor,
)
})

# Convert the extracted data into a tibble for easy analysis
tibble_results <- tibble::as_tibble(do.call(rbind, lapply(data, function(x) unlist(x, recursive = FALSE))))

# Attach the language attribute to the tibble
attr(tibble_results, "language") <- lng
# Append the current page's data to the list of all data
all_data <- c(all_data, data)

return(tibble_results)
# Check if we've reached the last page
if (length(records) < limit) {
message("Retrieved last page of results.")
break
}

} else {
# Handle API errors with detailed messages
error_message <- sprintf("Failed to perform the search. Status code: %d - Response: %s",
httr::status_code(response), httr::content(response, "text"))
warning(error_message)
return(NULL)
# Increment the page number for the next iteration
page <- page + 1
}

# Convert the collected data into a tibble for easy analysis
tibble_results <- tibble::as_tibble(do.call(rbind, lapply(all_data, function(x) unlist(x, recursive = FALSE))))

# Attach the language attribute to the tibble
attr(tibble_results, "language") <- lng
cat("Data retrieved from Finna API (https://www.finna.fi) - metadata licensed under CC0.\n")
return(tibble_results)
}
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# finna

[![rOG-badge](https://ropengov.github.io/rogtemplate/reference/figures/ropengov-badge.svg)](https://ropengov.org/)
[![issues](https://img.shields.io/github/issues/rOpenGov/finna)](https://github.com/rOpenGov/finna/issues)
[![pulls](https://img.shields.io/github/issues-pr/rOpenGov/finna)](https://github.com/rOpenGov/finna/pulls)
[![R-CMD-check](https://github.com/rOpenGov/finna/workflows/rworkflows/badge.svg)](https://github.com/rOpenGov/finna/blob/master/.github/workflows/check-standard..yml)
Expand Down
49 changes: 49 additions & 0 deletions man/get_all_finna_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

59 changes: 59 additions & 0 deletions man/get_finna.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/get_finna_records.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 4 additions & 6 deletions man/search_finna.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 83c6185

Please sign in to comment.