Skip to content

Commit

Permalink
data pagination
Browse files Browse the repository at this point in the history
  • Loading branch information
evanodell committed Jan 23, 2018
1 parent 5a66991 commit bde6387
Show file tree
Hide file tree
Showing 11 changed files with 206 additions and 26 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ Imports:
jsonlite,
tibble,
readr,
dplyr
dplyr,
curl
RoxygenNote: 6.0.1
Suggests:
knitr,
Expand Down
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Generated by roxygen2: do not edit by hand

export(nomis_codes)
export(nomis_data_info)
export(nomis_get_data)
export(nomis_search)
import(curl)
import(dplyr)
import(jsonlite)
import(readr)
import(tibble)
importFrom(readr,read_csv)
81 changes: 68 additions & 13 deletions R/data_download.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,32 @@
#' Retrieve nomis datasets
#'
#' Retrieves specific datasets from nomis, based on their ID. To find dataset
#' IDs, use \code{\link{nomis_data_info}}. Datasets are retrieved in csv format
#' and parsed with the \code{read_csv} function from the \code{readr} package.
#' IDs, use \code{\link{nomis_data_info}}. Datasets are retrieved in csv format
#' and parsed with the \code{read_csv} function from the \code{readr} package.
#'
#' To find the code options for a given dataset, use \code{\link{nomis_codes}}.
#'
#' This can be a very slow process if calling significant amounts of data.
#'
#' @param id The ID of the dataset to retrieve.
#' @param time Parameter for selecting common dates. Accepts one of
#' \code{NULL} (returns all data), \code{"latest"} (returns the latest
#' available data), \code{"previous"} ( the date prior to "latest"),
#' \code{"prevyear"} (the date one year prior to "latest") or \code{"first"}
#' (the oldest available data for this dataset).
#' available data), \code{"previous"} (the date prior to \code{"latest"}),
#' \code{"prevyear"} (the date one year prior to \code{"latest"}) or
#' \code{"first"} (the oldest available data for the dataset).
#' Defaults to \code{NULL}.
#' @param geography The code of the geographic area to return data for. If
#' \code{NULL}, returns data for all available geographic areas, subject to
#' other parameters. Defaults to \code{NULL}.
#' @param measures The code for the statistical measure(s) to include in the
#' data. Accepts a single string or number, or a list of strings or numbers.
#' If \code{NULL}, returns data for all available statistical measures subject
#' to other parameters. Defaults to \code{NULL}.
#' @param sex The code for sexes included in the dataset. Accepts a string or
#' number, or a vector of strings or numbers. \code{7} will return results for
#' males and females, \code{6} only females and \code{5} only males.
#' Defaults to \code{NULL}, equivalent to \code{c(5,6,7)} for datasets where
#' sex is an option.
#' @param exclude_missing If \code{TRUE}, excludes all missing values.
#' Defaults to \code{FALSE}.
#'
Expand All @@ -24,24 +41,62 @@
#'
#' y <- nomis_get_data(id="NM_1_1", time="latest")
#'
#' z <- nomis_get_data(id="NM_1_1", time="latest", geography="TYPE499", measures=c(20100, 20201), sex=5)
#'
#' }
nomis_get_data <- function(id, time=NULL, exclude_missing=FALSE){
nomis_get_data <- function(id, time=NULL, geography=NULL, measures=NULL, sex=NULL, exclude_missing=FALSE){

if(missing(id)){
stop("Dataset ID must be specified")
}

time_query <- dplyr::if_else(is.null(time)==FALSE,
paste0("&time=", paste0(time, collapse=",")),
"")

if(missing(id)) stop("Dataset ID must be specified")
geography_query <- dplyr::if_else(is.null(geography)==FALSE,
paste0("&geography=", geography),
"")

time_query <- dplyr::if_else(is.null(time)==TRUE,
"",
paste0("&time=", tolower(time)))
measures_query <- dplyr::if_else(length(measures)>0,
paste0("&measures=", paste0(measures, collapse=",")),
"")

sex_query <- dplyr::if_else(length(sex)>0,
paste0("&sex=", paste0(sex, collapse=",")),
"")

exclude_query <- dplyr::if_else(exclude_missing==TRUE,
"ExcludeMissingValues=true",
"&ExcludeMissingValues=true",
"")

query <- paste0("/",id,".data.csv?", time_query, exclude_query)
query <- paste0("/",id,".data.csv?", time_query, geography_query, measures_query, sex_query, exclude_query)

df <- nomis_collect_util(query)

if(nrow(df)==0) stop("API request did not return any results")
if(df$RECORD_COUNT[1]>25000) {# test for length and retrieve all data if amount available is over the limit of 25000

record_count <- df$RECORD_COUNT[1]

seq_list <- seq(from=25000, to=record_count, by=25000)

pages <- list()

for(i in 1:length(seq_list)){

query <- paste0(query, "&recordOffset=", seq_list[i])

message("Retrieving additional pages ", i, " of ", length(seq_list))

pages[[i]] <- nomis_collect_util(query)

}

df <- tibble::as_tibble(dplyr::bind_rows(pages, df))

}

if(nrow(df)==0) stop("The API request did not return any results. Please check your parameters.")

df

Expand Down
57 changes: 57 additions & 0 deletions R/nomis_codes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@




#' Variable codes
#'
#' Retrieve the code options for a given dataset, or all the options for a
#' given code variable from a particular dataset.
#'
#' @param id The ID of the particular dataset. Required: an error is raised
#' if it is not supplied.
#' @param code The variable name to return options for. If left empty (or
#' \code{NULL}), returns all options for the dataset specified by \code{id}.
#'
#' @return A tibble of options. When \code{code} is given, the tibble has the
#' columns \code{description} and \code{value}.
#' @export
#'
#' @examples \dontrun{
#'
#' z <- nomis_codes("NM_7_1", "geography")
#'
#' }
#'
nomis_codes <- function(id, code) {

  if (missing(id)) {
    stop("The dataset ID must be specified to return options for a given code.")
  }

  # An explicit NULL is treated like an omitted `code`; pasting NULL into the
  # URL below would otherwise silently produce a malformed ".../id//TYPE..." path.
  if (missing(code) || is.null(code)) {

    info <- nomis_data_info(id)

    df <- tibble::as_tibble(as.data.frame(info$components.dimension))

    # Fill missing flags with "false", but only when the column is present:
    # assigning into a non-existent tibble column would fail on size mismatch.
    if ("isfrequencydimension" %in% names(df)) {
      df$isfrequencydimension[is.na(df$isfrequencydimension)] <- "false"
    }

  } else {

    codes_url <- paste0(
      "https://www.nomisweb.co.uk/api/v01/dataset/",
      id, "/", code, "/TYPE.def.sdmx.json?"
    )

    response <- jsonlite::fromJSON(codes_url, flatten = TRUE)

    codelist <- as.data.frame(response$structure$codelists$codelist$code)

    df <- tibble::tibble(
      description = codelist$description.value,
      value = codelist$value
    )

  }

  df

}



5 changes: 4 additions & 1 deletion R/nomisr-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#' nomisr: Access UK labour market statistics from nomis with R
#'
#' Requests can return up to 25,000 rows of data at a time.
#'
#' Full API documentation available at
#' \url{https://www.nomisweb.co.uk/api/v01/help}
#'
Expand All @@ -10,7 +12,8 @@
#' @name nmisr
#' @import jsonlite
#' @import tibble
#' @import readr
#' @importFrom readr read_csv
#' @import dplyr
#' @import curl
# @useDynLib nomisr
NULL
8 changes: 6 additions & 2 deletions R/utils-collect.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@

# Fetch a CSV response from the nomis dataset API and return the parsed result.
#
# query: string appended verbatim to the base URL
#   "https://www.nomisweb.co.uk/api/v01/dataset" (no separator is added here).
# Returns the object produced by readr::read_csv for that URL.
#
# NOTE(review): the two assignments to `df` and the two closing braces below
# look like the before/after lines of a diff rendered without +/- markers --
# confirm which pair is current before relying on this text as runnable code.
nomis_collect_util <- function(query){

# Plain read of the remote CSV.
df <- readr::read_csv(paste0("https://www.nomisweb.co.uk/api/v01/dataset", query))
# Same read wrapped in suppressMessages(), so messages emitted during the read
# (presumably readr's column specification output) are not shown to the caller.
df <- suppressMessages(readr::read_csv(paste0("https://www.nomisweb.co.uk/api/v01/dataset", query)))

df

}
}




2 changes: 1 addition & 1 deletion R/utils-query.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ nomis_query_util <- function(query){

df

}
}
3 changes: 3 additions & 0 deletions man/nmisr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions man/nomis_codes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 30 additions & 6 deletions man/nomis_get_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test_data_collect.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ context("nomis_get_data")

test_that("nomis_get_data return expected format", {

x <- nomis_get_data(id="NM_1_1", time="latest")
z <- nomis_get_data(id="NM_1_1", time="latest", geography="TYPE499", measures=c(20100, 20201), sex=5)

expect_length(x, 34)
expect_type(x, "list")
Expand Down

0 comments on commit bde6387

Please sign in to comment.