diff --git a/DESCRIPTION b/DESCRIPTION index b0bfd55..4d51549 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: censobr Title: Download Data from Brazil's Population Census -Version: 0.0.01 +Version: 0.0.1 Authors@R: c(person(given="Rafael H. M.", family="Pereira", email="rafa.pereira.br@gmail.com", diff --git a/R/read_deaths.R b/R/read_deaths.R index b42fa7e..1fc7eb6 100644 --- a/R/read_deaths.R +++ b/R/read_deaths.R @@ -6,6 +6,8 @@ #' @template year #' @template columns #' @template as_data_frame +#' @template showProgress +#' @template cache #' #' @return An Arrow table or a `"data.frame"` object. #' @export @@ -20,34 +22,35 @@ #'}} read_deaths <- function(year = 2010, columns = NULL, - as_data_frame = TRUE){ + as_data_frame = TRUE, + showProgress = TRUE, + cache = TRUE){ ### check inputs - checkmate::assert_logical(as_data_frame) - checkmate::assert_vector(columns, null.ok = TRUE) checkmate::assert_numeric(year) + checkmate::assert_vector(columns, null.ok = TRUE) + checkmate::assert_logical(as_data_frame) + checkmate::assert_logical(showProgress) + checkmate::assert_logical(cache) years <- c(2010) if (isFALSE(year %in% years)) { stop(paste0("Error: Data set only available for the years ", paste(years), collapse = " "))} ### Get url - if (year==2010) { url <- '2010_deaths.parquet' } + if (year==2010) { file_url <- 'https://github.com/ipeaGIT/censobr/releases/download/v0.0.1/2010_deaths.parquet' } ### Download - df <- arrow::read_parquet(url, as_data_frame = FALSE) + local_file <- download_file(file_url = file_url, + showProgress = showProgress, + cache = cache) - # check downloaded - # if (is.null(df)) {message()} + # check if download worked + if(is.null(local_file)) { return(NULL) } - # load('R:/Dropbox/bases_de_dados/censo_demografico/censo_2010/data/censo2010_BRdeaths.Rdata') - # - # head(censo2010_BRdeaths) - # - # df <- arrow::as_arrow_table(censo2010_BRdeaths, ) - # - # arrow::write_parquet(df, '2010_deaths.parquet') + # 
read data + df <- arrow::read_parquet(local_file, as_data_frame = FALSE) ### Select @@ -55,11 +58,6 @@ read_deaths <- function(year = 2010, df <- dplyr::select(df, columns) } - - - df |> dplyr::collect() - - ### output format if (isTRUE(as_data_frame)) { return( dplyr::collect(df) ) } else { diff --git a/R/utils.R b/R/utils.R index 6f03fa2..6117f86 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,53 +1,53 @@ - - #' Download file from url #' -#' @param file_url String. A url passed from. -#' @param showProgress Logical, passed from -#' @param dest_file String, passed from -#' -#' @return Silently saves downloaded file to temp dir. +#' @param file_url String. A url. +#' @param showProgress Logical. +#' @param cache Logical. + +#' @return A string to the address of the file in a tempdir #' #' @keywords internal #' @examples \dontrun{ if (interactive()) { #' # Generate url -#' file_url <- get_flights_url(type='basica', year=2000, month=11) +#' file_url <- 'https://github.com/ipeaGIT/censobr/releases/download/v0.0.1/2010_deaths.parquet' #' #' # download data -#' download_flightsbr_file(file_url=file_url, -#' showProgress=TRUE, -#' dest_file = tempfile(fileext = ".zip") -#' ) +#' download_file(file_url = file_url, +#' showProgress = TRUE, +#' cache = TRUE) #'}} -download_flightsbr_file <- function(file_url = parent.frame()$file_url, - showProgress = parent.frame()$showProgress, - dest_file = temp_local_file){ - - # download data - try( - httr::GET(url=file_url, - if(showProgress==T){ httr::progress()}, - httr::write_disk(dest_file, overwrite = T), - config = httr::config(ssl_verifypeer = FALSE) - ), silent = TRUE) - - # check if file has NOT been downloaded, try a 2nd time - if (!file.exists(dest_file) | file.info(dest_file)$size == 0) { - - # download data: try a 2nd time - try( +download_file <- function(file_url = parent.frame()$file_url, + showProgress = parent.frame()$showProgress, + cache = parent.frame()$cache){ # nocov start + + # create temp local file + file_name 
<- basename(file_url) + temp_local_file <- paste0(tempdir(),"/",file_name) + + # use cached files or not + if (cache==FALSE & file.exists(temp_local_file)) { + unlink(temp_local_file, recursive = T) + } + + # has the file been downloaded already? If not, download it + if (cache==FALSE | !file.exists(temp_local_file) | file.info(temp_local_file)$size == 0) { + + # download data + try(silent = TRUE, httr::GET(url=file_url, - if(showProgress==T){ httr::progress()}, - httr::write_disk(dest_file, overwrite = T), - config = httr::config(ssl_verifypeer = FALSE) - ), silent = TRUE) + if(showProgress==TRUE){ httr::progress()}, + httr::write_disk(temp_local_file, overwrite = T), + config = httr::config(ssl_verifypeer = FALSE)) + ) } # Halt function if download failed - if (!file.exists(dest_file) | file.info(dest_file)$size == 0) { + if (!file.exists(temp_local_file) | file.info(temp_local_file)$size == 0) { message('Internet connection not working.') - return(invisible(NULL)) } -} - + return(invisible(NULL)) + } else { + return(temp_local_file) + } + } # nocov end diff --git a/man/download_file.Rd b/man/download_file.Rd new file mode 100644 index 0000000..d228def --- /dev/null +++ b/man/download_file.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{download_file} +\alias{download_file} +\title{Download file from url} +\usage{ +download_file( + file_url = parent.frame()$file_url, + showProgress = parent.frame()$showProgress, + cache = parent.frame()$cache +) +} +\arguments{ +\item{file_url}{String. 
A url.} + +\item{showProgress}{Logical.} + +\item{cache}{Logical.} +} +\value{ +A string to the address of the file in a tempdir +} +\description{ +Download file from url +} +\examples{ +\dontrun{ if (interactive()) { +# Generate url +file_url <- 'https://github.com/ipeaGIT/censobr/releases/download/v0.0.1/2010_deaths.parquet' + +# download data +download_file(file_url = file_url, + showProgress = TRUE, + cache = TRUE) +}} +} +\keyword{internal} diff --git a/man/download_flightsbr_file.Rd b/man/download_flightsbr_file.Rd deleted file mode 100644 index f77332b..0000000 --- a/man/download_flightsbr_file.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{download_flightsbr_file} -\alias{download_flightsbr_file} -\title{Download file from url} -\usage{ -download_flightsbr_file( - file_url = parent.frame()$file_url, - showProgress = parent.frame()$showProgress, - dest_file = temp_local_file -) -} -\arguments{ -\item{file_url}{String. A url passed from.} - -\item{showProgress}{Logical, passed from} - -\item{dest_file}{String, passed from} -} -\value{ -Silently saves downloaded file to temp dir. -} -\description{ -Download file from url -} -\examples{ -\dontrun{ if (interactive()) { -# Generate url -file_url <- get_flights_url(type='basica', year=2000, month=11) - -# download data -download_flightsbr_file(file_url=file_url, - showProgress=TRUE, - dest_file = tempfile(fileext = ".zip") - ) -}} -} -\keyword{internal} diff --git a/man/read_deaths.Rd b/man/read_deaths.Rd index 2bc7bff..23e0d62 100644 --- a/man/read_deaths.Rd +++ b/man/read_deaths.Rd @@ -4,7 +4,13 @@ \alias{read_deaths} \title{Download microdata of death records from Brazil's census} \usage{ -read_deaths(year = 2010, columns = NULL, as_data_frame = TRUE) +read_deaths( + year = 2010, + columns = NULL, + as_data_frame = TRUE, + showProgress = TRUE, + cache = TRUE +) } \arguments{ \item{year}{Numeric. 
Year of reference in the format \code{yyyymm}. Defaults to @@ -15,6 +21,11 @@ columns are not read. Defaults to \code{NULL} and read all columns.} \item{as_data_frame}{Logical. Whether the function should return a \code{data.frame} (Default) or an Arrow Table. Defaults to \code{TRUE}.} + +\item{showProgress}{Logical. Whether to display a download progress bar. Defaults to \code{TRUE}.} + +\item{cache}{Logical. Whether the function should read cached data downloaded +previously in the same R session. Defaults to \code{TRUE}.} } \value{ An Arrow table or a \code{"data.frame"} object. diff --git a/man/roxygen/templates/cache.R b/man/roxygen/templates/cache.R new file mode 100644 index 0000000..ec706a7 --- /dev/null +++ b/man/roxygen/templates/cache.R @@ -0,0 +1,2 @@ +#' @param cache Logical. Whether the function should read cached data downloaded +#' previously in the same R session. Defaults to `TRUE`. diff --git a/man/roxygen/templates/showProgress.R b/man/roxygen/templates/showProgress.R new file mode 100644 index 0000000..aecfccc --- /dev/null +++ b/man/roxygen/templates/showProgress.R @@ -0,0 +1 @@ +#' @param showProgress Logical. Whether to display a download progress bar. Defaults to `TRUE`. 
diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..3792f2b --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(censobr) + +test_check("censobr") diff --git a/tests/testthat/test_read_deaths.R b/tests/testthat/test_read_deaths.R new file mode 100644 index 0000000..8c6aafa --- /dev/null +++ b/tests/testthat/test_read_deaths.R @@ -0,0 +1,54 @@ +context("read_deaths") + +# skip tests because they take too much time +skip_if(Sys.getenv("TEST_ONE") != "") +testthat::skip_on_cran() + + +# Reading the data ----------------------- + +test_that("read_deaths", { + + # (default) + test1 <- read_deaths() + testthat::expect_true(is(test1, "data.frame")) + testthat::expect_true(nrow(test1) >0 ) + testthat::expect_equal( class(test1$V0010), 'numeric') + + # select columns + cols <- c('V0002') + test2 <- read_deaths(columns = cols) + testthat::expect_true(names(test2) %in% cols) + + # arrow table + test3 <- read_deaths(as_data_frame = FALSE) + testthat::expect_true(is(test3, "ArrowTabular")) + + # check whether cache argument is working + time_first <- system.time( + t1 <- read_deaths(year = 2010, as_data_frame = FALSE)) + + time_cache_true <- system.time( + t2 <- read_deaths(year = 2010, as_data_frame = FALSE, cache = TRUE)) + + time_cache_false <- system.time( + t3 <- read_deaths(year = 2010, as_data_frame = FALSE, cache = FALSE)) + + testthat::expect_true( time_cache_true[['elapsed']] < time_cache_false[['elapsed']] ) + + }) + + +# ERRORS and messages ----------------------- +test_that("read_deaths", { + + # Wrong date 4 digits + testthat::expect_error(read_deaths(year=999)) + testthat::expect_error(read_deaths(year='999')) + testthat::expect_error(read_deaths(columns = 'banana')) + testthat::expect_error(read_deaths(as_data_frame = 'banana')) + testthat::expect_error(read_deaths(showProgress = 'banana' )) + testthat::expect_error(read_deaths(cache = 'banana')) + + +}) diff --git a/tests_rafa/test_rafa.R 
b/tests_rafa/test_rafa.R index 3a85d0e..051eb6d 100644 --- a/tests_rafa/test_rafa.R +++ b/tests_rafa/test_rafa.R @@ -13,8 +13,8 @@ Sys.setenv(NOT_CRAN = "true") # each function separately -t1 <- covr::function_coverage(fun=read_aircrafts, test_file("tests/testthat/test_read_aircrafts.R")) - +t1 <- covr::function_coverage(fun=read_deaths, test_file("tests/testthat/test_read_deaths.R")) +t1 # nocov start