Skip to content

Commit

Permalink
full read_deaths
Browse files Browse the repository at this point in the history
  • Loading branch information
rafapereirabr committed Aug 24, 2023
1 parent 5037b98 commit 80eca53
Show file tree
Hide file tree
Showing 11 changed files with 167 additions and 98 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: censobr
Title: Download Data from Brazil's Population Census
Version: 0.0.01
Version: 0.0.1
Authors@R:
c(person(given="Rafael H. M.", family="Pereira",
email="rafa.pereira.br@gmail.com",
Expand Down
36 changes: 17 additions & 19 deletions R/read_deaths.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#' @template year
#' @template columns
#' @template as_data_frame
#' @template showProgress
#' @template cache
#'
#' @return An Arrow table or a `"data.frame"` object.
#' @export
Expand All @@ -20,46 +22,42 @@
#'}}
read_deaths <- function(year = 2010,
columns = NULL,
as_data_frame = TRUE){
as_data_frame = TRUE,
showProgress = TRUE,
cache = TRUE){

### check inputs
checkmate::assert_logical(as_data_frame)
checkmate::assert_vector(columns, null.ok = TRUE)
checkmate::assert_numeric(year)
checkmate::assert_vector(columns, null.ok = TRUE)
checkmate::assert_logical(as_data_frame)
checkmate::assert_logical(showProgress)
checkmate::assert_logical(cache)

years <- c(2010)
if (isFALSE(year %in% years)) { stop(paste0("Error: Data set only available for the years ",
paste(years), collapse = " "))}

### Get url
if (year==2010) { url <- '2010_deaths.parquet' }
if (year==2010) { file_url <- 'https://github.com/ipeaGIT/censobr/releases/download/v0.0.1/2010_deaths.parquet' }


### Download
df <- arrow::read_parquet(url, as_data_frame = FALSE)
local_file <- download_file(file_url = file_url,
showProgress = showProgress,
cache = cache)

# check downloaded
# if (is.null(df)) {message()}
# check if download worked
if(is.null(local_file)) { return(NULL) }

# load('R:/Dropbox/bases_de_dados/censo_demografico/censo_2010/data/censo2010_BRdeaths.Rdata')
#
# head(censo2010_BRdeaths)
#
# df <- arrow::as_arrow_table(censo2010_BRdeaths, )
#
# arrow::write_parquet(df, '2010_deaths.parquet')
# read data
df <- arrow::read_parquet(local_file, as_data_frame = FALSE)


### Select
if (!is.null(columns)) { # columns <- c('V0002','V0011')
df <- dplyr::select(df, columns)
}



df |> dplyr::collect()


### output format
if (isTRUE(as_data_frame)) { return( dplyr::collect(df) )
} else {
Expand Down
74 changes: 37 additions & 37 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,53 +1,53 @@


#' Download file from url
#'
#' @param file_url String. A url passed from.
#' @param showProgress Logical, passed from
#' @param dest_file String, passed from
#'
#' @return Silently saves downloaded file to temp dir.
#' @param file_url String. A url.
#' @param showProgress Logical.
#' @param cache Logical.

#' @return A string to the address of the file in a tempdir
#'
#' @keywords internal
#' @examples \dontrun{ if (interactive()) {
#' # Generate url
#' file_url <- get_flights_url(type='basica', year=2000, month=11)
#' file_url <- 'https://github.com/ipeaGIT/censobr/releases/download/v0.0.1/2010_deaths.parquet'
#'
#' # download data
#' download_flightsbr_file(file_url=file_url,
#' showProgress=TRUE,
#' dest_file = tempfile(fileext = ".zip")
#' )
#' download_file(file_url = file_url,
#' showProgress = TRUE,
#' cache = TRUE)
#'}}
download_flightsbr_file <- function(file_url = parent.frame()$file_url,
showProgress = parent.frame()$showProgress,
dest_file = temp_local_file){

# download data
try(
httr::GET(url=file_url,
if(showProgress==T){ httr::progress()},
httr::write_disk(dest_file, overwrite = T),
config = httr::config(ssl_verifypeer = FALSE)
), silent = TRUE)

# check if file has NOT been downloaded, try a 2nd time
if (!file.exists(dest_file) | file.info(dest_file)$size == 0) {

# download data: try a 2nd time
try(
download_file <- function(file_url = parent.frame()$file_url,
showProgress = parent.frame()$showProgress,
cache = parent.frame()$cache){ # nocov start

# create temp local file
file_name <- basename(file_url)
temp_local_file <- paste0(tempdir(),"/",file_name)

# use cached files or not
if (cache==FALSE & file.exists(temp_local_file)) {
unlink(temp_local_file, recursive = T)
}

# has the file been downloaded already? If not, download it
if (cache==FALSE | !file.exists(temp_local_file) | file.info(temp_local_file)$size == 0) {

# download data
try(silent = TRUE,
httr::GET(url=file_url,
if(showProgress==T){ httr::progress()},
httr::write_disk(dest_file, overwrite = T),
config = httr::config(ssl_verifypeer = FALSE)
), silent = TRUE)
if(showProgress==TRUE){ httr::progress()},
httr::write_disk(temp_local_file, overwrite = T),
config = httr::config(ssl_verifypeer = FALSE))
)
}

# Halt function if download failed
if (!file.exists(dest_file) | file.info(dest_file)$size == 0) {
if (!file.exists(temp_local_file) | file.info(temp_local_file)$size == 0) {
message('Internet connection not working.')
return(invisible(NULL)) }
}

return(invisible(NULL))

} else {
return(temp_local_file)
}
} # nocov end

37 changes: 37 additions & 0 deletions man/download_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 0 additions & 38 deletions man/download_flightsbr_file.Rd

This file was deleted.

13 changes: 12 additions & 1 deletion man/read_deaths.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/roxygen/templates/cache.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#' @param cache Logical. Whether the function should read cached data downloaded
#' previously in the same R session. Defaults to `TRUE`.
1 change: 1 addition & 0 deletions man/roxygen/templates/showProgress.R
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#' @param showProgress Logical. Defaults to `TRUE` display download progress bar.
4 changes: 4 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
library(testthat)
library(censobr)

test_check("censobr")
54 changes: 54 additions & 0 deletions tests/testthat/test_read_deaths.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
context("read_deaths")

# skip tests because they take too much time
skip_if(Sys.getenv("TEST_ONE") != "")
testthat::skip_on_cran()


# Reading the data -----------------------

test_that("read_deaths", {

# (default)
test1 <- read_deaths()
testthat::expect_true(is(test1, "data.frame"))
testthat::expect_true(nrow(test1) >0 )
testthat::expect_equal( class(test1$V0010), 'numeric')

# select columns
cols <- c('V0002')
test2 <- read_deaths(columns = cols)
testthat::expect_true(names(test2) %in% cols)

# arrow table
test3 <- read_deaths(as_data_frame = FALSE)
testthat::expect_true(is(test3, "ArrowTabular"))

# check whether cache argument is working
time_first <- system.time(
t1 <- read_deaths(year = 2010, as_data_frame = FALSE))

time_cache_true <- system.time(
t2 <- read_deaths(year = 2010, as_data_frame = FALSE, cache = TRUE))

time_cache_false <- system.time(
t3 <- read_deaths(year = 2010, as_data_frame = FALSE, cache = FALSE))

testthat::expect_true( time_cache_true[['elapsed']] < time_cache_false[['elapsed']] )

})


# ERRORS and messages -----------------------
test_that("read_deaths", {

# Wrong date 4 digits
testthat::expect_error(read_deaths(year=999))
testthat::expect_error(read_deaths(year='999'))
testthat::expect_error(read_deaths(columns = 'banana'))
testthat::expect_error(read_deaths(as_data_frame = 'banana'))
testthat::expect_error(read_deaths(showProgress = 'banana' ))
testthat::expect_error(read_deaths(cache = 'banana'))


})
4 changes: 2 additions & 2 deletions tests_rafa/test_rafa.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ Sys.setenv(NOT_CRAN = "true")


# each function separately
t1 <- covr::function_coverage(fun=read_aircrafts, test_file("tests/testthat/test_read_aircrafts.R"))

t1 <- covr::function_coverage(fun=read_deaths, test_file("tests/testthat/test_read_deaths.R"))
t1

# nocov start

Expand Down

0 comments on commit 80eca53

Please sign in to comment.