Skip to content

Commit

Permalink
fixing 2022 census tracts
Browse files Browse the repository at this point in the history
  • Loading branch information
rafapereirabr committed Jul 29, 2024
1 parent aaaa1d4 commit 8a8dd9b
Showing 1 changed file with 21 additions and 10 deletions.
31 changes: 21 additions & 10 deletions data_prep/R/census_tract_2022.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dir.create(dest_dir, recursive = T)

# Download step: only the 2022 edition is handled by this branch.
if(year == 2022){

# URL of the national (BR) preliminary 2022 census-tract mesh, a zipped GeoPackage on the IBGE server
ftp <- 'https://ftp.ibge.gov.br/Censos/Censo_Demografico_2022/Agregados_por_Setores_Censitarios_preliminares/malha_com_atributos/setores/gpkg/BR/BR_Malha_Preliminar_2022.zip'
# download_file() is defined elsewhere; its result is kept in dest_file and later passed to the unzip step
# NOTE(review): presumably dest_file is the local path of the downloaded zip inside raw_dir - confirm against download_file()
dest_file <- download_file(file_url = ftp, dest_dir = raw_dir)

}
Expand All @@ -28,21 +29,22 @@ if(year == 2022){
temp_dir <- tempdir()

unzip(dest_file, exdir = temp_dir)
local_file <- list.files(temp_dir, full.names = T, pattern = 'gpkg')

local_file <- unzip_fun(dest_file)


# read and save original raw data
df <- sf::st_read(local_file)
saveRDS(df, paste0(raw_dir,'/BR_Malha_Preliminar_2022.rds'))

df <- sf::st_read('./data_raw/census_tracts/2022/BR_Malha_Preliminar_2022.gpkg')
saveRDS(df, './data_raw/census_tracts/2022/BR_Malha_Preliminar_2022.rds')



#### 1. clean and save data -----------------

df$AREA_KM2 <- NULL
#### 1. clean and save data -----------------
df <- readRDS(paste0(raw_dir,'/BR_Malha_Preliminar_2022.rds'))

temp_sf <- dplyr::rename(df,
temp_sf <- dplyr::select(df,
code_tract = CD_SETOR,
code_muni = CD_MUN,
name_muni = NM_MUN,
Expand Down Expand Up @@ -73,11 +75,15 @@ temp_sf <- mutate(temp_sf, code_tract = gsub("P","", code_tract))
head(temp_sf)


# make all columns as character
char_cols <- names(temp_sf)
char_cols <- char_cols[char_cols %like% 'code_|name_']
# make all name columns as character
all_cols <- names(temp_sf)
char_cols <- all_cols[all_cols %like% 'name_']
temp_sf <- mutate(temp_sf, across(all_of(char_cols), as.character))

# make all code columns numeric
# Select by num_cols (the 'code_' columns). Passing char_cols here would run
# as.numeric() over the 'name_' columns, turning them into NA, and would leave
# the code columns unconverted.
num_cols <- all_cols[all_cols %like% 'code_']
temp_sf <- mutate(temp_sf, across(all_of(num_cols), as.numeric))


# Use UTF-8 encoding
temp_sf <- use_encoding_utf8(temp_sf)
Expand All @@ -91,14 +97,19 @@ gc()

save_state <- function(code_uf){ # code_uf <- 33

message(code_uf)

temp_sf2 <- subset(temp_sf, code_state == code_uf)
# temp_sf2 <- subset(temp_sf2, code_muni == '3304557')
# temp_sf2 <- subset(temp_sf, code_muni == '3304557')

# convert to MULTIPOLYGON
temp_sf2 <- to_multipolygon(temp_sf2)

temp_sf2 <- fix_topoly(temp_sf2)

# simplify
temp_sf_simplified <- simplify_temp_sf(temp_sf2, tolerance = 10)
temp_sf_simplified <- fix_topoly(temp_sf_simplified)

# Save cleaned sf in the cleaned directory
sf::st_write(temp_sf2, paste0(dest_dir,'/', code_uf,'census_tract_', year, '.gpkg'))
Expand Down

0 comments on commit 8a8dd9b

Please sign in to comment.