Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

emile's code review feedback #14

Open
wants to merge 1 commit into
base: interaction
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion 4_misc/outreach/press/check_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ source(glue("{REPO}/energy-code-release-2020/4_misc/",
"outreach/press/energy_outreach_data.R"))
data_root <- "/mnt/CIL_energy/impacts_outreach/"


# Are you checking anywhere that the total energy files, non converted to gdp, are the sum of the other ?
# If that is doable at all ?

# Check completeless of data
## year: verify that files with the following filenames have the corresponding columns
Expand Down Expand Up @@ -49,6 +50,7 @@ d = do.call(rbind, mcmapply(
mc.cores = 60
))

# using the 5 numbers below assume they are correct. Probably trivial, but who knows, have you checked?
# check that files have correct number of rows
len_all_IRs = length(return_region_list("all_IRs"))
len_states = length(return_region_list("states"))
Expand Down Expand Up @@ -140,6 +142,9 @@ check_invalid_values <- function(file) {
if (apply_function(func = function(x) is.na(x), dat)) {return(glue("{file} has NAs"))}
if (apply_function(func = function(x) is.nan(x), dat)) {return(glue("{file} has NaNs"))}
if (apply_function(func = function(x) is.infinite(x), dat)) {return(glue("{file} has Infs"))}

#10e30 is quite arbitrary. have you done something looking at various summary statistics across all files?
# like the kind of stuff we do after projections now.
if (apply_function(func = function(x) abs(x) > 10e30, dat)) {return(glue("{file} has large values"))}
}

Expand Down Expand Up @@ -175,6 +180,7 @@ check_pct_gdp <- function(file) {
if (!is.na(y)) return(y)
else return(FALSE)
}
# have you checked zero or negative values?
if (apply_function(func = function(x) abs(x)>=100, dat)) {return(glue("{file} has >100 values"))}
}

Expand Down Expand Up @@ -212,6 +218,8 @@ check_unit_conversion <- function(file) {
if (!is.na(y)) return(y)
else return(FALSE)
}

# why floor ? isn't conversion 277.778?
if (!apply_function(func = function(x) floor(x)==277, r)) {return(glue("{file} has conversion problem"))}
}

Expand Down
11 changes: 11 additions & 0 deletions 4_misc/outreach/press/energy_outreach_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ library(haven)
library(ncdf4)
library(tidyr)


# I think you also want to have the README (that's in the repo, describing the data) in the root of the data folder itself, no ? Remember doing that for Hannah.

# cilpath.r:::cilpath()
db = '/mnt/CIL_energy/'
output = '/mnt/CIL_energy/'
Expand All @@ -25,6 +28,9 @@ setwd(paste0(REPO))
# Source codes that help us load projection system outputs
miceadds::source.all(paste0(projection.packages,"load_projection/"))

# since the function below seems to be the 'top' main code to run, it would be nice to have more details about data sources,
# for example, what the gdp data is, how the statistics are computed, what time steps mean exactly etc...
# of course only if that is not documented somewhere else, it might be in the README.. in which case it's fine.
#' Wrapper that calls get_energy_impacts, transform, reshape and save results
#' @param time_step what years to output ("averaged","all")
#' @param impact_type unit of output ("impacts_gj", "impacts_kwh", "impacts_pct_gdp")
Expand Down Expand Up @@ -108,6 +114,10 @@ select_and_transform = function(df, impact_type, resolution, stats, ...) {
}


# would be nice to document that function the same way as the functions above with the same format. I,e what each parameter
# means and what the values can be. So that people checkign/using it don't have to look at the details of the code.

# Same for the functions below, perhaps not all of them, but at elast the important ones, for example get_energy_impacts.
# reshape output and save to file
reshape_and_save = function(df, stats, resolution, impact_type, time_step, rcp, fuel, export,...) {

Expand Down Expand Up @@ -339,6 +349,7 @@ return_region_list = function(regions) {
return(regions)
}

# what kind of GDP data is that ? where does it come from
# get regional GDP time series at the spatial resolution specified
return_region_gdp = function(resolution) {

Expand Down
35 changes: 31 additions & 4 deletions 4_misc/outreach/press/energy_outreach_data_script.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@ library(glue)
library(parallel)
library(vroom)


# (1)
# would be nice to have this ready to be run as much as possible.
# Omitting the repo path problem, at least uncommenting stuff and have the script
# be such that if it is run correctly, it produces all what's needed an in the outreach data, without
# testing stuff around.


# (2)
# the global files look a little weird, not sure that was the case for mortality, but just wanted to flag. E.g. :
# Global year_2020 year_2021
# 0.022236756 0.022411465


# (3)
# I noticed a lot of empty rows in the country files. For example :
# " unit_total_energy_impacts_pct_gdp_geography_country_level_years_all_rcp85_SSP3_quantiles_q5.csv"
# is this normal ?

REPO <- "/home/liruixue/repos"

source(glue("{REPO}/mortality/utils/wrap_mapply.R"))
Expand All @@ -11,7 +30,7 @@ source(glue("{REPO}/energy-code-release-2020/4_misc/",
"outreach/press/energy_outreach_data.R"))


# # testing function
# # # testing function
# out = ProcessImpacts(
# time_step="all",
# impact_type="impacts_pct_gdp",
Expand All @@ -20,7 +39,10 @@ source(glue("{REPO}/energy-code-release-2020/4_misc/",
# stats="q50",
# fuel = "total_energy",
# regenerate = FALSE,
# export = TRUE)
# export = FALSE)





# ###########################################################
Expand Down Expand Up @@ -160,6 +182,11 @@ out = wrap_mapply(
# )



# Here you're iterating over all IR files to pick cities and save aside for the 'city' style output right?
# Could wrap this in one single function, so that it looks as clean as the above ? Or other suggestion, keep only
# the last call (wrap_mapply) and the above moved to energy_outreach_data.R. Since this is suppoed to be only a script (calling stuff).

# filter 500k cities from all IR level files

path = "/mnt/CIL_energy/impacts_outreach/"
Expand All @@ -170,8 +197,8 @@ all_IRs_files = list.files(path = path,
recursive = TRUE,
include.dirs = TRUE)

cities_500k = read_csv("~/repos/energy-code-release-2020/data/500k_cities.csv") %>%
select(city, country, Region_ID)
cities_500k = read_csv("~/repos/energy-code-release-2020/data/500k_cities.csv") %>%
select(city, country, Region_ID)

cities_500k_regions = unlist(cities_500k$Region_ID)

Expand Down