diff --git a/4_misc/outreach/press/check_files.R b/4_misc/outreach/press/check_files.R index f8282bf13..4de67f9fd 100644 --- a/4_misc/outreach/press/check_files.R +++ b/4_misc/outreach/press/check_files.R @@ -5,7 +5,8 @@ source(glue("{REPO}/energy-code-release-2020/4_misc/", "outreach/press/energy_outreach_data.R")) data_root <- "/mnt/CIL_energy/impacts_outreach/" - +# Are you checking anywhere that the total energy files, not converted to GDP, are the sum of the others? +# If that is doable at all? # Check completeless of data ## year: verify that files with the following filenames have the corresponding columns @@ -49,6 +50,7 @@ d = do.call(rbind, mcmapply( mc.cores = 60 )) +# Using the 5 numbers below assumes they are correct. Probably trivial, but who knows, have you checked? # check that files have correct number of rows len_all_IRs = length(return_region_list("all_IRs")) len_states = length(return_region_list("states")) @@ -140,6 +142,9 @@ check_invalid_values <- function(file) { if (apply_function(func = function(x) is.na(x), dat)) {return(glue("{file} has NAs"))} if (apply_function(func = function(x) is.nan(x), dat)) {return(glue("{file} has NaNs"))} if (apply_function(func = function(x) is.infinite(x), dat)) {return(glue("{file} has Infs"))} + + # 10e30 is quite arbitrary. Have you looked at various summary statistics across all files, + # like the kind of checks we do after projections now? if (apply_function(func = function(x) abs(x) > 10e30, dat)) {return(glue("{file} has large values"))} } @@ -175,6 +180,7 @@ check_pct_gdp <- function(file) { if (!is.na(y)) return(y) else return(FALSE) } + # Have you checked for zero or negative values? if (apply_function(func = function(x) abs(x)>=100, dat)) {return(glue("{file} has >100 values"))} } @@ -212,6 +218,8 @@ check_unit_conversion <- function(file) { if (!is.na(y)) return(y) else return(FALSE) } + + # Why floor? Isn't the conversion factor 277.778? 
if (!apply_function(func = function(x) floor(x)==277, r)) {return(glue("{file} has conversion problem"))} } diff --git a/4_misc/outreach/press/energy_outreach_data.R b/4_misc/outreach/press/energy_outreach_data.R index bb77a9304..8c7b963f7 100644 --- a/4_misc/outreach/press/energy_outreach_data.R +++ b/4_misc/outreach/press/energy_outreach_data.R @@ -9,6 +9,9 @@ library(haven) library(ncdf4) library(tidyr) + +# I think you also want to have the README (that's in the repo, describing the data) in the root of the data folder itself, no? Remember doing that for Hannah. + # cilpath.r:::cilpath() db = '/mnt/CIL_energy/' output = '/mnt/CIL_energy/' @@ -25,6 +28,9 @@ setwd(paste0(REPO)) # Source codes that help us load projection system outputs miceadds::source.all(paste0(projection.packages,"load_projection/")) +# Since the function below seems to be the 'top' main code to run, it would be nice to have more details about data sources, +# for example, what the GDP data is, how the statistics are computed, what the time steps mean exactly, etc. +# Of course, only if that is not documented somewhere else; it might be in the README, in which case it's fine. #' Wrapper that calls get_energy_impacts, transform, reshape and save results #' @param time_step what years to output ("averaged","all") #' @param impact_type unit of output ("impacts_gj", "impacts_kwh", "impacts_pct_gdp") @@ -108,6 +114,10 @@ select_and_transform = function(df, impact_type, resolution, stats, ...) { } +# It would be nice to document that function the same way as the functions above, with the same format, i.e., what each parameter +# means and what the values can be, so that people checking/using it don't have to look at the details of the code. + +# Same for the functions below, perhaps not all of them, but at least the important ones, for example get_energy_impacts. # reshape output and save to file reshape_and_save = function(df, stats, resolution, impact_type, time_step, rcp, fuel, export,...) 
{ @@ -339,6 +349,7 @@ return_region_list = function(regions) { return(regions) } +# What kind of GDP data is that? Where does it come from? # get regional GDP time series at the spatial resolution specified return_region_gdp = function(resolution) { diff --git a/4_misc/outreach/press/energy_outreach_data_script.R b/4_misc/outreach/press/energy_outreach_data_script.R index bfdd95d99..6055cc083 100644 --- a/4_misc/outreach/press/energy_outreach_data_script.R +++ b/4_misc/outreach/press/energy_outreach_data_script.R @@ -3,6 +3,25 @@ library(glue) library(parallel) library(vroom) + +# (1) +# It would be nice to have this ready to be run as much as possible. +# Leaving aside the repo path problem, at least uncomment things and make the script +# such that, if it is run correctly, it produces everything that's needed in the outreach data, without +# testing stuff around. + + +# (2) +# The global files look a little weird; not sure that was the case for mortality, but just wanted to flag. E.g.: +# Global year_2020 year_2021 +# 0.022236756 0.022411465 + + +# (3) +# I noticed a lot of empty rows in the country files. For example: +# " unit_total_energy_impacts_pct_gdp_geography_country_level_years_all_rcp85_SSP3_quantiles_q5.csv" +# Is this normal? + REPO <- "/home/liruixue/repos" source(glue("{REPO}/mortality/utils/wrap_mapply.R")) @@ -11,7 +30,7 @@ source(glue("{REPO}/energy-code-release-2020/4_misc/", "outreach/press/energy_outreach_data.R")) -# # testing function +# # # testing function # out = ProcessImpacts( # time_step="all", # impact_type="impacts_pct_gdp", @@ -20,7 +39,10 @@ source(glue("{REPO}/energy-code-release-2020/4_misc/", # stats="q50", # fuel = "total_energy", # regenerate = FALSE, -# export = TRUE) +# export = FALSE) + + + # ########################################################### @@ -160,6 +182,11 @@ out = wrap_mapply( # ) + +# Here you're iterating over all IR files to pick cities and save them aside for the 'city' style output, right? 
+# Could you wrap this in one single function, so that it looks as clean as the above? Or, another suggestion: keep only +# the last call (wrap_mapply) here and move the rest to energy_outreach_data.R, since this is supposed to be only a script (calling stuff). + # filter 500k cities from all IR level files path = "/mnt/CIL_energy/impacts_outreach/" @@ -170,8 +197,8 @@ all_IRs_files = list.files(path = path, recursive = TRUE, include.dirs = TRUE) -cities_500k = read_csv("~/repos/energy-code-release-2020/data/500k_cities.csv") %>% - select(city, country, Region_ID) +cities_500k = read_csv("~/repos/energy-code-release-2020/data/500k_cities.csv") %>% + select(city, country, Region_ID) cities_500k_regions = unlist(cities_500k$Region_ID)