ClimateImpactLab · ruixue-li · Jul 6, 2021
diff --git a/4_misc/outreach/press/check_files.R b/4_misc/outreach/press/check_files.R
@@ -5,7 +5,8 @@ source(glue("{REPO}/energy-code-release-2020/4_misc/",
     "outreach/press/energy_outreach_data.R"))
 data_root <- "/mnt/CIL_energy/impacts_outreach/"
 
-
+# Are you checking anywhere that the total energy files, non converted to gdp, are the sum of the other ? 
+# If that is doable at all ?
 
 # Check completeless of data
 ## year: verify that files with the following filenames have the corresponding columns
@@ -49,6 +50,7 @@ d = do.call(rbind, mcmapply(
 	mc.cores = 60
 	))
 
+# using the 5 numbers below assume they are correct. Probably trivial, but who knows, have you checked? 
 # check that files have correct number of rows
 len_all_IRs = length(return_region_list("all_IRs"))
 len_states = length(return_region_list("states"))
@@ -140,6 +142,9 @@ check_invalid_values <- function(file) {
 	if (apply_function(func = function(x) is.na(x), dat)) {return(glue("{file} has NAs"))}
 	if (apply_function(func = function(x) is.nan(x), dat)) {return(glue("{file} has NaNs"))}
 	if (apply_function(func = function(x) is.infinite(x), dat)) {return(glue("{file} has Infs"))}
+
+	#10e30 is quite arbitrary. have you done something looking at various summary statistics across all files? 
+	# like the kind of stuff we do after projections now. 
 	if (apply_function(func = function(x) abs(x) > 10e30, dat)) {return(glue("{file} has large values"))}
 }
 
@@ -175,6 +180,7 @@ check_pct_gdp <- function(file) {
 		if (!is.na(y)) return(y)
 		else return(FALSE)
 	}
+	# have you checked zero or negative values? 
 	if (apply_function(func = function(x) abs(x)>=100, dat)) {return(glue("{file} has >100 values"))}
 }
 
@@ -212,6 +218,8 @@ check_unit_conversion <- function(file) {
 		if (!is.na(y)) return(y)
 		else return(FALSE)
 	}
+
+	# why floor ? isn't conversion 277.778?
 	if (!apply_function(func = function(x) floor(x)==277, r)) {return(glue("{file} has conversion problem"))}
 }
 

diff --git a/4_misc/outreach/press/energy_outreach_data.R b/4_misc/outreach/press/energy_outreach_data.R
@@ -9,6 +9,9 @@ library(haven)
 library(ncdf4)
 library(tidyr)
 
+
+# I think you also want to have the README (that's in the repo, describing the data) in the root of the data folder itself, no ? Remember doing that for Hannah. 
+
 # cilpath.r:::cilpath()
 db = '/mnt/CIL_energy/'
 output = '/mnt/CIL_energy/'
@@ -25,6 +28,9 @@ setwd(paste0(REPO))
 # Source codes that help us load projection system outputs
 miceadds::source.all(paste0(projection.packages,"load_projection/"))
 
+# since the function below seems to be the 'top' main code to run, it would be nice to have more details about data sources,
+# for example, what the gdp data is, how the statistics are computed, what time steps mean exactly etc...
+# of course only if that is not documented somewhere else, it might be in the README.. in which case it's fine. 
 #' Wrapper that calls get_energy_impacts, transform, reshape and save results
 #' @param time_step what years to output ("averaged","all")
 #' @param impact_type unit of output ("impacts_gj", "impacts_kwh", "impacts_pct_gdp")
@@ -108,6 +114,10 @@ select_and_transform = function(df, impact_type, resolution, stats, ...) {
 }
 
 
+# would be nice to document that function the same way as the functions above with the same format. I,e what each parameter
+# means and what the values can be. So that people checkign/using it don't have to look at the details of the code. 
+
+# Same for the functions below, perhaps not all of them, but at elast the important ones, for example get_energy_impacts. 
 # reshape output and save to file
 reshape_and_save = function(df, stats, resolution, impact_type, time_step, rcp, fuel, export,...) {
 
@@ -339,6 +349,7 @@ return_region_list = function(regions) {
     return(regions)
 }
 
+# what kind of GDP data is that ? where does it come from 
 # get regional GDP time series at the spatial resolution specified
 return_region_gdp = function(resolution) {
 

diff --git a/4_misc/outreach/press/energy_outreach_data_script.R b/4_misc/outreach/press/energy_outreach_data_script.R
@@ -3,6 +3,25 @@ library(glue)
 library(parallel)
 library(vroom)
 
+
+# (1) 
+# would be nice to have this ready to be run as much as possible. 
+# Omitting the repo path problem, at least uncommenting stuff and have the script
+# be such that if it is run correctly, it produces all what's needed an in the outreach data, without
+# testing stuff around. 
+
+
+# (2) 
+# the global files look a little weird, not sure that was the case for mortality, but just wanted to flag. E.g. : 
+# Global  year_2020 year_2021
+#  0.022236756 0.022411465
+
+
+# (3)
+# I noticed a lot of empty rows in the country files. For example : 
+# " unit_total_energy_impacts_pct_gdp_geography_country_level_years_all_rcp85_SSP3_quantiles_q5.csv"
+# is this normal ?
+
 REPO <- "/home/liruixue/repos"
 
 source(glue("{REPO}/mortality/utils/wrap_mapply.R"))
@@ -11,7 +30,7 @@ source(glue("{REPO}/energy-code-release-2020/4_misc/",
     "outreach/press/energy_outreach_data.R"))
 
 
-# # testing function
+# # # testing function
 # out = ProcessImpacts(
 #   time_step="all",
 #   impact_type="impacts_pct_gdp",
@@ -20,7 +39,10 @@ source(glue("{REPO}/energy-code-release-2020/4_misc/",
 #   stats="q50",
 #   fuel = "total_energy",
 #   regenerate = FALSE,
-#   export = TRUE)
+#   export = FALSE)
+
+
+
 
 
 # ###########################################################
@@ -160,6 +182,11 @@ out = wrap_mapply(
 # )
 
 
+
+# Here you're iterating over all IR files to pick cities and save aside for the 'city' style output right?
+# Could wrap this in one single function, so that it looks as clean as the above ? Or other suggestion, keep only
+# the last call (wrap_mapply) and the above moved to energy_outreach_data.R.  Since this is suppoed to be only a script (calling stuff). 
+
 # filter 500k cities from all IR level files
 
 path = "/mnt/CIL_energy/impacts_outreach/"
@@ -170,8 +197,8 @@ all_IRs_files = list.files(path = path,
   recursive = TRUE,
   include.dirs = TRUE)
 
-cities_500k = read_csv("~/repos/energy-code-release-2020/data/500k_cities.csv")	%>% 
-	select(city, country, Region_ID)
+cities_500k = read_csv("~/repos/energy-code-release-2020/data/500k_cities.csv") %>% 
+  select(city, country, Region_ID)
 
 cities_500k_regions = unlist(cities_500k$Region_ID)