From 12987e3e7fb15e02ac156c361c232d48f325bb58 Mon Sep 17 00:00:00 2001 From: David Klein Date: Tue, 24 Oct 2023 13:56:39 +0200 Subject: [PATCH 1/4] Clean and update code: - set minimal bioenergy demand in MAgPIE to zero to avoid multiple scenarios with the same demand but different prices. This improves the fits in many cases. - remove deprecated calculation of co2 price trajectories. They are now only taken from REMIND reports - add mute_ghgprices_until to the scenario_config_emulator.csv to have the same tax regimes as in REMIND-MAgPIE coupled runs - update order in which the emulator runs are started. The runs will be started in descending order according to their running time in order to achieve the lowest possible total running time. --- config/scenario_config_emulator.csv | 22 ++++-- scripts/start/extra/emulator.R | 116 ++++++++++------------------ 2 files changed, 56 insertions(+), 82 deletions(-) diff --git a/config/scenario_config_emulator.csv b/config/scenario_config_emulator.csv index e415b5b1a8..d071e157ae 100644 --- a/config/scenario_config_emulator.csv +++ b/config/scenario_config_emulator.csv @@ -1,6 +1,16 @@ -title;start;mag_scen;co2tax_name;co2tax_2025 -2;1;SSP2,NPI;Base;C_SSP2-Base-rem-5 -3;0;SSP2,NDC;NDC;C_SSP2-NDC-rem-5 -4;0;SSP2,NDC;PkBudg1300;C_SSP2-PkBudg1300-rem-5 -5;0;SSP2,NDC;PkBudg1100;C_SSP2-PkBudg1100-rem-5 -5;0;SSP2,NDC;PkBudg900;C_SSP2-PkBudg900-rem-5 +start;mag_scen;ghgtax_name;mifname;no_ghgprices_land_until +0;SDP-MC|NPI|nocc_hist;Base; C_SDP_MC-Base;y2150 +0;SDP-MC|NDC|nocc_hist;NDC; C_SDP_MC-NDC;y2150 +0;SDP-MC|NDC|nocc_hist;PkBudg500; C_SDP_MC-PkBudg500;y2030 +0;SSP1|NPI|nocc_hist;Base; C_SSP1-Base;y2150 +0;SSP1|NDC|nocc_hist;NDC; C_SSP1-NDC;y2150 +0;SSP1|NDC|nocc_hist;PkBudg1150; C_SSP1-PkBudg1150;y2030 +0;SSP1|NDC|nocc_hist;PkBudg500; C_SSP1-PkBudg500;y2030 +0;SSP2|NPI|nocc_hist;Base; C_SSP2EU-Base;y2150 +0;SSP2|NDC|nocc_hist;NDC; C_SSP2EU-NDC;y2150 +1;SSP2|NDC|nocc_hist;PkBudg1150; C_SSP2EU-PkBudg1150;y2030 +0;SSP2|NDC|nocc_hist;PkBudg500; C_SSP2EU-PkBudg500;y2030 +0;SSP5|NPI|nocc_hist;Base; C_SSP5-Base;y2150 +0;SSP5|NDC|nocc_hist;NDC; C_SSP5-NDC;y2150 +0;SSP5|NDC|nocc_hist;PkBudg1150; C_SSP5-PkBudg1150;y2030 +0;SSP5|NDC|nocc_hist;PkBudg500; C_SSP5-PkBudg500;y2030 diff --git a/scripts/start/extra/emulator.R b/scripts/start/extra/emulator.R index 7a5b65bd1c..8926a6a775 100644 --- a/scripts/start/extra/emulator.R +++ b/scripts/start/extra/emulator.R @@ -9,7 +9,6 @@ # description: run simulations for calculation of MAgPIE emulators # --------------------------------------------------------------- - ######################################################### #### Start MAgPIE runs to derive price emulator from #### ######################################################### @@ -26,16 +25,10 @@ source("config/default.cfg") ##################### General settings ######################## ############################################################### +cfg$qos <- "standby" cfg$results_folder <- "output/:title:" -cfg$output <- c("report","emulator") #unique(c(cfg$output,"remind","bioenergy","coupling_report","david")) - -# use old regions: c30c1c580039c2b300d86cc46ff4036a -# use H12 regions: 690d3718e151be1b450b394c1064b1c5 -#cfg$input <- c("isimip_rcp-IPSL_CM5A_LR-rcp2p6-noco2_rev29_h200_690d3718e151be1b450b394c1064b1c5.tgz", -# "rev3.15_690d3718e151be1b450b394c1064b1c5_magpie.tgz", -# "rev3.15_690d3718e151be1b450b394c1064b1c5_validation.tgz", -# "additional_data_rev3.28.tgz", -# "npi_ndc_base_SSP2_fixed.tgz") +cfg$output <- c("rds_report","extra/emulator") #unique(c(cfg$output,"remind","bioenergy","coupling_report","david")) +cfg$gms$s60_2ndgen_bioenergy_dem_min <- 0 # Download bioenergy demand scenarios filemap <- gms::download_unpack(input="emulator.tgz", targetdir="input", repositories=list("/p/projects/landuse/data/input/archive"=NULL), debug=FALSE) @@ -47,9 +40,6 @@ demand <- add_columns(demand,dim = 2.1,addnm = c("y2110","y2130","y2150")) # keep demand constant after 2100 demand[,c("y2110","y2130","y2150"),] <- setYears(demand[,"y2100",]) -#reg <- read.csv2(cfg$regionmapping) # read regional resolution, used for ghg tax -reg <- list(RegionCode = "GLO") - scenarios <- read.csv2("config/scenario_config_emulator.csv",strip.white=TRUE,stringsAsFactors=FALSE) scenarios <- subset(scenarios, subset=(start == "1")) @@ -57,62 +47,36 @@ scenarios <- subset(scenarios, subset=(start == "1")) ######################## Functions ############################ ############################################################### -# calculate expoLinear tax with transition in 2060 -write.ghgtax <- function(co2tax_2025=NULL,regions=NULL,out="./modules/56_ghg_policy/input/f56_pollutant_prices_emulator.cs3") { - - fname <- paste0(scenarios[scen,"co2tax_2025"],".mif") - - if (file.exists(fname)) { - - # If there is a REMIND report with the name, read the GHG prices from the file, otherwise calculate expo-linear tax - cat("Loading CO2 prices from",fname,"\n") - tmp <- read.report(fname, as.list = FALSE) +# Read GHG prices from REMIND or coupled mif file +write.ghgtax <- function(mifname, outfile) { - # Select variables from REMIND report - ghg_price_names <- c("Price|Carbon (US$2005/t CO2)", - "Price|N2O (US$2005/t N2O)", - "Price|CH4 (US$2005/t CH4)") - tmp <- collapseNames(tmp[,,ghg_price_names]) - # remove global dimension - tmp <- tmp["GLO",,,invert=TRUE] + fname <- paste0(mifname, ".mif") - # interpolate missing years (REMIND has less_TS only, emulator script need 5 year time steps) - time <- seq(1995,2100,5) - tmp <- time_interpolate(tmp,interpolated_year=time,extrapolation_type="constant") + if(!file.exists(fname)) stop("Could not find ",fname) - ghgtax <- new.magpie(cells_and_regions = getRegions(tmp),years = time,fill = NA,sets = c("regions","years","gas"),names = c("n2o_n_direct","n2o_n_indirect","ch4","co2_c")) + # If there is a REMIND report with the name, read the GHG prices from the file, otherwise calculate expo-linear tax + cat("Loading GHG prices from",fname,"\n") + tmp <- read.report(fname, as.list = FALSE) - # unit defined in modules/56_ghg_policy/input/f56_pollutant_prices.cs3: US$ 2005 per Mg N2O-N CH4 and CO2-C - ghgtax[,,"co2_c"] <- tmp[,,"Price|Carbon (US$2005/t CO2)"] * 44/12 # US$2005/tCO2 -> US$2005/tC - ghgtax[,,"ch4"] <- tmp[,,"Price|CH4 (US$2005/t CH4)"] - ghgtax[,,"n2o_n_direct"] <- tmp[,,"Price|N2O (US$2005/t N2O)"] * 44/28 # US$2005/tN2O -> US$2005/tN - ghgtax[,,"n2o_n_indirect"] <- tmp[,,"Price|N2O (US$2005/t N2O)"] * 44/28 # US$2005/tN2O -> US$2005/tN + # Select variables from REMIND report + ghg_price_names <- c("Price|Carbon (US$2005/t CO2)", + "Price|N2O (US$2005/t N2O)", + "Price|CH4 (US$2005/t CH4)") + tmp <- collapseNames(tmp[,,ghg_price_names]) + # remove global dimension + tmp <- tmp["GLO",,,invert=TRUE] - } else { - - stop("Could not find ", fname) - - # Calculate expo-linear tax - - if(is.null(co2tax_2025)) stop("No initial value for ghg tax supplied.") - if(is.null(regions)) stop("Please supply regions for ghg tax.") + # interpolate missing years (REMIND has less_TS only, emulator script need 5 year time steps) + time <- seq(1995,2100,5) + tmp <- time_interpolate(tmp,interpolated_year=time,extrapolation_type="constant") - # construct combination of exponential tax (5% increase) until 2050 and linear continuation thereafter (using the slope of 2045-2050) - time <- seq(1995,2100,5) - co2tax <- as.numeric(co2tax_2025) * 1.05 ^(time-2025) - names(co2tax)<-time - slope <- (co2tax["2050"] - co2tax["2045"]) / (2050 - 2045) - co2tax[names(co2tax)>"2050"] <- co2tax["2050"] + slope * (time[time>2050] - 2050) + ghgtax <- new.magpie(cells_and_regions = getRegions(tmp),years = time,fill = NA,sets = c("regions","years","gas"),names = c("n2o_n_direct","n2o_n_indirect","ch4","co2_c")) - ghgtax <- new.magpie(cells_and_regions = regions,years = time,fill = NA,sets = c("regions","years","gas"),names = c("n2o_n_direct","n2o_n_indirect","ch4","co2_c")) - - # unit defined in modules/56_ghg_policy/input/f56_pollutant_prices.cs3: US$ 2005 per Mg N2O-N CH4 and CO2-C - ghgtax[,,"co2_c"] <- as.magpie(co2tax) * 44/12 # US$2005/tCO2 -> US$2005/tC - ghgtax[,,"ch4"] <- as.magpie(co2tax) * 28 # US$2005/tCO2 -> US$2005/tCH4 (using Global Warming Potentials from AR5 WG1 CH08 Table 8.7) - ghgtax[,,"n2o_n_direct"] <- as.magpie(co2tax) * 44/28 * 265 # US$2005/tCO2 -> US$2005/tN (using Global Warming Potentials from AR5 WG1 CH08 Table 8.7) - ghgtax[,,"n2o_n_indirect"] <- as.magpie(co2tax) * 44/28 * 265 # US$2005/tCO2 -> US$2005/tN (using Global Warming Potentials from AR5 WG1 CH08 Table 8.7) - - } + # unit defined in modules/56_ghg_policy/input/f56_pollutant_prices.cs3: US$ 2005 per Mg N2O-N CH4 and CO2-C + ghgtax[,,"co2_c"] <- tmp[,,"Price|Carbon (US$2005/t CO2)"] * 44/12 # US$2005/tCO2 -> US$2005/tC + ghgtax[,,"ch4"] <- tmp[,,"Price|CH4 (US$2005/t CH4)"] + ghgtax[,,"n2o_n_direct"] <- tmp[,,"Price|N2O (US$2005/t N2O)"] * 44/28 # US$2005/tN2O -> US$2005/tN + ghgtax[,,"n2o_n_indirect"] <- tmp[,,"Price|N2O (US$2005/t N2O)"] * 44/28 # US$2005/tN2O -> US$2005/tN # set ghg prices before and in 2020 to zero ghgtax[,getYears(ghgtax)<="y2020",] <- 0 @@ -127,8 +91,8 @@ write.ghgtax <- function(co2tax_2025=NULL,regions=NULL,out="./modules/56_ghg_pol #for_plot <- for_plot[,c("y1995","y2110","y2130","y2150"),,invert=TRUE] txtplot(as.numeric(gsub("y","",getYears(for_plot))),for_plot,ylab="US$2005/tCO2") - cat("Writing GHG tax scenario",scenarios[scen,"co2tax_name"],"\n\n") - write.magpie(ghgtax,file_name = out) + cat("Writing GHG tax scenario",scenarios[scen,"ghgtax_name"],"\n\n") + write.magpie(ghgtax, file_name = outfile) #library(ggplot2) #library(luplot) @@ -144,28 +108,28 @@ for (scen in rownames(scenarios)) { cat("\n################ Scenario",scen,"################\n") # Configure MAgPIE # Set scenario - cfg<-setScenario(cfg,scenario = c(trimws(unlist(strsplit(scenarios[scen,"mag_scen"],split=","))))) + cfg<-setScenario(cfg,scenario = c(trimws(unlist(strsplit(scenarios[scen,"mag_scen"],split="\\|"))))) # emulator has to be set AFTER SSP because SSP would set bioenergy demand to predefined scenario and not to input from this script cfg<-setScenario(cfg,scenario="emulator") # Choose GHG tax scenario - if (scenarios[scen,"co2tax_2025"] == "built-in") { + if (scenarios[scen,"mifname"] == "built-in") { # see magpie/config/default.cfg for available scenarios - cfg$gms$c56_pollutant_prices <- scenarios[scen,"co2tax_name"] + cfg$gms$c56_pollutant_prices <- scenarios[scen,"ghgtax_name"] } else { - # If none of the built-in GHG price scenarios was chosen, provide GHG prices + # If no built-in GHG price scenario was chosen, take GHG prices from REMIND report cfg$gms$c56_pollutant_prices <- "emulator" - write.ghgtax(co2tax_2025=scenarios[scen,"co2tax_2025"],regions=unique(reg$RegionCode)) + cfg$gms$c56_mute_ghgprices_until <- scenarios[scen, "no_ghgprices_land_until"] + write.ghgtax(mifname = scenarios[scen, "mifname"], outfile = "./modules/56_ghg_policy/input/f56_pollutant_prices_emulator.cs3") } # Compose string with scenario name - expname <- paste0(gsub(",","-",scenarios[scen,"mag_scen"]),"-",scenarios[scen,"co2tax_name"]) + expname <- paste0(gsub("\\|","-",scenarios[scen,"mag_scen"]),"-",scenarios[scen,"ghgtax_name"]) - # run numbers sorted in descending by runtime (taken from former SSP2-26 emulator runs) - runtime_order <- c("4","17","34","12","11","22","32","15","21","2","58","18","20","16","19", - "31","67","41","48","54","65","47","13","44","70","28","52","53","62","36","40","9","14","46", - "10","29","38","71","57","50","60","37","64","69","68","51","61","5","27","7","66","6","49", - "35","45","59","56","24","72","25","63","42","30","1","55","43","26","3","39","73","23","33","8") + # run numbers sorted descending by runtime (determined with script sort_runtimes.R from dklein) + runtime_order <- c('32','68','5','46','22','58','41','53','65','11','10','12','47','70','17','34','56','18','54','29', + '62','57','2','26','48','51','20','8','15','52','21','44','16','30','71','38','13','50','67','35','45','1','37','6','31', + '49','40','24','60','69','14','4','9','33','66','61','64','25','42','55','28','23','19','7','43','59','63','39','3','72','73','36','27') # the intersect command in the for-loop below keeps the order of the vector given first # Copy bioenergy demand files and start runs @@ -179,7 +143,7 @@ for (scen in rownames(scenarios)) { #for_plot <- for_plot[,c("y1995","y2110","y2130","y2150"),,invert=TRUE] txtplot(as.numeric(gsub("y","",getYears(for_plot))),for_plot,ylab="EJ/yr") write.magpie(setNames(dem,NULL), file_name = "./modules/60_bioenergy/input/glo.2ndgen_bioenergy_demand.csv") - manipulateConfig("scripts/run_submit/submit.sh","--job-name"=cfg$title,line_endings = "NOTwin") + manipulateConfig(paste0("scripts/run_submit/submit_", cfg$qos, ".sh"), "--job-name" = cfg$title, line_endings = "NOTwin") start_run(cfg,codeCheck=FALSE) } } From 0aa50cb85d774be21bffeb713a4acb022fb73f9c Mon Sep 17 00:00:00 2001 From: David Klein Date: Tue, 24 Oct 2023 14:05:02 +0200 Subject: [PATCH 2/4] Replace fit variable, since the variable that was used so far also included other bioenergy sources, not just bioenergy crops. This, in combination with the minimal bioenergy demand > 0 (changed in the previous commit), led to clusters of data points with the same very low demand but multiple prices. - old: "Demand|Bioenergy|++|2nd generation (EJ/yr)" - new: "Demand|Bioenergy|2nd generation|++|Bioenergy crops (EJ/yr)" Further changes: - remove commented sections - keep regionscode from being removed --- scripts/output/extra/emulator.R | 49 ++++++++------------------------- 1 file changed, 11 insertions(+), 38 deletions(-) diff --git a/scripts/output/extra/emulator.R b/scripts/output/extra/emulator.R index 68adaa910c..5af6da2122 100644 --- a/scripts/output/extra/emulator.R +++ b/scripts/output/extra/emulator.R @@ -121,61 +121,36 @@ collect_data_and_make_emulator <- function(outputdir,name_of_fit="linear") { # Prices|Bioenergy (US$05/GJ) # Clean data + "Demand|Bioenergy|2nd generation|++|Bioenergy crops (EJ/yr)" + "Prices|Bioenergy (US$05/GJ)" # 1. Exclude points with zero production (there are cases where production is zero but there is a price) - x[,,"Demand|Bioenergy|++|2nd generation (EJ/yr)"][x[,,"Demand|Bioenergy|++|2nd generation (EJ/yr)"]==0] <- NA - x[,,"Prices|Bioenergy (US$05/GJ)"][is.na(x[,,"Demand|Bioenergy|++|2nd generation (EJ/yr)"])] <- NA + x[,,"Demand|Bioenergy|2nd generation|++|Bioenergy crops (EJ/yr)"][x[,,"Demand|Bioenergy|2nd generation|++|Bioenergy crops (EJ/yr)"]==0] <- NA + x[,,"Prices|Bioenergy (US$05/GJ)"][is.na(x[,,"Demand|Bioenergy|2nd generation|++|Bioenergy crops (EJ/yr)"])] <- NA # 2. Normally, where production (x) is zero resulting prices (y) are NA -> set production to NA where prices are NA - x[,,"Demand|Bioenergy|++|2nd generation (EJ/yr)"][is.na(x[,,"Prices|Bioenergy (US$05/GJ)"])] <- NA + x[,,"Demand|Bioenergy|2nd generation|++|Bioenergy crops (EJ/yr)"][is.na(x[,,"Prices|Bioenergy (US$05/GJ)"])] <- NA x[,,"Modelstatus (-)"] <- x["GLO",,"Modelstatus (-)"] # Convert units to REMIND units TWa_2_EJ <- 365.25*24*3600/1E6 - tmp1 <- x[,,"Demand|Bioenergy|++|2nd generation (EJ/yr)"] / TWa_2_EJ # EJ -> TWa + tmp1 <- x[,,"Demand|Bioenergy|2nd generation|++|Bioenergy crops (EJ/yr)"] / TWa_2_EJ # EJ -> TWa tmp2 <- x[,,"Prices|Bioenergy (US$05/GJ)"] * TWa_2_EJ/1000 # $/GJ -> T$/TWa getNames(tmp1,dim=4) <- gsub("EJ/yr","TWa/yr", getNames(tmp1,dim=4),fixed=TRUE) getNames(tmp2,dim=4) <- gsub("US$05/GJ","T$/TWa",getNames(tmp2,dim=4),fixed=TRUE) x <- mbind(x,tmp1,tmp2) + + # transfer regionscode from mag_res to x since it was erased above everywhere where x was 'mbind'ed + regionscode <- attributes(mag_res)$regionscode + attributes(x)$regionscode <- regionscode ############################################################### ############# C A L C U L A T E E M U L A T O R ############# ############################################################### - # vars <- c("Demand|Bioenergy|++|2nd generation (EJ/yr)", - # "Prices|Bioenergy (US$05/GJ)", - # "Modelstatus (-)") - # - # y <- x[,,vars] - # - # # Make up model data for fitting: If in current year not enough data is availalbe copy it from other years - # # Criteria for "enough" data available: - # # number of - # # 1. non-zero and - # # 2. feasible and - # # 3. unique and - # # 4. non-NA elements - # # > n - # - # # 1. non-zero: set zero elements to NA - # # has been done before -> does not have to be checked here - # - # # 2. feasible: set data to NA in infeasible years and the years after - # y <- mute_infes(data = y, name="Modelstatus (-)", infeasible = 5) - # - # # 3. unique elements: set duplicated samples to NA - # y <- mute_duplicated(y) - # - # # 4. non-NA: find number of non-NA elements - # n_exist <- as.magpie(apply(unwrap(y),c(1,2,3,5,6),function(x)sum(!is.na(x)))) - # nodata <- n_exist[,,"Demand|Bioenergy|++|2nd generation (EJ/yr)"]<1 - # - # # Finally: Copy data from other years where data is not availalbe - # z <- fill_missing_years(y,nodata) - # Calculate emulator fc <- emulator(data=x, - name_x="Demand|Bioenergy|++|2nd generation (TWa/yr)", + name_x="Demand|Bioenergy|2nd generation|++|Bioenergy crops (TWa/yr)", name_y="Prices|Bioenergy (T$/TWa)", name_modelstat="Modelstatus (-)", userfun=function(param,x)return(param[[1]] + param[[2]] * x), @@ -190,8 +165,6 @@ collect_data_and_make_emulator <- function(outputdir,name_of_fit="linear") { print(fc) print(attributes(fc)) - regionscode <- attributes(mag_res)$regionscode - # write fit coefficients to REMIND input file for (scen in getNames(fc,dim="scenario")) { write.magpie(fc,file_name = paste0("f30_bioen_price_",scen,"_",regionscode,".cs4r"), file_folder = file.path(emu_path,scen,name_of_fit)) From ae08b789839b613b0e0486d01050438682221b44 Mon Sep 17 00:00:00 2001 From: David Klein Date: Tue, 24 Oct 2023 14:36:06 +0200 Subject: [PATCH 3/4] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 532523977d..0f1a5b3c84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### changed - **inputdata** There was a major bug (related to proj/terra) in the rev4.91 inputdata that was fixed with rev4.92 - **scripts** LUH2_disaggregation output script was modified. Specifically, flooded area was made compatible with the LUH definition, cropland and grazing land were added to the states.nc file, and specific naming/details (datatype, zname, xname, and yname) were added when creating the .nc files. +- **scripts** For the emulator scripts select a diffrent bioenergy demand variable that excludes bioenergy sources other than second generation bioenergy crops. Set the minimal bioenergy demand to zero. Both avoid artificial clustering of data points and allow for better fits. - **36_employment** regression between hourly labor regression and GDP pc changed from linear to log-log ### added From be76e2de2c8a37af2b9447ef28310d5d3dca2c9e Mon Sep 17 00:00:00 2001 From: David Klein Date: Tue, 24 Oct 2023 14:38:39 +0200 Subject: [PATCH 4/4] Typo in CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f1a5b3c84..5ba750102f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### changed - **inputdata** There was a major bug (related to proj/terra) in the rev4.91 inputdata that was fixed with rev4.92 - **scripts** LUH2_disaggregation output script was modified. Specifically, flooded area was made compatible with the LUH definition, cropland and grazing land were added to the states.nc file, and specific naming/details (datatype, zname, xname, and yname) were added when creating the .nc files. -- **scripts** For the emulator scripts select a diffrent bioenergy demand variable that excludes bioenergy sources other than second generation bioenergy crops. Set the minimal bioenergy demand to zero. Both avoid artificial clustering of data points and allow for better fits. +- **scripts** For the emulator scripts select a different bioenergy demand variable that excludes bioenergy sources other than second generation bioenergy crops. Set the minimal bioenergy demand to zero. Both avoid artificial clustering of data points and allow for better fits. - **36_employment** regression between hourly labor regression and GDP pc changed from linear to log-log ### added