Skip to content

Commit

Permalink
improve fixOnRef to let it support multiple models and scenarios
Browse files Browse the repository at this point in the history
  • Loading branch information
orichters committed Oct 9, 2023
1 parent 851ea51 commit b69c09c
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 84 deletions.
2 changes: 1 addition & 1 deletion .buildlibrary
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ValidationKey: '608716'
ValidationKey: '785560'
AutocreateReadme: yes
allowLinterWarnings: no
AddInReadme: tutorial.md
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ repos:
- id: mixed-line-ending

- repo: https://github.com/lorenzwalthert/precommit
rev: v0.3.2.9019
rev: v0.3.2.9021
hooks:
- id: parsable-R
- id: deps-in-desc
Expand Down
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ cff-version: 1.2.0
message: If you use this software, please cite it using the metadata from this file.
type: software
title: 'piamInterfaces: Project specific interfaces to REMIND / MAgPIE'
version: 0.3.1
date-released: '2023-10-06'
version: 0.4.0
date-released: '2023-10-09'
abstract: Project specific interfaces to REMIND / MAgPIE.
authors:
- family-names: Benke
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Type: Package
Package: piamInterfaces
Title: Project specific interfaces to REMIND / MAgPIE
Version: 0.3.1
Date: 2023-10-06
Version: 0.4.0
Date: 2023-10-09
Authors@R: c(
person("Falk", "Benke", , "benke@pik-potsdam.de", role = c("aut", "cre")),
person("Oliver", "Richters", role = "aut")
Expand Down
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ importFrom(data.table,fwrite)
importFrom(data.table,setnames)
importFrom(dplyr,"%>%")
importFrom(dplyr,arrange)
importFrom(dplyr,case_when)
importFrom(dplyr,count)
importFrom(dplyr,desc)
importFrom(dplyr,distinct)
importFrom(dplyr,filter)
importFrom(dplyr,first)
importFrom(dplyr,group_by)
importFrom(dplyr,inner_join)
importFrom(dplyr,left_join)
Expand All @@ -50,7 +52,6 @@ importFrom(dplyr,summarise)
importFrom(dplyr,summarize)
importFrom(dplyr,ungroup)
importFrom(gms,chooseFromList)
importFrom(gms,getLine)
importFrom(grDevices,dev.off)
importFrom(grDevices,pdf)
importFrom(magclass,as.magpie)
Expand All @@ -65,6 +66,7 @@ importFrom(quitte,as.quitte)
importFrom(quitte,getModels)
importFrom(quitte,getRegs)
importFrom(quitte,getScenarios)
importFrom(quitte,quitteSort)
importFrom(quitte,read.quitte)
importFrom(quitte,write.IAMCxlsx)
importFrom(quitte,write.mif)
Expand Down
163 changes: 98 additions & 65 deletions R/fixOnRef.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,94 +2,127 @@
#'
#' @md
#' @author Oliver Richters
#' @param mif data or path to mif file of single scenario
#' @param mifRef data or path to mif file of reference scenario
#' @param startyear first time step for each mif and mifRef are expected to differ
#' @param ret "boolean", "fixed" or "fails", depending on what you want to get
#' @param failfile csv file to which failing check are written to
#' @importFrom dplyr group_by summarise ungroup left_join mutate arrange %>%
#' @param data quitte object or mif file
#' @param refscen scenario name of reference scenario, or file or quitte object with reference data
#' @param startyear first time step for which scenarios and reference scenario are expected to differ
#' @param ret "boolean": just return TRUE/FALSE if check was successful
#' "fails": data frame with mismatches between scenario and reference data
#' "fixed": quitte object with data correctly fixed on reference data
#' @param failfile csv file to which mismatches are written to
#' @importFrom dplyr case_when first group_by summarise ungroup left_join mutate arrange %>%
#' filter select desc n
#' @importFrom gms getLine
#' @importFrom quitte as.quitte
#' @importFrom quitte as.quitte quitteSort
#' @importFrom tidyr pivot_wider
#' @importFrom utils write.csv
#'
#' @return see parameter 'ret'
#' @export


fixOnRef <- function(mif, mifRef, startyear, ret = "boolean", failfile = NULL) {
scenario <- variable <- period <- value <- ref <- reldiff <- group <- NULL
mif <- droplevels(as.quitte(mif, na.rm = TRUE))
mifRef <- droplevels(as.quitte(mifRef, na.rm = TRUE))
fixOnRef <- function(data, refscen, startyear, ret = "boolean", failfile = NULL) {
scenario <- variable <- period <- value <- ref <- reldiff <- NULL
data <- droplevels(as.quitte(data, na.rm = TRUE))
startyear <- suppressWarnings(as.numeric(startyear))
# check whether refscen is just the scenario name, or rather data
if (is.character(refscen) && ! file.exists(refscen) && all(refscen %in% levels(data$scenario))) {
refdata <- droplevels(filter(data, scenario == refscen))
} else {
refdata <- droplevels(as.quitte(refscen, na.rm = TRUE))
}
refscen <- levels(refdata$scenario)
stopifnot(
`'mif' must contain data from one scenario only` = length(levels(mif$scenario)) == 1,
`'mifRef' must contain data from one scenario only` = length(levels(mifRef$scenario)) == 1,
`'refscen' must be a single scenario only` = length(refscen) == 1,
`'startyear' must be a single numeric value` = (length(startyear) == 1 && ! is.na(startyear)),
`'ret' must be 'boolean', 'fails' or 'fixed'` = (length(ret) == 1 && ret %in% c("boolean", "fails", "fixed"))
)

title <- levels(mif$scenario)
titleRef <- levels(mifRef$scenario)

if (identical(levels(mif$scenario), levels(mifRef$scenario))) {
levels(mifRef$scenario) <- paste0(levels(mifRef$scenario), "_ref")
returnhelper <- function(ret, boolean, fails, fixed) {
messages <- c(boolean = "Returning a boolean.",
fails = "Returning failing data.",
fixed = "Returning fixed data.")
message(messages[[ret]])
if (ret == "boolean") return(boolean)
if (ret == "fails") return(fails)
if (ret == "fixed") return(fixed)
}

falsepositives <- grep("Moving Avg$", levels(mif$variable), value = TRUE)

message("Comparing ", title, " with reference run ", titleRef, " for t < ", startyear)

if (startyear <= min(mif$period)) {
message("No data before startyear found, so no fixing happened")
return(if (ret == "fails") NULL else if (ret == "fixed") mif else TRUE)
message("Comparing with reference run ", refscen, " for t < ", startyear)
if (startyear <= min(data$period)) {
message("No data before startyear found, so no fixing happened.")
return(returnhelper(ret, boolean = TRUE, fails = NULL, fixed = data))
}
comp <- rbind(mutate(mif, scenario = "value"), mutate(mifRef, scenario = "ref")) %>%

# define false-positives that necessarily differ
falsepositives <- c(grep("Moving Avg$", levels(data$variable), value = TRUE),
grep("Interest Rate (t+1)/(t-1)", levels(data$variable), value = TRUE, fixed = TRUE)
)
# prepare reference data to left_join it to data
refcomp <- refdata %>%
select(-scenario) %>%
rename(ref = value) %>%
droplevels()
# left_join data with reference run and select everything with bigger differences
comp <- data %>%
left_join(refcomp, by = c("model", "region", "variable", "unit", "period")) %>%
filter(! variable %in% falsepositives, period < startyear) %>%
arrange(variable) %>%
pivot_wider(names_from = scenario) %>%
mutate(reldiff = abs(value - ref) / pmax(1E-14, abs(value), abs(ref), na.rm = TRUE)) %>%
filter(abs(reldiff) > 1E-14) %>%
mutate(scenario = factor(title)) %>%
droplevels()
if (nrow(comp) == 0) {
message("# Run is perfectly fixed on reference run!")
return(if (ret == "fails") NULL else if (ret == "fixed") mif else TRUE)
}
mismatches <- comp %>%
mutate(model = NULL, scenario = NULL, variable = factor(removePlus(variable))) %>%
arrange(variable) %>%
summarise(period = paste(sort(unique(period)), collapse = ","),
reldiff = max(reldiff),
.by = variable) %>%
mutate(group = factor(gsub("(\\|.*?)\\|.*$", "\\1", variable))) %>%
# mutate(group = factor(gsub("\\|.*", "", variable))) %>% # to group more coarsely
summarise(variable = if (length(unique(variable)) == 1) unique(variable) else unique(group),
variables = n(),
period = paste(sort(unique(strsplit(period, ",")[[1]])), collapse = ", "),
reldiff = max(reldiff),
.by = group) %>%
mutate(reldiff = niceround(reldiff), group = variable, variable = NULL) %>%
droplevels()

showrows <- 250
rlang::with_options(width = 160, print(mismatches, n = showrows))
if (showrows < nrow(mismatches)) {
message("Further ", (nrow(mismatches) - showrows), " variable groups differ.")
message("\n### All runs are perfectly fixed on reference run!")
return(returnhelper(ret, boolean = TRUE, fails = NULL, fixed = data))
}
# print human-readbable summary
.printRefDiff(data, comp)
# save mismatches to file, if requested
if (! is.null(failfile) && nrow(comp) > 0) {
message("Find failing variables in '", failfile, "'.")
write.csv(comp, failfile, quote = FALSE, row.names = FALSE)
}
if (ret %in% c("fails", "boolean")) {
return(if (ret == "boolean") FALSE else comp)
# fix correctly on ref
fixeddata <- data %>%
left_join(refcomp, by = c("model", "region", "variable", "unit", "period")) %>%
mutate(value = case_when(
period >= startyear ~ value,
variable %in% falsepositives ~ value,
.default = ref
)) %>%
select(-ref) %>%
quitteSort()
return(returnhelper(ret, boolean = FALSE, fails = comp, fixed = fixeddata))
}

.printRefDiff <- function(data, comp) {
model <- scenario <- variable <- period <- reldiff <- group <- NULL
for (m in levels(data$model)) {
for (s in levels(data$scenario)) {
mismatches <- comp %>%
filter(model == m, scenario == s) %>%
select(-model, -scenario)
if (nrow(mismatches) == 0) {
message("\n### Everything fine for model=", m, " and scenario=", s)
} else {
groupdepth <- 3
groupgrep <- paste(c(rep("(\\|.*?)", groupdepth - 1), "\\|.*$"), collapse = "")
mismatches <- mismatches %>%
mutate(variable = factor(removePlus(variable))) %>%
arrange(variable) %>%
summarise(period = paste(sort(unique(period)), collapse = ","),
reldiff = max(reldiff),
.by = variable) %>%
mutate(group = factor(gsub(groupgrep, "\\1", variable))) %>%
summarise(variable = if (length(unique(variable)) == 1) unique(variable) else unique(group),
variables = n(),
period = paste(sort(unique(strsplit(period, ",")[[1]])), collapse = ", "),
reldiff = max(reldiff),
.by = group) %>%
mutate(reldiff = niceround(reldiff), group = variable, variable = NULL) %>%
droplevels()
message("\n### Printing results for model=", m, " and scenario=", s)
showrows <- 250
rlang::with_options(width = 160, print(mismatches, n = showrows))
if (showrows < nrow(mismatches)) {
message("Further ", (nrow(mismatches) - showrows), " variable groups differ.")
}
}
}
}
message("Returning corrected data for ", title, ".")
di <- rbind(
filter(mif, period >= startyear | ! variable %in% levels(mifRef$variable) | variable %in% falsepositives),
filter(mifRef, period < startyear, variable %in% setdiff(levels(mif$variable), falsepositives))
) %>%
mutate(scenario = factor(title)) %>%
quitteSort()
return(di)
}
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Project specific interfaces to REMIND / MAgPIE

R package **piamInterfaces**, version **0.3.1**
R package **piamInterfaces**, version **0.4.0**

[![CRAN status](https://www.r-pkg.org/badges/version/piamInterfaces)](https://cran.r-project.org/package=piamInterfaces) [![R build status](https://github.com/pik-piam/piamInterfaces/workflows/check/badge.svg)](https://github.com/pik-piam/piamInterfaces/actions) [![codecov](https://codecov.io/gh/pik-piam/piamInterfaces/branch/master/graph/badge.svg)](https://app.codecov.io/gh/pik-piam/piamInterfaces) [![r-universe](https://pik-piam.r-universe.dev/badges/piamInterfaces)](https://pik-piam.r-universe.dev/builds)

Expand Down Expand Up @@ -64,7 +64,7 @@ In case of questions / problems please contact Falk Benke <benke@pik-potsdam.de>

To cite package **piamInterfaces** in publications use:

Benke F, Richters O (2023). _piamInterfaces: Project specific interfaces to REMIND / MAgPIE_. R package version 0.3.1, <https://github.com/pik-piam/piamInterfaces>.
Benke F, Richters O (2023). _piamInterfaces: Project specific interfaces to REMIND / MAgPIE_. R package version 0.4.0, <URL: https://github.com/pik-piam/piamInterfaces>.

A BibTeX entry for LaTeX users is

Expand All @@ -73,7 +73,7 @@ A BibTeX entry for LaTeX users is
title = {piamInterfaces: Project specific interfaces to REMIND / MAgPIE},
author = {Falk Benke and Oliver Richters},
year = {2023},
note = {R package version 0.3.1},
note = {R package version 0.4.0},
url = {https://github.com/pik-piam/piamInterfaces},
}
```
17 changes: 11 additions & 6 deletions man/fixOnRef.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 8 additions & 3 deletions tests/testthat/test-fixOnRef.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
test_that("fixOnRef works", {
qe <- quitte::quitte_example_dataAR6
d <- droplevels(dplyr::filter(qe,
scenario == levels(scenario)[[1]],
model == levels(model)[[1]]))
scenario == first(scenario),
model == first(model)))

expect_true(fixOnRef(d, d, startyear = 2020, ret = "boolean"))
# somehow, only MESSAGEix results are correct in the dataset above
expect_true(fixOnRef(filter(qe, model == "MESSAGEix"), "Current Policies", startyear = 2020, ret = "boolean"))
qefixed <- fixOnRef(qe, "Current Policies", startyear = 2020, ret = "fixed")
expect_true(fixOnRef(qefixed, "Current Policies", startyear = 2020, ret = "boolean"))
expect_identical(fixOnRef(d, d, startyear = 2020, ret = "fixed"), d)
expect_true(is.null(fixOnRef(d, d, startyear = 2020, ret = "fails")))
# generate object with wrong data in 2020
Expand All @@ -21,7 +25,8 @@ test_that("fixOnRef works", {
expect_identical(dwrong, fixOnRef(dwrong, d, startyear = 2010, ret = "fixed"))
expect_true(fixOnRef(dwrong, d, startyear = min(dwrong$period), ret = "boolean"))
# make sure only valid settings are accepted
expect_no_error(fixOnRef(qe, filter(qe, scenario == first(scenario)), startyear = 2005, ret = "boolean"))
expect_error(fixOnRef(dwrong, d, startyear = 2020, ret = "whatever"), "ret")
expect_error(fixOnRef(dwrong, d, startyear = "whenever"), "startyear")
expect_error(fixOnRef(qe, qe, startyear = 2020), "mif")
expect_error(fixOnRef(qe, qe, startyear = 2020), "refscen")
})

0 comments on commit b69c09c

Please sign in to comment.