.

Merge branch 'jorge-devel' into devel # Conflicts: # .gitignore
SantanderMetGroup · Feb 11, 2021 · d08eaa1 · d08eaa1
2 parents 6accec0 + a0eaa8c
commit d08eaa1
Show file tree

Hide file tree

Showing 17 changed files with 71 additions and 54 deletions.
diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,4 @@ downscaleR.Rproj
 R/lmdown.R
 vignettes/bias_correction_figs
 .cache
-.DS_Store
+.DS_Store
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -48,5 +48,5 @@ Encoding: UTF-8
 Description: Tools for climate data calibration (bias correction, quantile mapping etc.) and perfect-prog downscaling, as part of the climate4R framework (<http://meteo.unican.es/climate4R>). 
 License: GPL (>= 3)
 LazyData: true
-RoxygenNote: 7.1.0
+RoxygenNote: 7.1.1
 VignetteBuilder: knitr
diff --git a/NEWS b/NEWS
@@ -82,7 +82,10 @@ See the [Releases section](https://github.com/SantanderMetGroup/downscaleR/relea
  * Avoid dependency from lubridate
  * Other minor changes and documentation updates
 
-## v3.3.2 (in devel)
+## v3.3.2 (09 Feb 2021)
 
  * Minor internal changes in biasCorrection for improved memory usage
  * Other minor changes and documentation updates
+ * New optional parameter in functions downscalePredict, downscaleTrain, donwscaleCV and downscale called `simulate` that permits to simulate from the distributional parameters infered from the GLMs
+
+## v3.3.3 (in devel)
diff --git a/R/downsGLM.R b/R/downsGLM.R
@@ -6,7 +6,6 @@
 #' @param x The grid data. Class: matrix.
 #' @param y The observations data. Class: matrix.
 #' @param fitting A string indicating the types of objective functions and how to fit the linear model.
-#' @param simulate A logic value indicating wether we want a stochastic or a deterministic GLM. Stochastic GLMs only accept gamma 
 #' @param model.verbose A logic value. Indicates wether the information concerning the model infered is limited to the 
 #' essential information (model.verbose = FALSE)  or a more detailed information (model.verbose = TRUE, DEFAULT). This is
 #' recommended when you want to save memory. Only operates for GLM.
@@ -56,7 +55,7 @@
 #' @author J. Bano-Medina
 #' @importFrom stats step formula
 #' @importFrom glmnet glmnet cv.glmnet
-glm.train <- function(x, y, fitting = NULL, simulate = FALSE, model.verbose = TRUE,
+glm.train <- function(x, y, fitting = NULL, model.verbose = TRUE,
                       stepwise.arg = NULL,
                       ...) {
   colnames(x) <- attr(x,"predictorNames")
@@ -108,30 +107,30 @@ glm.train <- function(x, y, fitting = NULL, simulate = FALSE, model.verbose = TR
   if (!isTRUE(model.verbose)) {
     weights$fitted.values <- NULL
     weights$effects <- NULL
-    # weights$qr$qr <- NULL
     weights$fitted.values <- NULL
     weights$linear.predictors <- NULL
     weights$prior.weights <- NULL
-    weights$y <- NULL
     weights$model <- NULL
     weights$data <- NULL
+    weights$R <- NULL
   }
   arglist <- list(...) 
   if (is.null(arglist$family)) {family = "gaussian"}
   else {family <- arglist$family}
-  return(list("weights" = weights, "info" = list("fitting" = fitting, "simulate" = simulate, "family" = family)))}
+  return(list("weights" = weights, "info" = list("fitting" = fitting, "family" = family)))}
 
 #' @title Donwscaling with a given generalized linear model (GLM).
 #' @description Donwscaling with a given generalized linear models (GLM) calculated in \code{\link[downscaleR]{downscalePredict}} via \code{\link[downscaleR]{glm.train}}.
 #' @param x The grid data. Class: matrix.
 #' @param weights Object as returned from \code{\link[downscaleR]{glm.train}}
 #' @param info A list containing information of the experiment: the fitting, the family of the generalized linear model and 
 #' if it is deterministic or stochastic.
+#' @param simulate A logic value indicating whether we want a stochastic or a deterministic GLM. Stochastic GLMs only accept gamma 
 #' @return The predicted matrix.
 #' @details Predicts by using the base function \code{\link[stats]{predict}}. This function is internal and should not be used by the user.
 #' The user should use \code{\link[downscaleR]{downscalePredict}}.
 #' @author J. Bano-Medina
-glm.predict <- function(x, weights, info) {
+glm.predict <- function(x, weights, info, simulate) {
   colnames(x) <- attr(x,"predictorNames")
   if (is.null(info$fitting) || info$fitting == "stepwise") {
     df <- data.frame(x)
@@ -140,7 +139,7 @@ glm.predict <- function(x, weights, info) {
   else if (info$fitting == "L1" || info$fitting == "L2" || info$fitting == "L1L2" || info$fitting == "gLASSO") {
     pred <- drop(predict(weights,x,type = "response"))
   }
-  if (isTRUE(info$simulate)) {
+  if (isTRUE(simulate)) {
     if (info$family$family == "binomial") {
       rnd <- runif(length(pred), min = 0, max = 1)
       ind01 <- which(pred > rnd)

diff --git a/R/downscale.R b/R/downscale.R
@@ -22,7 +22,7 @@
 #' @param x The input grid. It should be an object as returned by \pkg{loadeR}.
 #' @param newdata It should be an object as returned by \pkg{loadeR} and consistent with x. Default is newdata = x.
 #' @param method Downscaling method. Options are c = ("analogs","glm","lm"). Glm can only be set when downscaling precipitation. 
-#' @param simulate Logic. Options are \code{"FALSE"}, \code{"TRUE"}.
+#' @param simulate A logic value indicating whether we want to simulate or not based on the GLM distributional parameters. Only relevant when perdicting with a GLM. Default to FALSE. 
 #' @param n.analogs Applies only when \code{method="analogs"} (otherwise ignored). Integer indicating the number of closest neigbours to retain for analog construction. Default to 1.
 #' @param sel.fun Applies only when \code{method="analogs"} (otherwise ignored). Criterion for the construction of analogs when several neigbours are chosen. Ignored when \code{n = 1}.
 #' Current values are \code{"mean"} (the default), \code{"wmean"},  \code{"max"},  \code{"min"} and  \code{"median"}.
@@ -135,16 +135,16 @@ downscale <- function(y,
     gridt <- prepareNewData(newdata,gridT)
     if (method == "analogs") {
       model <- downscaleTrain(gridT,method = "analogs", n.analogs = n.analogs, sel.fun = sel.fun)
-      yp <- downscalePredict(gridt,model)
+      yp <- downscalePredict(gridt,model, simulate = FALSE)
     }
     else if (method == "glm") {
       # Amounts
       model.reg <- downscaleTrain(gridT, method = "GLM", family = Gamma(link = "log"), condition = "GT", threshold = 0, simulate = simulate)
-      yp.reg <- downscalePredict(gridt,model.reg)
+      yp.reg <- downscalePredict(gridt,model.reg,simulate = simulate)
       # Ocurrence
       gridT <- prepareData(x,y.ocu,global.vars = getVarNames(x),spatial.predictors)
       model.ocu <- downscaleTrain(gridT,method = "GLM", family = binomial(link = "logit"), simulate = simulate)
-      yp.ocu <- downscalePredict(gridt,model.ocu)
+      yp.ocu <- downscalePredict(gridt,model.ocu,simulate = simulate)
       # Complete serie
       if (!isTRUE(simulate)) {
         yp.ocu <- binaryGrid(yp.ocu, ref.obs = y.ocu, ref.pred = yp.ocu)
@@ -157,7 +157,7 @@ downscale <- function(y,
     }
     else if (method == "lm") {
       model <- downscaleTrain(gridT,method = "GLM", family = "gaussian")
-      yp <- downscalePredict(gridt,model)
+      yp <- downscalePredict(gridt,model,simulate = simulate)
     }
   }  
   else {# Leave-one-out and cross-validation 

diff --git a/R/downscaleCV.R b/R/downscaleCV.R
@@ -21,6 +21,7 @@
 #' @param x The input grid (admits both single and multigrid, see \code{\link[transformeR]{makeMultiGrid}}). It should be an object as returned by \pkg{loadeR}.
 #' @param y The observations dataset. It should be an object as returned by \pkg{loadeR}.
 #' @param method A string value. Type of transer function. Currently implemented options are \code{"analogs"}, \code{"GLM"} and \code{"NN"}.
+#' @param simulate A logic value indicating whether we want to simulate or not based on the GLM distributional parameters. Only relevant when perdicting with a GLM. Default to FALSE. 
 #' @param sampling.strategy Specifies a sampling strategy to define the training and test subsets. Possible values are 
 #' \code{"kfold.chronological"} (the default), \code{"kfold.random"}, \code{"leave-one-year-out"} and NULL.
 #' The \code{sampling.strategy} choices are next described:
@@ -81,6 +82,7 @@
 #' x <- makeMultiGrid(NCEP_Iberia_hus850, NCEP_Iberia_ta850)
 #' x <- subsetGrid(x, years = 1985:1995)
 #' # Loading predictands
+#' data("VALUE_Iberia_pr")
 #' y <- VALUE_Iberia_pr
 #' y <- getTemporalIntersection(obs = y, prd = x, "obs")
 #' x <- getTemporalIntersection(obs = y, prd = x, "prd")
@@ -110,7 +112,7 @@
 
 downscaleCV <- function(x, y, method,
                         sampling.strategy = "kfold.chronological", folds = 4, 
-                        scaleGrid.args = NULL,
+                        scaleGrid.args = NULL, simulate = FALSE,
                         prepareData.args = list("global.vars" = NULL, "combined.only" = TRUE, "spatial.predictors" = NULL, "local.predictors" = NULL, "extended.predictors" = NULL),
                         condition = NULL, threshold = NULL, ...) {
 
@@ -163,22 +165,23 @@ downscaleCV <- function(x, y, method,
     xt <- prepareNewData(newdata = xt, data.structure = xT)
     model <- downscaleTrain(xT, method, condition, threshold, ...)
     if (all(as.vector(y$Data) %in% c(0,1,NA,NaN), na.rm = TRUE)) {
-      y.prob <- downscalePredict(xt, model)
+      y.prob <- downscalePredict(xt, model, simulate)
       if (method == "GLM") {
-        if (isTRUE(model$model$atomic_model[[1]]$info$simulate)) {
+        if (isTRUE(simulate)) {
           y.bin  <- y.prob
         }
         else {
           y.bin  <- binaryGrid(y.prob, ref.obs = yT, ref.pred = model$pred)
         }
       }
       else{
-        y.bin  <- binaryGrid(y.prob, ref.obs = yT, ref.pred = model$pred)}
+        y.bin  <- binaryGrid(y.prob, ref.obs = yT, ref.pred = model$pred)
+      }
       out <- makeMultiGrid(list(y.prob,y.bin)) %>% redim(drop = TRUE)
       out$Variable$varName <- c("prob","bin")
     }
     else {
-      out <- downscalePredict(xt, model)
+      out <- downscalePredict(xt, model, simulate)
     }
     return(out)
   }) %>% bindGrid(dimension = "time")

diff --git a/R/downscaleChunk.R b/R/downscaleChunk.R
@@ -23,6 +23,7 @@
 #' @param newdata New datasets where to apply the model infered. It should be a list of objects as returned by \pkg{loadeR},
 #' containing the new dataset/s.
 #' @param method A string value. Type of transer function. Currently implemented options are \code{"analogs"}, \code{"GLM"} and \code{"NN"}.
+#' @param simulate A logic value indicating whether we want to simulate or not based on the GLM distributional parameters. Only relevant when perdicting with a GLM. Default to FALSE. 
 #' @param prepareData.args A list with the arguments of the \code{\link[downscaleR]{prepareData}} function. Please refer to \code{\link[downscaleR]{prepareData}} help for
 #' more details about this parameter.
 #' @param condition Inequality operator to be applied considering the given threshold.
@@ -44,7 +45,7 @@
 #' @author J. Bano-Medina
 #' @export
 
-downscaleChunk <- function(x, y, newdata,
+downscaleChunk <- function(x, y, newdata, simulate = FALSE,
                            method, ...,
                            prepareData.args = list("global.vars" = NULL, "combined.only" = TRUE, "spatial.predictors" = NULL, "local.predictors" = NULL, "extended.predictors" = NULL),
                            condition = NULL, threshold = NULL, predict = TRUE,
@@ -65,12 +66,14 @@ downscaleChunk <- function(x, y, newdata,
     print(paste("Training chunk:",z,"out of",chunks))
     y_chunk <- subsetDimension(y,dimension = "lat", indices = z)
     xyT <- prepareData(x = x, y = y_chunk, global.vars = prepareData.args$global.vars, combined.only = prepareData.args$combined.only, spatial.predictors = prepareData.args$spatial.predictors, local.predictors = prepareData.args$local.predictors, extended.predictors = prepareData.args$extended.predictors)
-    model <- downscaleTrain(xyT, method, condition, threshold, predict, ...)
+    model <- downscaleTrain(xyT, method = method, condition = condition, threshold = threshold, predict = predict, model.verbose = FALSE, ...)
 
-    p <- lapply(newdata, function(zz) {
-      xyt <- prepareNewData(zz,xyT)
-      downscalePredict(newdata = xyt,model)
-    })
+    p <- lapply(simulate, function(sim) {
+      lapply(newdata, function(zz) {
+        xyt <- prepareNewData(zz,xyT)
+        downscalePredict(newdata = xyt,model, simulate = sim)
+      })
+    }) %>% unlist(recursive = FALSE)
 
     if (z < 10) {zn <- paste0("00",z)}
     else if (z < 100 & z >= 10) {zn <- paste0("0",z)}
@@ -88,14 +91,14 @@ downscaleChunk <- function(x, y, newdata,
     })
     p <- NULL
   })
-  ini <- ifelse(isTRUE(predict),length(newdata)+1,length(newdata))
+  ini <- ifelse(isTRUE(predict),(length(newdata)*length(simulate))+1,length(newdata)*length(simulate))
   pred <- list()
   for (i in 1:ini) {
     lf <- list.files("./", pattern = paste0("dataset",i), full.names = TRUE)
     chunk.list <- lapply(lf, function(x) get(load(x)))
-    pred[[i]] <- bindGrid(chunk.list, dimension = "lat")
+    pred[[i]] <- bindGrid(chunk.list, dimension = "lat") %>% redim(drop = TRUE)
     file.remove(lf)
   }
-  if (ini == 1) pred <- pred[[1]]
+  if (ini == 1) pred <- pred[[1]] %>% redim(drop = TRUE)
   return(pred)
 }
diff --git a/R/downscalePredict.R b/R/downscalePredict.R
@@ -22,6 +22,7 @@
 #' @description Downscale data to local scales by statistical models previously obtained by \code{\link[downscaleR]{downscaleTrain}}.
 #' @param newdata The grid data. It should be an object as returned by  \code{\link[downscaleR]{prepareNewData}}.
 #' @param model An object containing the statistical model as returned from  \code{\link[downscaleR]{downscaleTrain}}.
+#' @param simulate A logic value indicating whether we want to simulate or not based on the GLM distributional parameters. Only relevant when perdicting with a GLM. Default to FALSE. 
 #' @return A regular/irregular grid object.
 #' @seealso 
 #' downscaleTrain for training a downscaling model
@@ -62,7 +63,7 @@
 #' plot(yt$Data[,5],pred$Data[,5])
 #' }
 
-downscalePredict <- function(newdata, model) {
+downscalePredict <- function(newdata, model, simulate = FALSE) {
   n <- length(newdata$x.global) # number of members
   p <- lapply(1:n, function(z) {
     # Multi-site
@@ -71,7 +72,7 @@ downscalePredict <- function(newdata, model) {
       attr(xx,"predictorNames") <- attr(newdata$x.global,"predictorNames")
       xx %<>% sticky()
       if (model$model$method == "analogs") {model$model$atomic_model$dates$test <- getRefDates(newdata)}
-      yp <- as.matrix(downs.predict(xx, model$model$method, model$model$atomic_model))}
+      yp <- as.matrix(downs.predict(xx, model$model$method, model$model$atomic_model, simulate))}
     # Single-site
     else if (model$model$site == "single") {
       stations <- length(model$model$atomic_model)
@@ -98,7 +99,7 @@ downscalePredict <- function(newdata, model) {
           if (is.null(model$model$atomic_model[[i]])) {
             yp[,i] = rep(NA, 1, n.obs)
           } else {
-            yp[,i] <- downs.predict(xx, model$model$method, model$model$atomic_model[[i]])
+            yp[,i] <- downs.predict(xx, model$model$method, model$model$atomic_model[[i]], simulate)
           }
         }
       }
@@ -125,7 +126,7 @@ downscalePredict <- function(newdata, model) {
           if (is.null(model$model$atomic_model[[i]])) {
             yp[,i] = rep(NA, 1, n.obs)
           } else {
-            yp[,i] <- downs.predict(xx, model$model$method, model$model$atomic_model[[i]])
+            yp[,i] <- downs.predict(xx, model$model$method, model$model$atomic_model[[i]], simulate)
           }
         }
       }
@@ -168,13 +169,14 @@ downscalePredict <- function(newdata, model) {
 #' @param x The grid data. Class: matrix.
 #' @param method The method of the given model.
 #' @param atomic_model An object containing the statistical model of the selected method.
+#' @param simulate A logic value indicating whether we want to simulate or not based on the GLM distributional parameters. 
 #' @return A matrix with the predictions.
 #' @details This function is internal and should not be used by the user. The user should use \code{\link[downscaleR]{downscalePredict}}.
 #' @importFrom deepnet nn.predict
 #' @author J. Bano-Medina
-downs.predict <- function(x, method, atomic_model){
+downs.predict <- function(x, method, atomic_model, simulate){
   switch(method,
          analogs = pred <- analogs.test(x, atomic_model$dataset_x, atomic_model$dataset_y, atomic_model$dates, atomic_model$info),
-         GLM     = pred <- glm.predict(x, atomic_model$weights, atomic_model$info),
+         GLM     = pred <- glm.predict(x, atomic_model$weights, atomic_model$info, simulate),
          NN      = pred <- nn.predict(atomic_model, x)) 
   return(pred)}
diff --git a/R/downscaleTrain.R b/R/downscaleTrain.R
@@ -27,6 +27,7 @@
 #' essential information (model.verbose = FALSE)  or a more detailed information (model.verbose = TRUE, DEFAULT). This is
 #' recommended when you want to save memory. Only operates for GLM.
 #' @param predict A logic value. Should the prediction on the training set should be returned? Default is TRUE.
+#' @param simulate A logic value indicating whether we want to simulate or not based on the GLM distributional parameters when prediting on the train set. Only relevant when perdicting with a GLM. Default to FALSE. 
 #' @param ... Optional parameters. These parameters are different depending on the method selected. Every parameter has a default value set in the atomic functions in case that no selection is wanted. 
 #' Everything concerning these parameters is explained in the section \code{Details}. 
 #' However, if wanted, the atomic functions can be seen here: \code{\link[downscaleR]{glm.train}} and \code{\link[deepnet]{nn.train}}.  
@@ -132,7 +133,7 @@
 #' # Plotting the results for station 5
 #' plot(y$Data[,5],model.analogs$pred$Data[,5], xlab = "obs", ylab = "pred")}
 
-downscaleTrain <- function(obj, method, condition = NULL, threshold = NULL, model.verbose = TRUE, predict = TRUE, ...) {
+downscaleTrain <- function(obj, method, condition = NULL, threshold = NULL, model.verbose = TRUE, predict = TRUE, simulate = FALSE, ...) {
   method <- match.arg(method, choices = c("analogs", "GLM", "NN"))
   if ( method == "GLM") {
     if (attr(obj, "nature") == "spatial+local") {
@@ -210,7 +211,7 @@ downscaleTrain <- function(obj, method, condition = NULL, threshold = NULL, mode
       }
       atomic_model <- downs.train(xx, yy, method, model.verbose, ...)
     }
-    if (isTRUE(predict)) mat.p <- as.matrix(downs.predict(obj$x.global, method, atomic_model))
+    if (isTRUE(predict)) mat.p <- as.matrix(downs.predict(obj$x.global, method, atomic_model, simulate))
   }
   # Single-site
   else if (site == "single") {
@@ -247,7 +248,7 @@ downscaleTrain <- function(obj, method, condition = NULL, threshold = NULL, mode
           if (is.null(atomic_model[[i]])) {
             mat.p[,i] <- rep(NA, 1, nrow(mat.p))
           } else {
-            mat.p[,i] <- downs.predict(xx, method, atomic_model[[i]])
+            mat.p[,i] <- downs.predict(xx, method, atomic_model[[i]], simulate)
           }
         }
       }
@@ -283,7 +284,7 @@ downscaleTrain <- function(obj, method, condition = NULL, threshold = NULL, mode
           })
         }
         if (method == "analogs") {atomic_model[[i]]$dates$test <- getRefDates(obj$y)}
-        if (isTRUE(predict)) mat.p[,i] <- downs.predict(xx, method, atomic_model[[i]])}
+        if (isTRUE(predict)) mat.p[,i] <- downs.predict(xx, method, atomic_model[[i]], simulate)}
     }
   }
   if (isTRUE(predict)) {

diff --git a/man/downs.predict.Rd b/man/downs.predict.Rd
diff --git a/man/downscale.Rd b/man/downscale.Rd