Skip to content

Commit

Permalink
Merge pull request #49 from abigailsnyder/acs-fldgen-subsetting-outpu…
Browse files Browse the repository at this point in the history
…t-fcn

acs-intermediate fix to saved fldgen memory bloat
  • Loading branch information
abigailsnyder authored Jul 15, 2020
2 parents b85eff4 + d28b269 commit ca50388
Show file tree
Hide file tree
Showing 10 changed files with 269 additions and 123 deletions.
3 changes: 1 addition & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(concatGrids)
export(concatGrids.general)
export(coord_array)
export(drop_NAs)
export(emulator_reducer)
export(eof_analyze)
export(extract_box)
export(file.pairer)
Expand All @@ -16,7 +17,6 @@ export(fldgen_object_TP)
export(fldts2df)
export(generate.TP.fullgrids)
export(generate.TP.resids)
export(loadmodel)
export(mkcorrts)
export(normalize.resids)
export(phase_eqn_coef)
Expand All @@ -32,7 +32,6 @@ export(read.temperatures)
export(read_globalAvg)
export(readtgav)
export(reconst_fields)
export(savemodel)
export(splitGrids)
export(splitGrids.general)
export(split_eof)
Expand Down
2 changes: 1 addition & 1 deletion R/generateTPresids.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#' @export
generate.TP.resids <- function(emulator, ngen, method = 1){

Ngrid <- ncol(emulator$meanfldT$r)
Ngrid <- nrow(emulator$meanfldT$w)

newgrids <- lapply(1:ngen,
function(x) {
Expand Down
23 changes: 0 additions & 23 deletions R/readdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -619,29 +619,6 @@ read.precipitations <- function(filename, len=NULL, tag=basename(filename), varn
}


#' @rdname saving_and_restoring
#' @export
loadmodel <- function(file, oldfmt=FALSE)
{
    ## Load a saved emulator from `file` and return it.
    ##
    ## file:   path of the file to read.
    ## oldfmt: if TRUE, `file` is read with load() and must contain an object
    ##         named `modeldata`; otherwise `file` is read with readRDS().
    ##
    ## Returns the loaded object.  Stops if the old-format file has no
    ## `modeldata` object, or if the loaded object does not inherit from
    ## class "fldgen".
    if(oldfmt) {
        ## Restore into a scratch environment rather than this function's
        ## frame, so that nothing stored in the file can overwrite `file` or
        ## `oldfmt`, or shadow functions (e.g. `inherits`) called below.
        loaded <- new.env(parent = emptyenv())
        load(file, envir = loaded)
        if(!exists('modeldata', envir = loaded, inherits = FALSE)) {
            stop('No model data in file.')
        }
        modeldata <- get('modeldata', envir = loaded, inherits = FALSE)
    }
    else {
        modeldata <- readRDS(file)
    }

    if(!inherits(modeldata, 'fldgen')) {
        stop('Object loaded from file is not of type "fldgen".')
    }

    modeldata
}


#' Read and format global mean temperature
#'
#' Read global mean temperature from an input netCDF file and format for use
Expand Down
123 changes: 92 additions & 31 deletions R/writedata.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,97 @@
#### Output functions

#' Subset a trained emulator.
#'
#' A trained fldgen emulator features a large amount of data for both
#' using the emulator and rigorously validating an emulator.
#'
#' If one is just interested in the use of an emulator for generating
#' fields, this function can be called to reduce a trained emulator to
#' the bare essential list entries, which can then be saved and called
#' the same as an unreduced emulator by generate.TP.resids and
#' generate.TP.fullgrids.
#'
#' Note that with this reduced emulator, there is NO way to reconstruct
#' the training data. A fully trained emulator contains a copy of the
#' training data, in addition to the training regressor values (tgav),
#' and the estimated linear model parameters and residuals
#' (meanfldT$b, w, r), which together can also reconstruct the data.
#'
#' Even though the coordinate information stored in an emulator$griddataT
#' is not needed directly to generate a new field of residuals or full data,
#' it is often needed in downstream use of the fields. Therefore the
#' \code{coord} entry is kept in both reducedEmulator$griddataT and
#' reducedEmulator$griddataP. Each is a matrix of coordinates for each grid
#' cell, with cells in rows and latitude, longitude in the two columns. The
#' cost of keeping these coordinate matrices for T and P is negligible.
#'
#' Finally, the reduced emulator produced by this function is specifically
#' meant for temperature and precipitation only, and is not robust to
#' extension to other variables.
#'
#' Finally finally, if a user is interested in a different subset of
#' list entries in a trained emulator, they are encouraged to subset and
#' save it themselves, as appropriate for their project.
#'
#' The function stops with an error if any of the top-level entries listed
#' under the return value is missing from \code{emulator}; the error message
#' names the missing entries.
#'
#' @param emulator A trained fldgen emulator, with all entries needed
#' for generating new residuals and for rigorously validating the
#' quality of the trained emulator.
#'
#' @return reducedEmulator A trained fldgen emulator with only the list
#' entries needed by generate.TP.resids and generate.TP.fullgrids for
#' generating new fields:
#' \describe{
#'   \item{griddataT}{Only the \code{gridid_full} and \code{coord} entries.}
#'   \item{griddataP}{Only the \code{gridid_full} and \code{coord} entries,
#'   and the function to convert from logP to P (\code{pvarconvert_fcn}).}
#'   \item{tgav}{The Tgav data from training.}
#'   \item{meanfldT}{The slope (w) and intercept (b) terms from the mean field
#'   fit.}
#'   \item{meanfldP}{The slope (w) and intercept (b) terms from the mean field
#'   fit.}
#'   \item{tfuns}{The empirical quantile functions for temperature, mapping
#'   N(0,1) to the native distribution in each grid cell.}
#'   \item{pfuns}{The empirical quantile functions for logP, mapping
#'   N(0,1) to the native distribution in each grid cell.}
#'   \item{reof}{The EOFs.}
#'   \item{fx}{Time coefficients for each EOF from training data.}
#'   \item{infiles}{The names of the files used for training the emulator.}
#' }
#'
#' @author ACS July 2020
#' @export
emulator_reducer <- function(emulator){

    ## Every top-level entry that is read below.  Checking the names (rather
    ## than only counting entries) catches a list that has ten or more
    ## entries but lacks one of the required ones, which would otherwise
    ## silently produce NULL sub-entries in the result.
    required <- c('griddataT', 'griddataP', 'tgav', 'meanfldT', 'meanfldP',
                  'tfuns', 'pfuns', 'reof', 'fx', 'infiles')
    missing_entries <- setdiff(required, names(emulator))
    if(length(missing_entries) > 0) {
        stop('Your emulator is already reduced (missing at least one list entry): ',
             paste(missing_entries, collapse = ', '))
    }

    ## This function reduces the size of the object while preserving the
    ## structure expected by generate.TP.resids and generate.TP.fullgrids.
    reducedEmulator <- list(
        griddataT = list(gridid_full = emulator$griddataT$gridid_full,
                         coord = emulator$griddataT$coord),
        griddataP = list(gridid_full = emulator$griddataP$gridid_full,
                         coord = emulator$griddataP$coord,
                         pvarconvert_fcn = emulator$griddataP$pvarconvert_fcn),
        tgav = emulator$tgav,
        ## not reconstructing training data, don't need residuals in the
        ## mean fields
        meanfldT = list(w = emulator$meanfldT$w,
                        b = emulator$meanfldT$b),
        meanfldP = list(w = emulator$meanfldP$w,
                        b = emulator$meanfldP$b),
        ## only the quantile functions are kept from tfuns / pfuns
        tfuns = list(quant = emulator$tfuns$quant),
        pfuns = list(quant = emulator$pfuns$quant),
        reof = emulator$reof,
        fx = emulator$fx,
        infiles = emulator$infiles)

    reducedEmulator
}


#' Write a temperature field as a netcdf file.
#'
#' Format a field as a netcdf file and write it to the specified file. The lat,
Expand Down Expand Up @@ -56,34 +148,3 @@ write.temperature <- function(fld, file, griddata, varname='tas', varunit='K',

ncdf4::nc_close(ncout)
}

#' Load and save emulator training data
#'
#' \code{savemodel} saves the results of training an emulator in a portable
#' format. \code{loadmodel} loads a model from a file created this way and
#' returns it as a \code{fldgen} object.
#'
#' @param modeldata A \code{fldgen} object returned by either
#' \code{\link{train}} or \code{\link{fldgen_object}}.
#' @param file Name of the file to write the data to.
#' @param clobber Flag indicating whether it's ok to overwrite an existing file
#' @param oldfmt Flag indicating that we should try to load the old (.rda) format from
#' pre-2.1 versions of fldgen.
#' @name saving_and_restoring
NULL

#' @rdname saving_and_restoring
#' @export
savemodel <- function(modeldata, file, clobber=FALSE)
{
    ## Write a fldgen object to `file` as an xz-compressed RDS file.
    ##
    ## modeldata: object to save; must inherit from class 'fldgen'.
    ## file:      destination passed to saveRDS().
    ## clobber:   when FALSE (the default), refuse to overwrite an existing
    ##            file.
    is_fldgen <- inherits(modeldata, 'fldgen')
    if(!is_fldgen) {
        stop('modeldata must be a fldgen object.')
    }

    ## `clobber` is tested first so that the file-existence probe is skipped
    ## entirely when overwriting is allowed.
    overwrite_blocked <- !clobber && file.exists(file)
    if(overwrite_blocked) {
        stop('File ', file, ' exists, and noclobber is set.')
    }

    saveRDS(modeldata, file=file, compress='xz')
}
29 changes: 29 additions & 0 deletions inst/scripts/train-emulators.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
library('fldgen')

## Train an emulator for each requested model and save it to disk.
##
## For every entry of `models`, the files in `datadir` whose names match the
## model name are passed to trainTP().  Two RDS files are then written to the
## current working directory:
##   fldgen-<model>.rds                  the trained emulator (with
##                                       griddataP$vardata_raw removed)
##   fldgen-<model>_reducedEmulator.rds  the output of emulator_reducer()
##
## models:  character vector of model names; each is used as the `pattern`
##          argument of list.files().
## tasvar:  passed to trainTP() as `tvarname`.
## prvar:   passed to trainTP() as `pvarname`.
## datadir: directory searched for training files.
train_models <- function(models, tasvar='tasAdjust', prvar='prAdjust',
                         datadir='./training-data') {

    ## The following would give you the complete set of models:
    ## models <- c('GFDL-ESM2M', 'HadGEM2-ES', 'IPSL-CM5A-LR', 'MIROC5')

    for (model in models) {
        datafiles <- list.files(path=datadir, pattern=model, full.names=TRUE)
        cat('Processing model ', model, ' datafiles:\n', paste(datafiles, collapse='\n'),'\n')
        emu <- trainTP(datafiles, tvarname=tasvar, pvarname=prvar)
        emu$griddataP$vardata_raw <- NULL
        outfilename <- paste0('fldgen-',model, '.rds')

        ## NOTE(review): hard-coded overwrite of the coordinates of grid cell
        ## 67382; presumably this patches a bad or missing coordinate in the
        ## training grid -- confirm it still applies to the data being used.
        coord <- emu$griddataT$coord
        coord[67382, ] <- c(-49.75, 178.75)
        emu$griddataT$coord <- coord
        emu$griddataP$coord <- coord

        saveRDS(emu, outfilename)

        ## Save the reduced emulator under the same name that is passed to
        ## saveRDS() (previously the result was stored in a different
        ## variable, so the save failed with an undefined-object error).
        reducedEmulator <- emulator_reducer(emu)
        outfilename <- paste0('fldgen-',model, '_reducedEmulator.rds')
        saveRDS(reducedEmulator, outfilename)
    }
}


16 changes: 16 additions & 0 deletions inst/scripts/train-emulators.zsh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/zsh

#SBATCH -p short
#SBATCH -t 180
#SBATCH -A IHESD

## Batch script: sources train-emulators.R from the current directory and
## calls train_models() with the first command-line argument ($1).

module purge
module load gcc/8.1.0
module load netcdf
module load R/3.4.3

## Log the exact command that is about to run.  The whole command is quoted
## so that zsh does not treat the parentheses as glob syntax or the ';' as a
## command separator.
echo "Rscript -e \"source('train-emulators.R'); train_models('$1')\""

Rscript -e "source('train-emulators.R'); train_models('$1')"
71 changes: 71 additions & 0 deletions man/emulator_reducer.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 0 additions & 28 deletions man/saving_and_restoring.Rd

This file was deleted.

Loading

0 comments on commit ca50388

Please sign in to comment.