|
13 | 13 | #' @param filename (optional) name of .rds file to save aggregate simulation file to. If not specified
|
14 | 14 | #' then the results will only be returned in the R console
|
15 | 15 | #'
|
16 |
| -#' @param dirs a \code{character} vector containing the names of the \code{save_results} directories to be |
17 |
| -#' aggregated. A new folder will be created and placed in the \code{results_dirname} output folder |
18 |
| -#' |
19 |
| -#' @param results_dirname the new directory to place the aggregated results files |
20 |
| -#' |
21 | 16 | #' @param select a character vector indicating columns to variables to select from the
|
22 | 17 | #' \code{SimExtract(what='results')} information. This is mainly useful when RAM is an issue
|
23 | 18 | #' given simulations with many stored estimates. Default includes the results objects
|
|
32 | 27 | #' the simulation files returned the desired number of replications. If missing, the highest
|
33 | 28 | #' detected value from the collected set of replication information will be used
|
34 | 29 | #'
|
35 |
| -#' @param batch.filesize if the number of files exceed this number the |
36 |
| -#' aggregation will be done in batches of this size instead until complete |
37 |
| -#' |
38 |
| -#' @param verbose logical; include verbose progress output? |
39 | 30 | #'
|
40 | 31 | #' @return if \code{files} is used the function returns a \code{data.frame/tibble} with the (weighted) average
|
41 | 32 | #' of the simulation results. Otherwise, if \code{dirs} is used, the function returns NULL
|
|
75 | 66 | #' # files <- c(SimExtract(ret1, 'filename'), SimExtract(ret2, 'filename'))
|
76 | 67 | #' # final <- SimCollect(files = files)
|
77 | 68 | #'
|
78 |
| -#' # aggregate saved results for .rds files and results directories |
79 |
| -#' # runSimulation(..., seed=seeds1, save_results = TRUE, |
80 |
| -#' # save_details = list(save_results_dirname = 'dir1')) |
81 |
| -#' # runSimulation(..., seed=seeds2, save_results = TRUE, |
82 |
| -#' # save_details = list(save_results_dirname = 'dir2')) |
83 |
| -#' |
84 |
| -#' # place new saved results in 'SimDesign_results/' by default |
85 |
| -#' SimCollect(files = c('file1.rds', 'file2.rds'), |
86 |
| -#' filename='aggreged_sim.rds', |
87 |
| -#' dirs = c('dir1', 'dir2')) |
88 |
| -#' |
89 |
| -#' # If dirnames not included, can be extracted from results |
90 |
| -#' # dirs <- c(SimExtract(ret1, 'save_results_dirname'), |
91 |
| -#' SimExtract(ret2, 'save_results_dirname')) |
92 |
| -#' # SimCollect(dirs = dirs) |
93 | 69 | #'
|
94 | 70 | #' #################################################
|
95 | 71 | #' # Example where each row condition is repeated, evaluated independently,
|
96 | 72 | #' # and later collapsed into a single analysis object
|
97 | 73 | #'
|
98 | 74 | #' # Each condition repeated four times (hence, replications
|
99 | 75 | #' # should be set to desired.reps/4)
|
100 |
| -#' Design <- createDesign(N = c(30, 60), |
101 |
| -#' mu = c(0,5)) |
| 76 | +#' Design <- createDesign(mu = c(0,5), |
| 77 | +#' N = c(30, 60)) |
102 | 78 | #' Design
|
103 | 79 | #'
|
104 |
| -#' Design4 <- expandDesign(Design, 4) |
105 |
| -#' Design4 |
| 80 | +#' # assume the N=60 takes longer, and should be spread out across more arrays |
| 81 | +#' Design_long <- expandDesign(Design, c(2,2,4,4)) |
| 82 | +#' Design_long |
| 83 | +#' |
| 84 | +#' replications <- c(rep(50, 4), rep(25,8)) |
| 85 | +#' data.frame(Design_long, replications) |
106 | 86 | #'
|
107 | 87 | #' #-------------------------------------------------------------------
|
108 | 88 | #'
|
|
123 | 103 | #'
|
124 | 104 | #' #-------------------------------------------------------------------
|
125 | 105 | #'
|
126 |
| -#' # Generate fixed seeds to be distributed |
127 |
| -#' set.seed(1234) |
128 |
| -#' seeds <- genSeeds(Design) |
129 |
| -#' seeds |
130 |
| -#' |
131 |
| -#' # replications vector (constant is fine if the same across conditions; |
132 |
| -#' # below is vectorized to demonstrate that this could change) |
133 |
| -#' replications <- rep(250, nrow(Design)) |
134 |
| -#' |
135 | 106 | #' # create directory to store all final simulation files
|
136 | 107 | #' dir.create('sim_files/')
|
137 | 108 | #'
|
138 | 109 | #' # distribute jobs independently (explicitly parallelize here on cluster,
|
139 | 110 | #' # which is more elegantly managed via runArraySimulation)
|
140 |
| -#' sapply(1:nrow(Design), \(i) { |
141 |
| -#' runSimulation(design=Design[i, ], replications=replications[i], |
| 111 | +#' sapply(1:nrow(Design_long), \(i) { |
| 112 | +#' runSimulation(design=Design_long[i, ], replications=replications[i], |
142 | 113 | #' generate=Generate, analyse=Analyse, summarise=Summarise,
|
143 | 114 | #' filename=paste0('sim_files/job-', i)) |> invisible()
|
144 | 115 | #' })
|
|
154 | 125 | #' sim <- SimCollect(files = paste0('sim_files/job-', 1:nrow(Design), ".rds"))
|
155 | 126 | #' sim
|
156 | 127 | #'
|
| 128 | +#' SimClean(dir='sim_files/') |
| 129 | +#' |
157 | 130 | #' }
|
158 | 131 | SimCollect <- function(files = NULL, filename = NULL,
|
159 |
| - dirs = NULL, results_dirname = 'SimDesign_aggregate_results', |
160 |
| - select = NULL, check.only = FALSE, target.reps = NULL, |
161 |
| - batch.filesize = 100, verbose = TRUE){ |
162 |
| - if(!is.null(files) && length(files) > batch.filesize){ |
163 |
| - index <- seq(0L, length(files), by = batch.filesize) |
164 |
| - if(max(index) != length(files)) index <- c(index, length(files)) |
165 |
| - tmpfilenames <- character(length(index) - 1L) |
166 |
| - for(i in 1L:(length(index)-1L)){ |
167 |
| - if(verbose) |
168 |
| - cat(sprintf('Batch %i/%i', i, length(tmpfilenames))) |
169 |
| - pick <- (index[i]+1L):index[i+1L] |
170 |
| - tmpfilenames[i] <- tempfile() |
171 |
| - out <- SimCollect(files=files[pick], filename=tmpfilenames[i], |
172 |
| - dirs=dirs, results_dirname=results_dirname, |
173 |
| - select=select, verbose=FALSE) |
174 |
| - } |
175 |
| - ret <- SimCollect(files=tmpfilenames, filename=filename, |
176 |
| - dirs=dirs, results_dirname=results_dirname, select=NULL, |
177 |
| - check.only=FALSE, target.reps=target.repts, batch.filesize=Inf, |
178 |
| - verbose=verbose) |
179 |
| - sapply(tmpfilenames, \(f) file.remove(f)) |
180 |
| - return(ret) |
181 |
| - } |
| 132 | + select = NULL, check.only = FALSE, target.reps = NULL){ |
182 | 133 | if(check.only) select <- 'REPLICATIONS'
|
183 | 134 | oldfiles <- files
|
184 |
| - if(!is.null(dirs)){ |
185 |
| - if(!all(sapply(dirs, dir.exists))) stop('One or more directories not found') |
186 |
| - files <- lapply(dirs, function(x) dir(x)) |
187 |
| - if(!all(sapply(files, function(x) all(x == files[[1L]])))) |
188 |
| - stop('File names are not all the same') |
189 |
| - files <- files[[1L]] |
190 |
| - ndirs <- length(dirs) |
191 |
| - if(dir.exists(results_dirname)) |
192 |
| - stop(sprintf('Directory \'%s/\' already exists. Please fix', results_dirname), |
193 |
| - call.=FALSE) |
194 |
| - dir.create(results_dirname) |
195 |
| - if(verbose) |
196 |
| - message(sprintf('Writing aggregate results folders to \"%s\"', results_dirname)) |
197 |
| - for(f in files){ |
198 |
| - readin <- lapply(1:ndirs, function(x){ |
199 |
| - inp <- readRDS(paste0(dirs[x], '/', f)) |
200 |
| - inp <- subset_results(inp, select=select) |
201 |
| - inp |
202 |
| - }) |
203 |
| - ret <- readin[[1L]] |
204 |
| - collapse <- !is.list(ret$results) || is.data.frame(ret$results) |
205 |
| - results <- lapply(readin, function(x) x$results) |
206 |
| - ret$results <- do.call(if(collapse) rbind else c, results) |
207 |
| - tmp <- do.call(c, lapply(readin, function(x) x$warnings)) |
208 |
| - nms <- names(tmp) |
209 |
| - if(length(nms)) |
210 |
| - ret$warnings <- table(do.call(c, lapply(1:length(nms), |
211 |
| - function(x) rep(nms[x], each = tmp[x])))) |
212 |
| - tmp <- do.call(c, lapply(readin, function(x) x$errors)) |
213 |
| - nms <- names(tmp) |
214 |
| - if(length(nms)) |
215 |
| - ret$errors <- table(do.call(c, lapply(1:length(nms), |
216 |
| - function(x) rep(nms[x], each = tmp[x])))) |
217 |
| - saveRDS(ret, paste0(results_dirname, '/', f)) |
218 |
| - } |
219 |
| - } |
220 | 135 | files <- oldfiles
|
221 | 136 | if(!is.null(files)){
|
222 | 137 | filenames <- files
|
|
0 commit comments