Skip to content

Commit dcc408f

Browse files
committed
simplify args
1 parent b04699d commit dcc408f

File tree

2 files changed

+26
-143
lines changed

2 files changed

+26
-143
lines changed

R/SimCollect.R

Lines changed: 13 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,6 @@
1313
#' @param filename (optional) name of .rds file to save aggregate simulation file to. If not specified
1414
#' then the results will only be returned in the R console
1515
#'
16-
#' @param dirs a \code{character} vector containing the names of the \code{save_results} directories to be
17-
#' aggregated. A new folder will be created and placed in the \code{results_dirname} output folder
18-
#'
19-
#' @param results_dirname the new directory to place the aggregated results files
20-
#'
2116
#' @param select a character vector indicating columns to variables to select from the
2217
#' \code{SimExtract(what='results')} information. This is mainly useful when RAM is an issue
2318
#' given simulations with many stored estimates. Default includes the results objects
@@ -32,10 +27,6 @@
3227
#' the simulation files returned the desired number of replications. If missing, the highest
3328
#' detected value from the collected set of replication information will be used
3429
#'
35-
#' @param batch.filesize if the number of files exceed this number the
36-
#' aggregation will be done in batches of this size instead until complete
37-
#'
38-
#' @param verbose logical; include verbose progress output?
3930
#'
4031
#' @return if \code{files} is used the function returns a \code{data.frame/tibble} with the (weighted) average
4132
#' of the simulation results. Otherwise, if \code{dirs} is used, the function returns NULL
@@ -75,34 +66,23 @@
7566
#' # files <- c(SimExtract(ret1, 'filename'), SimExtract(ret2, 'filename'))
7667
#' # final <- SimCollect(files = files)
7768
#'
78-
#' # aggregate saved results for .rds files and results directories
79-
#' # runSimulation(..., seed=seeds1, save_results = TRUE,
80-
#' # save_details = list(save_results_dirname = 'dir1'))
81-
#' # runSimulation(..., seed=seeds2, save_results = TRUE,
82-
#' # save_details = list(save_results_dirname = 'dir2'))
83-
#'
84-
#' # place new saved results in 'SimDesign_results/' by default
85-
#' SimCollect(files = c('file1.rds', 'file2.rds'),
86-
#' filename='aggreged_sim.rds',
87-
#' dirs = c('dir1', 'dir2'))
88-
#'
89-
#' # If dirnames not included, can be extracted from results
90-
#' # dirs <- c(SimExtract(ret1, 'save_results_dirname'),
91-
#' SimExtract(ret2, 'save_results_dirname'))
92-
#' # SimCollect(dirs = dirs)
9369
#'
9470
#' #################################################
9571
#' # Example where each row condition is repeated, evaluated independently,
9672
#' # and later collapsed into a single analysis object
9773
#'
9874
#' # Each condition repeated four times (hence, replications
9975
#' # should be set to desired.reps/4)
100-
#' Design <- createDesign(N = c(30, 60),
101-
#' mu = c(0,5))
76+
#' Design <- createDesign(mu = c(0,5),
77+
#' N = c(30, 60))
10278
#' Design
10379
#'
104-
#' Design4 <- expandDesign(Design, 4)
105-
#' Design4
80+
#' # assume the N=60 takes longer, and should be spread out across more arrays
81+
#' Design_long <- expandDesign(Design, c(2,2,4,4))
82+
#' Design_long
83+
#'
84+
#' replications <- c(rep(50, 4), rep(25,8))
85+
#' data.frame(Design_long, replications)
10686
#'
10787
#' #-------------------------------------------------------------------
10888
#'
@@ -123,22 +103,13 @@
123103
#'
124104
#' #-------------------------------------------------------------------
125105
#'
126-
#' # Generate fixed seeds to be distributed
127-
#' set.seed(1234)
128-
#' seeds <- genSeeds(Design)
129-
#' seeds
130-
#'
131-
#' # replications vector (constant is fine if the same across conditions;
132-
#' # below is vectorized to demonstrate that this could change)
133-
#' replications <- rep(250, nrow(Design))
134-
#'
135106
#' # create directory to store all final simulation files
136107
#' dir.create('sim_files/')
137108
#'
138109
#' # distribute jobs independently (explicitly parallelize here on cluster,
139110
#' # which is more elegantly managed via runArraySimulation)
140-
#' sapply(1:nrow(Design), \(i) {
141-
#' runSimulation(design=Design[i, ], replications=replications[i],
111+
#' sapply(1:nrow(Design_long), \(i) {
112+
#' runSimulation(design=Design_long[i, ], replications=replications[i],
142113
#' generate=Generate, analyse=Analyse, summarise=Summarise,
143114
#' filename=paste0('sim_files/job-', i)) |> invisible()
144115
#' })
@@ -154,69 +125,13 @@
154125
#' sim <- SimCollect(files = paste0('sim_files/job-', 1:nrow(Design), ".rds"))
155126
#' sim
156127
#'
128+
#' SimClean(dir='sim_files/')
129+
#'
157130
#' }
158131
SimCollect <- function(files = NULL, filename = NULL,
159-
dirs = NULL, results_dirname = 'SimDesign_aggregate_results',
160-
select = NULL, check.only = FALSE, target.reps = NULL,
161-
batch.filesize = 100, verbose = TRUE){
162-
if(!is.null(files) && length(files) > batch.filesize){
163-
index <- seq(0L, length(files), by = batch.filesize)
164-
if(max(index) != length(files)) index <- c(index, length(files))
165-
tmpfilenames <- character(length(index) - 1L)
166-
for(i in 1L:(length(index)-1L)){
167-
if(verbose)
168-
cat(sprintf('Batch %i/%i', i, length(tmpfilenames)))
169-
pick <- (index[i]+1L):index[i+1L]
170-
tmpfilenames[i] <- tempfile()
171-
out <- SimCollect(files=files[pick], filename=tmpfilenames[i],
172-
dirs=dirs, results_dirname=results_dirname,
173-
select=select, verbose=FALSE)
174-
}
175-
ret <- SimCollect(files=tmpfilenames, filename=filename,
176-
dirs=dirs, results_dirname=results_dirname, select=NULL,
177-
check.only=FALSE, target.reps=target.repts, batch.filesize=Inf,
178-
verbose=verbose)
179-
sapply(tmpfilenames, \(f) file.remove(f))
180-
return(ret)
181-
}
132+
select = NULL, check.only = FALSE, target.reps = NULL){
182133
if(check.only) select <- 'REPLICATIONS'
183134
oldfiles <- files
184-
if(!is.null(dirs)){
185-
if(!all(sapply(dirs, dir.exists))) stop('One or more directories not found')
186-
files <- lapply(dirs, function(x) dir(x))
187-
if(!all(sapply(files, function(x) all(x == files[[1L]]))))
188-
stop('File names are not all the same')
189-
files <- files[[1L]]
190-
ndirs <- length(dirs)
191-
if(dir.exists(results_dirname))
192-
stop(sprintf('Directory \'%s/\' already exists. Please fix', results_dirname),
193-
call.=FALSE)
194-
dir.create(results_dirname)
195-
if(verbose)
196-
message(sprintf('Writing aggregate results folders to \"%s\"', results_dirname))
197-
for(f in files){
198-
readin <- lapply(1:ndirs, function(x){
199-
inp <- readRDS(paste0(dirs[x], '/', f))
200-
inp <- subset_results(inp, select=select)
201-
inp
202-
})
203-
ret <- readin[[1L]]
204-
collapse <- !is.list(ret$results) || is.data.frame(ret$results)
205-
results <- lapply(readin, function(x) x$results)
206-
ret$results <- do.call(if(collapse) rbind else c, results)
207-
tmp <- do.call(c, lapply(readin, function(x) x$warnings))
208-
nms <- names(tmp)
209-
if(length(nms))
210-
ret$warnings <- table(do.call(c, lapply(1:length(nms),
211-
function(x) rep(nms[x], each = tmp[x]))))
212-
tmp <- do.call(c, lapply(readin, function(x) x$errors))
213-
nms <- names(tmp)
214-
if(length(nms))
215-
ret$errors <- table(do.call(c, lapply(1:length(nms),
216-
function(x) rep(nms[x], each = tmp[x]))))
217-
saveRDS(ret, paste0(results_dirname, '/', f))
218-
}
219-
}
220135
files <- oldfiles
221136
if(!is.null(files)){
222137
filenames <- files

man/SimCollect.Rd

Lines changed: 13 additions & 45 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)