diff --git a/DESCRIPTION b/DESCRIPTION index 987dd5da..a4513e89 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,7 +49,8 @@ Suggests: styler, rClr, crayon, - webshot + webshot, + parallel Encoding: UTF-8 LazyData: true RoxygenNote: 7.2.3 diff --git a/R/utilities-ratio-comparison.R b/R/utilities-ratio-comparison.R index 5531c3aa..d8fc6fc3 100644 --- a/R/utilities-ratio-comparison.R +++ b/R/utilities-ratio-comparison.R @@ -152,8 +152,7 @@ getPKRatioSummaryStatistics <- function(pkData, referencePKData, simulationSetNa ) %>% mutate(Ratio = Value / ValueRef) # Data summary is applied to subgroups of QuantityPath and Parameter - ratioSummary <- ratioData %>% - group_by(QuantityPath, Parameter) %>% + ratioSummary <- ratioData[, c("QuantityPath", "Parameter", "Ratio")] %>% summarise( N = n(), Perc5 = quantile(Ratio, probs = 0.05, na.rm = TRUE), @@ -165,7 +164,7 @@ getPKRatioSummaryStatistics <- function(pkData, referencePKData, simulationSetNa SD = sd(Ratio, na.rm = TRUE), GeoMean = exp(mean(log(Ratio), na.rm = TRUE)), GeoSD = exp(sd(log(Ratio), na.rm = TRUE)), - .groups = "drop_last" + .by = c("QuantityPath", "Parameter") ) %>% mutate(SimulationSetName = simulationSetName, .before = everything()) @@ -211,22 +210,20 @@ mcSampling <- function(dataSize, sampleSize, n = getDefaultMCRepetitions(), seed #' @import dplyr getPKRatioSummaryFromAnalyticalSolution <- function(pkData, referencePKData, simulationSetName) { pkSummary <- pkData %>% - group_by(QuantityPath, Parameter) %>% summarise( Mean = mean(Value, na.rm = TRUE), SD = sd(Value, na.rm = TRUE), MeanLog = mean(log(Value), na.rm = TRUE), SDLog = sd(log(Value), na.rm = TRUE), - .groups = "drop_last" + .by = c("QuantityPath", "Parameter") ) referencePKSummary <- referencePKData %>% - group_by(QuantityPath, Parameter) %>% summarise( Mean = mean(Value, na.rm = TRUE), SD = sd(Value, na.rm = TRUE), MeanLog = mean(log(Value), na.rm = TRUE), SDLog = sd(log(Value), na.rm = TRUE), - .groups = "drop_last" + .by = c("QuantityPath", "Parameter") ) # Merge data summaries ratioSummary <- left_join( @@ -248,15 +245,14 @@ getPKRatioSummaryFromAnalyticalSolution <- function(pkData, referencePKData, sim } -#' @title getPKRatioSummaryFromMCSampling +#' @title getPKRatioSummaryFromSettings #' @description Get PK Parameter Ratio Measure From Monte Carlo Sampling #' @param pkData A data.frame of PK Parameter values for Population to compare #' @param referencePKData A data.frame of PK Parameter values for reference Population -#' @param simulationSetName Name of simulation set #' @param settings A list of task settings #' @return A data.frame of the PK Parameter ratios summary statistics #' @keywords internal -getPKRatioSummaryFromMCSampling <- function(pkData, referencePKData, simulationSetName, settings = NULL) { +getPKRatioSummaryFromSettings <- function(pkData, referencePKData, settings) { # Sample from largest population if size is different # Create a list of Sampled PK Parameters for each MC repetition and calculate their Ratio pkSize <- length(unique(pkData$IndividualId)) @@ -264,47 +260,116 @@ getPKRatioSummaryFromMCSampling <- function(pkData, referencePKData, simulationS mcRepetitions <- settings$mcRepetitions %||% getDefaultMCRepetitions() mcRandomSeed <- settings$mcRandomSeed %||% getDefaultMCRandomSeed() - t0 <- tic() - logInfo(messages$monteCarlo(simulationSetName, mcRepetitions, mcRandomSeed)) + widerPopulation <- ifelse(pkSize < referenceSize, "reference", "comparison") + + selectedIds <- mcSampling( + dataSize = switch(widerPopulation, + "reference" = referenceSize, + "comparison" = pkSize + ), + sampleSize = switch(widerPopulation, + "reference" = pkSize, + "comparison" = referenceSize + ), + n = mcRepetitions, + seed = mcRandomSeed + ) + useParallel <- all( + requireNamespace("parallel", quietly = TRUE), + settings$numberOfCores > 1 + ) + if (useParallel) { + cl <- parallel::makeCluster(settings$numberOfCores) + on.exit(parallel::stopCluster(cl)) + + # Parallel code requires that some objects are exported and to use dplyr package + # Re-import and rename function in this specific environment to be exported on clusters + getParallelSummaryStatistics <- getPKRatioSummaryStatistics + invisible(parallel::clusterEvalQ(cl, library(dplyr))) + invisible(parallel::clusterExport(cl, c( + "selectedIds", + "pkSize", + "referenceSize", + "pkData", + "referencePKData", + "getParallelSummaryStatistics" + ), + envir = environment() + )) + listOfMCRatioData <- switch(widerPopulation, + "reference" = parallel::parLapply( + cl = cl, + seq_along(selectedIds), + function(repetition) { + getParallelSummaryStatistics( + pkData = pkData %>% + mutate(IndividualId = ave( + 1:pkSize, + pkData$QuantityPath, + pkData$Parameter, + FUN = identity + )), + referencePKData = referencePKData %>% + slice(selectedIds[[repetition]], .by = c("QuantityPath", "Parameter")) %>% + mutate(IndividualId = ave(1:pkSize, QuantityPath, Parameter, FUN = identity)), + simulationSetName = repetition + ) + } + ), + "comparison" = parallel::parLapply( + cl = cl, + seq_along(selectedIds), + function(repetition) { + getParallelSummaryStatistics( + pkData %>% + slice(selectedIds[[repetition]], .by = c("QuantityPath", "Parameter")) %>% + mutate(IndividualId = ave(1:referenceSize, QuantityPath, Parameter, FUN = identity)), + referencePKData = referencePKData %>% + mutate(IndividualId = ave( + 1:referenceSize, + referencePKData$QuantityPath, + referencePKData$Parameter, + FUN = identity + )), + simulationSetName = repetition + ) + } + ) + ) + mcRatioData <- do.call("rbind", listOfMCRatioData) + return(mcRatioData) + } + if (settings$showProgress) { loadingProgress <- txtProgressBar(max = mcRepetitions, style = 3) + on.exit(close(loadingProgress)) } - if (pkSize < referenceSize) { - selectedIds <- mcSampling( - dataSize = referenceSize, - sampleSize = pkSize, - n = mcRepetitions, - seed = mcRandomSeed - ) - - listOfMCRatioData <- lapply( + listOfMCRatioData <- switch(widerPopulation, + "reference" = lapply( seq_along(selectedIds), function(repetition) { if (settings$showProgress) { setTxtProgressBar(loadingProgress, value = repetition) } # data.frame is arranged by increasing values of IndividualId + # since group_by is slow, ave is used instead getPKRatioSummaryStatistics( pkData = pkData %>% - group_by(QuantityPath, Parameter) %>% - mutate(IndividualId = 1:pkSize), + mutate(IndividualId = ave( + 1:pkSize, + pkData$QuantityPath, + pkData$Parameter, + FUN = identity + )), referencePKData = referencePKData %>% - group_by(QuantityPath, Parameter) %>% - slice(selectedIds[[repetition]]) %>% - mutate(IndividualId = 1:pkSize), + slice(selectedIds[[repetition]], .by = c("QuantityPath", "Parameter")) %>% + mutate(IndividualId = ave(1:pkSize, QuantityPath, Parameter, FUN = identity)), simulationSetName = repetition ) } - ) - } else { - selectedIds <- mcSampling( - dataSize = pkSize, - sampleSize = referenceSize, - n = mcRepetitions, - seed = mcRandomSeed - ) - listOfMCRatioData <- lapply( + ), + "comparison" = lapply( seq_along(selectedIds), function(repetition) { if (settings$showProgress) { @@ -312,28 +377,49 @@ getPKRatioSummaryFromMCSampling <- function(pkData, referencePKData, simulationS } # data.frame is arranged by increasing values of IndividualId getPKRatioSummaryStatistics( - pkData = pkData %>% - group_by(QuantityPath, Parameter) %>% - slice(selectedIds[[repetition]]) %>% - mutate(IndividualId = 1:referenceSize), + pkData %>% + slice(selectedIds[[repetition]], .by = c("QuantityPath", "Parameter")) %>% + mutate(IndividualId = ave(1:referenceSize, QuantityPath, Parameter, FUN = identity)), referencePKData = referencePKData %>% - group_by(QuantityPath, Parameter) %>% - mutate(IndividualId = 1:referenceSize), + mutate(IndividualId = ave( + 1:referenceSize, + referencePKData$QuantityPath, + referencePKData$Parameter, + FUN = identity + )), simulationSetName = repetition ) } ) - } - if (settings$showProgress) { - close(loadingProgress) - } - logInfo(messages$runCompleted(getElapsedTime(t0), "Monte Carlo Sampling")) - # Transform list to table + ) mcRatioData <- do.call("rbind", listOfMCRatioData) + return(mcRatioData) +} + +#' @title getPKRatioSummaryFromMCSampling +#' @description Get PK Parameter Ratio Measure From Monte Carlo Sampling +#' @param pkData A data.frame of PK Parameter values for Population to compare +#' @param referencePKData A data.frame of PK Parameter values for reference Population +#' @param simulationSetName Name of simulation set +#' @param settings A list of task settings +#' @return A data.frame of the PK Parameter ratios summary statistics +#' @keywords internal +getPKRatioSummaryFromMCSampling <- function(pkData, referencePKData, simulationSetName, settings = NULL) { + t0 <- tic() + mcRepetitions <- settings$mcRepetitions %||% getDefaultMCRepetitions() + mcRandomSeed <- settings$mcRandomSeed %||% getDefaultMCRandomSeed() + logInfo(messages$monteCarlo(simulationSetName, mcRepetitions, mcRandomSeed)) + mcRatioData <- getPKRatioSummaryFromSettings( + pkData = pkData, + referencePKData = referencePKData, + settings = settings + ) + logInfo(messages$runCompleted(getElapsedTime(t0), "Monte Carlo Sampling")) + # Get median statistics over all MC repetitions as a data.frame mcRatioSummary <- mcRatioData %>% group_by(QuantityPath, Parameter) %>% - summarise_all(median, .groups = "drop_last") %>% + summarise_all(median, .group = "drop_last") %>% mutate(SimulationSetName = simulationSetName, .before = everything()) return(mcRatioSummary) diff --git a/man/getPKRatioSummaryFromSettings.Rd b/man/getPKRatioSummaryFromSettings.Rd new file mode 100644 index 00000000..5c7c69b2 --- /dev/null +++ b/man/getPKRatioSummaryFromSettings.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities-ratio-comparison.R +\name{getPKRatioSummaryFromSettings} +\alias{getPKRatioSummaryFromSettings} +\title{getPKRatioSummaryFromSettings} +\usage{ +getPKRatioSummaryFromSettings(pkData, referencePKData, settings) +} +\arguments{ +\item{pkData}{A data.frame of PK Parameter values for Population to compare} + +\item{referencePKData}{A data.frame of PK Parameter values for reference Population} + +\item{settings}{A list of task settings} +} +\value{ +A data.frame of the PK Parameter ratios summary statistics +} +\description{ +Get PK Parameter Ratio Measure From Monte Carlo Sampling +} +\keyword{internal} diff --git a/tests/testthat/test-monte-carlo-sampling.R b/tests/testthat/test-monte-carlo-sampling.R index 94693e73..403eb8b5 100644 --- a/tests/testthat/test-monte-carlo-sampling.R +++ b/tests/testthat/test-monte-carlo-sampling.R @@ -63,7 +63,7 @@ test_that("Monte Carlo Solution is close to known solution", { referencePKData = referenceData, simulationSetName = "test", # With 10000 repetitions the method runs longer - settings = list(showProgress = FALSE, mcRepetitions = 200) + settings = list(showProgress = FALSE, numberOfCores = 1, mcRepetitions = 200) ) expect_equal( @@ -84,13 +84,13 @@ test_that("Monte Carlo Solution is repeatable", { pkData = comparisonData, referencePKData = referenceData, simulationSetName = "test", - settings = list(showProgress = FALSE, mcRepetitions = 100, mcRandomSeed = 3333) + settings = list(showProgress = FALSE, numberOfCores = 1, mcRepetitions = 100, mcRandomSeed = 3333) ) mcSolution2 <- ospsuite.reportingengine:::getPKRatioSummaryFromMCSampling( pkData = comparisonData, referencePKData = referenceData, simulationSetName = "test", - settings = list(showProgress = FALSE, mcRepetitions = 100, mcRandomSeed = 3333) + settings = list(showProgress = FALSE, numberOfCores = 1, mcRepetitions = 100, mcRandomSeed = 3333) ) expect_equal(mcSolution1, mcSolution2)