Skip to content

Commit

Permalink
Fixes #1086 ratios data.frame is pivoted into matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
pchelle committed Oct 25, 2023
1 parent d540956 commit 9e9ff2b
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 96 deletions.
236 changes: 149 additions & 87 deletions R/utilities-ratio-comparison.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,13 @@ getPKRatioSummaryForDifferentPopulations <- function(structureSet, referenceSet,
filePath = referenceSet$pkAnalysisResultsFileNames,
simulation = ospsuite::loadSimulation(referenceSet$simulationSet$simulationFile)
)
# Use arrange to ensure Ids are consistent between PK parameters and output paths
pkData <- ospsuite::pkAnalysesToTibble(pkAnalyses) %>% arrange(IndividualId)
referencePKData <- ospsuite::pkAnalysesToTibble(referencePKAnalyses) %>% arrange(IndividualId)
# Use arrange to ensure Ids, QuantityPath and Parameter are consistent between PK parameters and output paths
pkData <- ospsuite::pkAnalysesToTibble(pkAnalyses) %>%
select(IndividualId, QuantityPath, Parameter, Value) %>%
arrange(IndividualId, QuantityPath, Parameter)
referencePKData <- ospsuite::pkAnalysesToTibble(referencePKAnalyses) %>%
select(IndividualId, QuantityPath, Parameter, Value) %>%
arrange(IndividualId, QuantityPath, Parameter)

# Check that both PK data to be compared are included in reference PK data
validateIsIncluded(unique(pkData$QuantityPath), unique(referencePKData$QuantityPath))
Expand Down Expand Up @@ -117,58 +121,120 @@ getPKRatioSummaryForSamePopulation <- function(structureSet, referenceSet) {
filePath = referenceSet$pkAnalysisResultsFileNames,
simulation = ospsuite::loadSimulation(referenceSet$simulationSet$simulationFile)
)
pkData <- ospsuite::pkAnalysesToTibble(pkAnalyses)
referencePKData <- ospsuite::pkAnalysesToTibble(referencePKAnalyses)
pkData <- ospsuite::pkAnalysesToTibble(pkAnalyses) %>%
select(IndividualId, QuantityPath, Parameter, Value) %>%
arrange(IndividualId, QuantityPath, Parameter)
referencePKData <- ospsuite::pkAnalysesToTibble(referencePKAnalyses) %>%
select(IndividualId, QuantityPath, Parameter, Value) %>%
arrange(IndividualId, QuantityPath, Parameter)
# Check that both PK data to be compared are included in reference PK data
validateIsIncluded(unique(pkData$QuantityPath), unique(referencePKData$QuantityPath))
validateIsIncluded(unique(pkData$Parameter), unique(referencePKData$Parameter))


# Pivot table to get fast computation of ratios and their statistics
quantityPaths <- unique(pkData$QuantityPath)
parameters <- unique(pkData$Parameter)

pkRatioSummaryGroups <- data.frame(
SimulationSetName = structureSet$simulationSet$simulationSetName,
QuantityPath = rep(quantityPaths, each = length(parameters)),
Parameter = rep(parameters, length(quantityPaths))
)

pkData <- pkData %>%
tidyr::pivot_wider(
names_from = c(QuantityPath, Parameter),
values_from = Value
)
referencePKData <- referencePKData %>%
tidyr::pivot_wider(
names_from = c(QuantityPath, Parameter),
values_from = Value
)

pkRatioSummary <- getPKRatioSummaryStatistics(
pkData = pkData,
referencePKData = referencePKData,
simulationSetName = structureSet$simulationSet$simulationSetName
pkData = as.matrix(pkData),
# In case reference data had more paths and parameters, restrict to those in pkData
referencePKData = as.matrix(referencePKData[,names(referencePKData) %in% names(pkData)])
)
# Combine stats to their parameters, paths and simulation set name
pkRatioSummary <- bind_cols(
pkRatioSummaryGroups,
summaryStatisticsToDataFrame(pkRatioSummary)
)

return(pkRatioSummary)
}

#' @title getPKRatioSummaryStatistics
#' @description
#' Calculate and save summary statistics of ratios of PK parameters
#' @param pkData A data.frame of PK Parameter values for Population to compare
#' @param referencePKData A data.frame of PK Parameter values for reference Population
#' @param simulationSetName Name of simulation set
#' @return A data.frame of the PK Parameter ratios summary statistics
#' Note that this function computes on matrix objects to be faster
#' than on data.frame when Monte Carlo simulation is performed
#' @param pkData A matrix of PK Parameter values for Population to compare
#' @param referencePKData A matrix of PK Parameter values for reference Population
#' @return A matrix of the PK Parameter ratios summary statistics
#' @keywords internal
#' @import dplyr
getPKRatioSummaryStatistics <- function(pkData, referencePKData, simulationSetName) {
# Use left join to include only comparable data
# in case reference includes additional parameters or paths
ratioData <- left_join(
pkData,
referencePKData,
by = c("IndividualId", "QuantityPath", "Parameter"),
suffix = c("", "Ref")
) %>% mutate(Ratio = Value / ValueRef)
getPKRatioSummaryStatistics <- function(pkData, referencePKData) {
# Since data.frame objects were pivoted to get matrix objects
# Rows contained unique individuals while
# columns contained all combinations of QuantityPath and Parameter
# Since the object is a matrix of numeric values, calculation is vectorized
ratioData <- pkData / referencePKData
ratioSummary <- rbind(
# Apply with MARGIN = 2 means that calculation is performed by columns
N = apply(ratioData, 2, FUN = function(x){sum(!is.na(x))}),
apply(ratioData, 2, FUN = function(x){quantile(x, probs = c(0.05,0.25,0.5,0.75,0.95), na.rm = TRUE)}),
Mean = apply(ratioData, 2, FUN = function(x){mean(x, na.rm = TRUE)}),
SD = apply(ratioData, 2, FUN = function(x){sd(x, na.rm = TRUE)}),
GeoMean = apply(ratioData, 2, FUN = function(x){exp(mean(log(x), na.rm = TRUE))}),
GeoSD = apply(ratioData, 2, FUN = function(x){exp(sd(log(x), na.rm = TRUE))})
)
return(ratioSummary)
}

# Data summary is applied to subgroups of QuantityPath and Parameter
ratioSummary <- ratioData[, c("QuantityPath", "Parameter", "Ratio")] %>%
summarise(
N = n(),
Perc5 = quantile(Ratio, probs = 0.05, na.rm = TRUE),
Perc25 = quantile(Ratio, probs = 0.25, na.rm = TRUE),
Perc50 = quantile(Ratio, probs = 0.5, na.rm = TRUE),
Perc75 = quantile(Ratio, probs = 0.75, na.rm = TRUE),
Perc95 = quantile(Ratio, probs = 0.95, na.rm = TRUE),
Mean = mean(Ratio, na.rm = TRUE),
SD = sd(Ratio, na.rm = TRUE),
GeoMean = exp(mean(log(Ratio), na.rm = TRUE)),
GeoSD = exp(sd(log(Ratio), na.rm = TRUE)),
.by = c("QuantityPath", "Parameter")
) %>%
mutate(SimulationSetName = simulationSetName, .before = everything())
#' @title summaryStatisticsToDataFrame
#' @description
#' Translate matrix of summary statistics to data.frame with appropriate names
#' @param data A matrix of summary statistics
#' @return A data.frame displaying summary statistics by column
#' @keywords internal
summaryStatisticsToDataFrame <- function(data, monteCarlo = FALSE){
# For same population, summary statistics is by row and needs to be transposed
# For different populations, summary statistics is directly by column (because of sapply usage)
if(!monteCarlo){
data <- t(data)
}
data <- as.data.frame(data)
names(data) <- c("N", paste0("Perc", c(5,25,50,75,95)), "Mean", "SD", "GeoMean", "GeoSD")
return(data)
}

return(as.data.frame(ratioSummary))
#' @title getMonteCarloMedians
#' @description
#' Get all median values from a list of Monte Carlo simulations
#' @param listOfData A list of summary statistics matrices
#' @return A data.frame displaying summary statistics by column
#' @keywords internal
getMonteCarloMedians <- function(listOfData){
# Translate list of matrix objects into a single matrix object
# The matrix object has 10 rows of summary statistics,
# repeated as many times as the number of Monte Carlo simulations
data <- do.call("rbind", listOfData)
medianData <- sapply(
1:10,
function(statsIndex){
selectedRows <- statsIndex + seq(0, nrow(data)-10, 10)
# Get median of each column per selected stats
apply(data[selectedRows,], 2, median)
}
)
medianData <- summaryStatisticsToDataFrame(
# First row is summary stat for individual ids and needs to be removed
tail(medianData, -1),
monteCarlo = TRUE
)
return(medianData)
}

#' @title mcSampling
Expand Down Expand Up @@ -247,16 +313,16 @@ getPKRatioSummaryFromAnalyticalSolution <- function(pkData, referencePKData, sim

#' @title getPKRatioSummaryFromSettings
#' @description Get PK Parameter Ratio Measure From Monte Carlo Sampling
#' @param pkData A data.frame of PK Parameter values for Population to compare
#' @param referencePKData A data.frame of PK Parameter values for reference Population
#' @param pkData A matrix of PK Parameter values for Population to compare
#' @param referencePKData A matrix of PK Parameter values for reference Population
#' @param settings A list of task settings
#' @return A data.frame of the PK Parameter ratios summary statistics
#' @keywords internal
getPKRatioSummaryFromSettings <- function(pkData, referencePKData, settings) {
# Sample from largest population if size is different
# Create a list of Sampled PK Parameters for each MC repetition and calculate their Ratio
pkSize <- length(unique(pkData$IndividualId))
referenceSize <- length(unique(referencePKData$IndividualId))
pkSize <- nrow(pkData)
referenceSize <- nrow(referencePKData)
mcRepetitions <- settings$mcRepetitions %||% getDefaultMCRepetitions()
mcRandomSeed <- settings$mcRandomSeed %||% getDefaultMCRandomSeed()

Expand Down Expand Up @@ -285,11 +351,8 @@ getPKRatioSummaryFromSettings <- function(pkData, referencePKData, settings) {
# Parallel code requires that some objects are exported and to use dplyr package
# Re-import and rename function in this specific environment to be exported on clusters
getParallelSummaryStatistics <- getPKRatioSummaryStatistics
invisible(parallel::clusterEvalQ(cl, library(dplyr)))
invisible(parallel::clusterExport(cl, c(
"selectedIds",
"pkSize",
"referenceSize",
"pkData",
"referencePKData",
"getParallelSummaryStatistics"
Expand All @@ -302,12 +365,8 @@ getPKRatioSummaryFromSettings <- function(pkData, referencePKData, settings) {
seq_along(selectedIds),
function(repetition) {
getParallelSummaryStatistics(
pkData = pkData %>%
mutate(IndividualId = 1:pkSize, .by = c("QuantityPath", "Parameter")),
referencePKData = referencePKData %>%
slice(selectedIds[[repetition]], .by = c("QuantityPath", "Parameter")) %>%
mutate(IndividualId = 1:pkSize, .by = c("QuantityPath", "Parameter")),
simulationSetName = repetition
pkData = pkData,
referencePKData = referencePKData[selectedIds[[repetition]], ]
)
}
),
Expand All @@ -316,18 +375,13 @@ getPKRatioSummaryFromSettings <- function(pkData, referencePKData, settings) {
seq_along(selectedIds),
function(repetition) {
getParallelSummaryStatistics(
pkData %>%
slice(selectedIds[[repetition]], .by = c("QuantityPath", "Parameter")) %>%
mutate(IndividualId = 1:referenceSize, .by = c("QuantityPath", "Parameter")),
referencePKData = referencePKData %>%
mutate(IndividualId = 1:referenceSize, .by = c("QuantityPath", "Parameter")),
simulationSetName = repetition
pkData = pkData[selectedIds[[repetition]], ],
referencePKData = referencePKData
)
}
)
)
mcRatioData <- do.call("rbind", listOfMCRatioData)
return(mcRatioData)
return(getMonteCarloMedians(listOfMCRatioData))
}

if (settings$showProgress) {
Expand All @@ -342,15 +396,11 @@ getPKRatioSummaryFromSettings <- function(pkData, referencePKData, settings) {
if (settings$showProgress) {
setTxtProgressBar(loadingProgress, value = repetition)
}
# data.frame is arranged by increasing values of IndividualId
# since group_by is slow, .by is used instead
# With pivoted matrix objects, there is no need to group_by rows correspond to unique individual ids
# thus, smaller population can be used as is and wider only needs to be subset by simulated Monte Carlo rows
getPKRatioSummaryStatistics(
pkData = pkData %>%
mutate(IndividualId = 1:pkSize, .by = c("QuantityPath", "Parameter")),
referencePKData = referencePKData %>%
slice(selectedIds[[repetition]], .by = c("QuantityPath", "Parameter")) %>%
mutate(IndividualId = 1:pkSize, .by = c("QuantityPath", "Parameter")),
simulationSetName = repetition
pkData = pkData,
referencePKData = referencePKData[selectedIds[[repetition]], ]
)
}
),
Expand All @@ -362,18 +412,13 @@ getPKRatioSummaryFromSettings <- function(pkData, referencePKData, settings) {
}
# data.frame is arranged by increasing values of IndividualId
getPKRatioSummaryStatistics(
pkData %>%
slice(selectedIds[[repetition]], .by = c("QuantityPath", "Parameter")) %>%
mutate(IndividualId = 1:referenceSize, .by = c("QuantityPath", "Parameter")),
referencePKData = referencePKData %>%
mutate(IndividualId = 1:referenceSize, .by = c("QuantityPath", "Parameter")),
simulationSetName = repetition
pkData = pkData[selectedIds[[repetition]], ],
referencePKData = referencePKData
)
}
)
)
mcRatioData <- do.call("rbind", listOfMCRatioData)
return(mcRatioData)
return(getMonteCarloMedians(listOfMCRatioData))
}

#' @title getPKRatioSummaryFromMCSampling
Expand All @@ -389,20 +434,37 @@ getPKRatioSummaryFromMCSampling <- function(pkData, referencePKData, simulationS
mcRepetitions <- settings$mcRepetitions %||% getDefaultMCRepetitions()
mcRandomSeed <- settings$mcRandomSeed %||% getDefaultMCRandomSeed()
logInfo(messages$monteCarlo(simulationSetName, mcRepetitions, mcRandomSeed))
mcRatioData <- getPKRatioSummaryFromSettings(
pkData = pkData,
referencePKData = referencePKData,

# Pivot table to get fast computation of ratios and their statistics
quantityPaths <- unique(pkData$QuantityPath)
parameters <- unique(pkData$Parameter)

pkRatioSummaryGroups <- data.frame(
SimulationSetName = simulationSetName,
QuantityPath = rep(quantityPaths, each = length(parameters)),
Parameter = rep(parameters, length(quantityPaths))
)

pkData <- pkData %>%
tidyr::pivot_wider(
names_from = c(QuantityPath, Parameter),
values_from = Value
)
referencePKData <- referencePKData %>%
tidyr::pivot_wider(
names_from = c(QuantityPath, Parameter),
values_from = Value
)

pkRatioSummary <- getPKRatioSummaryFromSettings(
pkData = as.matrix(pkData),
referencePKData = as.matrix(referencePKData[,names(referencePKData) %in% names(pkData)]),
settings = settings
)
pkRatioSummary <- bind_cols(pkRatioSummaryGroups, pkRatioSummary)
logInfo(messages$runCompleted(getElapsedTime(t0), "Monte Carlo Sampling"))

# Get median statistics over all MC repetitions as a data.frame
mcRatioSummary <- mcRatioData %>%
group_by(QuantityPath, Parameter) %>%
summarise_all(median, .group = "drop_last") %>%
mutate(SimulationSetName = simulationSetName, .before = everything())

return(mcRatioSummary)
return(pkRatioSummary)
}

#' @title ratioBoxplot
Expand Down
18 changes: 18 additions & 0 deletions man/getMonteCarloMedians.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/getPKRatioSummaryFromSettings.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions man/getPKRatioSummaryStatistics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 9e9ff2b

Please sign in to comment.