diff --git a/R/SampleSize.R b/R/SampleSize.R index 5c8cfc7..0b4932e 100644 --- a/R/SampleSize.R +++ b/R/SampleSize.R @@ -9,7 +9,7 @@ #' @param Eper Optional. Vector of length 2 specifying the period effect in a `dtype = "2x2"` design, applied to c(Period 0, Period 1). Defaults to `c(0, 0)` if not provided. Ignored for `dtype = "parallel"`. #' @param Eco Optional. Vector of length 2 specifying the carry-over effect for each arm in a `dtype = "2x2"` design, applied to c(Reference, Treatment). Defaults to `c(0, 0)` if not provided. Ignored for `dtype = "parallel"`. #' @param rho Correlation parameter applied uniformly across all endpoint pairs, used with sigma_list to calculate varcov if cor_mat or varcov_list are not provided. -#' @param TAR vector of allocation rates with allocation rates of the arm, default is equivalent rate. +#' @param TAR Numeric vector. Treatment allocation rate for each arm, where the length of `TAR` specifies the number of arms. The default is an equal allocation ratio across all arms. #' @param arm_names Optional vector with the treatment names. If not supplied, it will be derived from mu_list. #' @param ynames_list Optional list of vectors with Endpoint names on each arm. When not all endpoint names are provided for each arm, arbitrary names (assigned by vector order) are used. #' @param type_y vector with the type of endpoints: primary endpoint(1), otherwise (2). @@ -22,23 +22,22 @@ #' @param list_lequi.tol list of lower equivalence bounds (e.g., -0.5) expressed in raw scale units (e.g., scalepoints) of endpoint in comparator #' @param list_uequi.tol list of upper equivalence bounds (e.g., -0.5) expressed in raw scale units (e.g., scalepoints) of endpoint in comparator #' @param vareq Logical indicating whether variances are assumed equal across arms (default = FALSE). -#' @param dtype design type ("parallel","2x2") +#' @param dtype Character. Design type for the trial: `"parallel"` (default) for parallel group design or `"2x2"` for crossover design (applicable only for trials with 2 arms). #' @param lognorm Is data log-normally distributed? (TRUE, FALSE) #' @param k Vector with the number of endpoints that must be successful (integer) for global bioequivalence for each comparator. If no k vector is provided, it will be set to the total number of endpoints on each comparator. -#' @param adjust alpha adjustment ( "k", "bon","sid","no","seq") -#' @param ctype comparison dtype ("DOM"(Difference of means), "ROM"(Ratio of means)) +#' @param adjust Character. Method for alpha adjustment: `"k"` (K-fold), `"bon"` (Bonferroni), `"sid"` (Sidak), `"no"` (no adjustment, default), or `"seq"` (sequential adjustment). +#' @param ctype Character. Specifies the type of hypothesis test for comparison: `"DOM"` for Difference of Means or `"ROM"` for Ratio of Means. #' @param dropout vector with proportion of total population with dropout per arm -#' @param step.power initial step size is 2^step.power -#' @param step.up if TRUE steps up from 'lower', if FALSE steps down from 'upper' -#' @param pos.side if TRUE finds integer, i, closest to the root such that f(i) >0 -#' @param maxiter maximum number of iterations #' @param nsim number of simulated studies (default=5000) -#' @param lower initial value of N to be searched (default=2) -#' @param upper max value of N to be searched (default=500) #' @param seed main seed -#' @param ncores Number of processing cores for parallel computation; defaults to the total detected cores minus one. +#' @param ncores Integer. Number of processing cores to use for parallel computation. Defaults to one less than the total number of detected cores. #' @param optimization_method Character. Method for determining the required sample size: "fast" (using modified root-finding algorithms) or "step-by-step". Defaults to "fast". -#' +#' @param lower Integer. Initial value of `N` for the search range. Defaults to 2. +#' @param upper Integer. Maximum value of `N` for the search range. Defaults to 500. +#' @param step.power Numeric. The initial step size for the sample size search, defined as `2^step.power`. Relevant when `optimization_method` is `"fast"`. +#' @param step.up Logical. If `TRUE` (default), the sample size search increments upward from the `lower` limit; if `FALSE`, it decrements downward from the `upper` limit. Used only when `optimization_method` is `"fast"`. +#' @param pos.side Logical. If `TRUE`, finds the smallest integer, `i`, closest to the root such that `f(i) > 0`. Used only when `optimization_method` is `"fast"`. +#' @param maxiter Integer. Maximum number of iterations allowed for finding the sample size. Defaults to 1000. Used only when `optimization_method` is `"fast"`. #' @return An object simss that contains the following elements : #' \describe{ #' \item{"response"}{ array with the sample sizes for each arm and aproximated achieved power with confidence intervals} @@ -67,10 +66,9 @@ #' EUREF = c(AUCinf = 12332, AUClast = 9398, Cmax = 17.9), #' USREF = c(AUCinf = 10064, AUClast = 8332, Cmax = 18.8)) #' -#'# Equivalent boundaries -#'lequi.tol <- c(AUCinf = 0.8, AUClast = 0.8, Cmax = 0.8) -#'uequi.tol <- c(AUCinf = 1.25, AUClast = 1.25, Cmax = 1.25) -#' +#' # Equivalent boundaries +#' lequi.tol <- c(AUCinf = 0.8, AUClast = 0.8, Cmax = 0.8) +#' uequi.tol <- c(AUCinf = 1.25, AUClast = 1.25, Cmax = 1.25) #' #' # arms to be compared #' list_comparator <- list(EMA = c("SB2", "EUREF"), @@ -89,8 +87,8 @@ #' lognorm = TRUE, ncores = 1, nsim = 50, seed = 1234) #' #' @export -sampleSize <- function(mu_list, varcov_list=NA, sigma_list=NA, cor_mat=NA, - sigmaB =NA, Eper, Eco, rho=0, +sampleSize <- function(mu_list, varcov_list = NA, sigma_list = NA, cor_mat = NA, + sigmaB =NA, Eper, Eco, rho = 0, TAR=NA, arm_names=NA, ynames_list=NA, @@ -111,15 +109,15 @@ sampleSize <- function(mu_list, varcov_list=NA, sigma_list=NA, cor_mat=NA, adjust="no", dropout = NA, nsim=5000, - lower=2, - upper=500, seed=1234, ncores=NA, + optimization_method = "fast", + lower=2, + upper=500, step.power=6, step.up=TRUE, pos.side=FALSE, - maxiter = 1000, - optimization_method = "fast" + maxiter = 1000 ){ # Assign default values for Eper and Eco @@ -141,13 +139,6 @@ sampleSize <- function(mu_list, varcov_list=NA, sigma_list=NA, cor_mat=NA, } } - # Derive the names of the endpoints in each arm - # - # Example output for a trial with 2 treatments and 3 outcomes: - # $SB2 - # [1] "AUCinf" "AUClast" "Cmax" - # $EUREF - # [1] "AUCinf" "AUClast" "Cmax" if (any(is.na(ynames_list))) { # Try to derive the ynames from mu_list @@ -173,6 +164,9 @@ sampleSize <- function(mu_list, varcov_list=NA, sigma_list=NA, cor_mat=NA, mu_list[[i]] <- mu } + # Check the sample size limits + validate_sample_size_limits(lower = lower, upper = upper) + # Varcov specfication if (any(is.na(varcov_list))) { @@ -268,20 +262,8 @@ sampleSize <- function(mu_list, varcov_list=NA, sigma_list=NA, cor_mat=NA, len_mu <- lapply(mu_list,length) len_cvar <- lapply(varcov_list,ncol) - - # test positive defined varcov - positive <- function(x) { - resp <- base::tryCatch({matrixcalc::is.positive.semi.definite(round(x,3))}, - error = function(e) {FALSE}) - } - - - lis_pdef <- unlist(lapply(varcov_list, positive)) - - if (!all(lis_pdef)) { - stop("All 'varcov' matrices must be symmetric and positive definite.") - } + validate_positive_definite(varcov_list) # Define weights according to type of endpoint,i.e. primary, secondary @@ -596,3 +578,5 @@ sampleSize <- function(mu_list, varcov_list=NA, sigma_list=NA, cor_mat=NA, return(out) } + + diff --git a/R/validation.R b/R/validation.R new file mode 100644 index 0000000..b494670 --- /dev/null +++ b/R/validation.R @@ -0,0 +1,64 @@ +#' @title Check Sample Size Limits +#' +#' @author +#' Thomas Debray \email{tdebray@fromdatatowisdom.com} +#' +#' @description Validates that the upper and lower limits are numeric and that the upper limit is greater than the lower limit. +#' +#' @param lower Numeric. The initial lower limit for the search range. +#' @param upper Numeric. The initial upper limit for the search range. +#' +#' @return NULL. If the checks pass, the function returns nothing. If the checks fail, it stops execution with an error message. +validate_sample_size_limits <- function(lower, upper) { + + # Check if both lower and upper are numeric + if (!is.numeric(upper) || !is.numeric(lower)) { + stop("The upper and lower limits for the sample size must be numeric.") + } + + # Check if both lower and upper are integers + if (lower != as.integer(lower) || upper != as.integer(upper)) { + stop("The upper and lower limits for the sample size must be integers.") + } + + # Check lower is greater than 0 + if (lower <= 0) { + stop("The lower limit for the sample size must be greater than 0.") + } + + # Check if upper is greater than or equal to lower + if (upper < lower) { + stop("The upper limit for the sample size must be greater than or equal to the lower limit.") + } +} + + +#' @title Validate Positive Semi-Definite Matrices +#' +#' @author +#' Thomas Debray \email{tdebray@fromdatatowisdom.com} +#' +#' @description Validates that all matrices in a list are symmetric and positive semi-definite. +#' +#' @param varcov_list List of matrices. Each matrix is checked to ensure it is symmetric and positive semi-definite. +#' +#' @return NULL. If all matrices pass, the function returns nothing. If any matrix fails, it stops with an error message. +validate_positive_definite <- function(varcov_list) { + # Function to check if a matrix is positive semi-definite + is_positive <- function(x) { + resp <- tryCatch({ + matrixcalc::is.positive.semi.definite(round(x, 3)) + }, error = function(e) { + FALSE + }) + return(resp) + } + + # Apply the check to each matrix in the list + positive_definite_list <- unlist(lapply(varcov_list, is_positive)) + + # Stop if any matrix is not positive semi-definite + if (!all(positive_definite_list)) { + stop("All matrices in 'varcov_list' must be symmetric and positive semi-definite.") + } +} diff --git a/README.Rmd b/README.Rmd index e74b8fe..42aadd3 100644 --- a/README.Rmd +++ b/README.Rmd @@ -38,13 +38,9 @@ You can also install the development version of SimTOST from [GitHub](https://gi devtools::install_github("smartdata-analysis-and-statistics/SimTOST") ``` -## Example +## Vignettes -The main features of this package is `sampleSize` function which can be used to calculate sample size for individual and multiple endpoints. +The main features of this package is `sampleSize` function which can be used to calculate sample size for individual and multiple endpoints. Various worked examples are available as [vignettes](https://smartdata-analysis-and-statistics.github.io/SimTOST) -The example of using the functionality of this package can be found in these vignettes: - -1. [Sample size calculation of individual endpoint]() -2. [Sample size calculation of multiple endpoints]() diff --git a/man/print.simss.Rd b/man/print.simss.Rd index 44f49d7..d52f91d 100644 --- a/man/print.simss.Rd +++ b/man/print.simss.Rd @@ -18,5 +18,5 @@ Display the summary results of the sample size estimation This function displays the summary results of the sample size estimation. } \author{ -Thomas Debray \email{thomas.debray@gmail.com} +Thomas Debray \email{tdebray@fromdatatowisdom.com} } diff --git a/man/sampleSize.Rd b/man/sampleSize.Rd index 3cb7529..e78b61d 100644 --- a/man/sampleSize.Rd +++ b/man/sampleSize.Rd @@ -33,15 +33,15 @@ sampleSize( adjust = "no", dropout = NA, nsim = 5000, - lower = 2, - upper = 500, seed = 1234, ncores = NA, + optimization_method = "fast", + lower = 2, + upper = 500, step.power = 6, step.up = TRUE, pos.side = FALSE, - maxiter = 1000, - optimization_method = "fast" + maxiter = 1000 ) } \arguments{ @@ -61,7 +61,7 @@ sampleSize( \item{rho}{Correlation parameter applied uniformly across all endpoint pairs, used with sigma_list to calculate varcov if cor_mat or varcov_list are not provided.} -\item{TAR}{vector of allocation rates with allocation rates of the arm, default is equivalent rate.} +\item{TAR}{Numeric vector. Treatment allocation rate for each arm, where the length of \code{TAR} specifies the number of arms. The default is an equal allocation ratio across all arms.} \item{arm_names}{Optional vector with the treatment names. If not supplied, it will be derived from mu_list.} @@ -85,9 +85,9 @@ sampleSize( \item{list_uequi.tol}{list of upper equivalence bounds (e.g., -0.5) expressed in raw scale units (e.g., scalepoints) of endpoint in comparator} -\item{dtype}{design type ("parallel","2x2")} +\item{dtype}{Character. Design type for the trial: \code{"parallel"} (default) for parallel group design or \code{"2x2"} for crossover design (applicable only for trials with 2 arms).} -\item{ctype}{comparison dtype ("DOM"(Difference of means), "ROM"(Ratio of means))} +\item{ctype}{Character. Specifies the type of hypothesis test for comparison: \code{"DOM"} for Difference of Means or \code{"ROM"} for Ratio of Means.} \item{vareq}{Logical indicating whether variances are assumed equal across arms (default = FALSE).} @@ -95,29 +95,29 @@ sampleSize( \item{k}{Vector with the number of endpoints that must be successful (integer) for global bioequivalence for each comparator. If no k vector is provided, it will be set to the total number of endpoints on each comparator.} -\item{adjust}{alpha adjustment ( "k", "bon","sid","no","seq")} +\item{adjust}{Character. Method for alpha adjustment: \code{"k"} (K-fold), \code{"bon"} (Bonferroni), \code{"sid"} (Sidak), \code{"no"} (no adjustment, default), or \code{"seq"} (sequential adjustment).} \item{dropout}{vector with proportion of total population with dropout per arm} \item{nsim}{number of simulated studies (default=5000)} -\item{lower}{initial value of N to be searched (default=2)} +\item{seed}{main seed} -\item{upper}{max value of N to be searched (default=500)} +\item{ncores}{Integer. Number of processing cores to use for parallel computation. Defaults to one less than the total number of detected cores.} -\item{seed}{main seed} +\item{optimization_method}{Character. Method for determining the required sample size: "fast" (using modified root-finding algorithms) or "step-by-step". Defaults to "fast".} -\item{ncores}{Number of processing cores for parallel computation; defaults to the total detected cores minus one.} +\item{lower}{Integer. Initial value of \code{N} for the search range. Defaults to 2.} -\item{step.power}{initial step size is 2^step.power} +\item{upper}{Integer. Maximum value of \code{N} for the search range. Defaults to 500.} -\item{step.up}{if TRUE steps up from 'lower', if FALSE steps down from 'upper'} +\item{step.power}{Numeric. The initial step size for the sample size search, defined as \code{2^step.power}. Relevant when \code{optimization_method} is \code{"fast"}.} -\item{pos.side}{if TRUE finds integer, i, closest to the root such that f(i) >0} +\item{step.up}{Logical. If \code{TRUE} (default), the sample size search increments upward from the \code{lower} limit; if \code{FALSE}, it decrements downward from the \code{upper} limit. Used only when \code{optimization_method} is \code{"fast"}.} -\item{maxiter}{maximum number of iterations} +\item{pos.side}{Logical. If \code{TRUE}, finds the smallest integer, \code{i}, closest to the root such that \code{f(i) > 0}. Used only when \code{optimization_method} is \code{"fast"}.} -\item{optimization_method}{Character. Method for determining the required sample size: "fast" (using modified root-finding algorithms) or "step-by-step". Defaults to "fast".} +\item{maxiter}{Integer. Maximum number of iterations allowed for finding the sample size. Defaults to 1000. Used only when \code{optimization_method} is \code{"fast"}.} } \value{ An object simss that contains the following elements : @@ -147,7 +147,6 @@ sigma_list <- list(SB2 = c(AUCinf = 11114, AUClast = 9133, Cmax = 16.9), lequi.tol <- c(AUCinf = 0.8, AUClast = 0.8, Cmax = 0.8) uequi.tol <- c(AUCinf = 1.25, AUClast = 1.25, Cmax = 1.25) - # arms to be compared list_comparator <- list(EMA = c("SB2", "EUREF"), FDA = c("SB2", "USREF")) diff --git a/man/simParallelEndpoints.Rd b/man/simParallelEndpoints.Rd index d9b37a2..bcbf31a 100644 --- a/man/simParallelEndpoints.Rd +++ b/man/simParallelEndpoints.Rd @@ -36,5 +36,5 @@ A matrix of simulated endpoint values for a parallel design, with dimensions \co Generate simulated endpoint data for a parallel design, with options for normal and lognormal distributions. } \author{ -Thomas Debray \email{thomas.debray@gmail.com} +Thomas Debray \email{tdebray@fromdatatowisdom.com} } diff --git a/man/validate_positive_definite.Rd b/man/validate_positive_definite.Rd new file mode 100644 index 0000000..2b9a354 --- /dev/null +++ b/man/validate_positive_definite.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validation.R +\name{validate_positive_definite} +\alias{validate_positive_definite} +\title{Validate Positive Semi-Definite Matrices} +\usage{ +validate_positive_definite(varcov_list) +} +\arguments{ +\item{varcov_list}{List of matrices. Each matrix is checked to ensure it is symmetric and positive semi-definite.} +} +\value{ +NULL. If all matrices pass, the function returns nothing. If any matrix fails, it stops with an error message. +} +\description{ +Validates that all matrices in a list are symmetric and positive semi-definite. +} diff --git a/man/validate_sample_size_limits.Rd b/man/validate_sample_size_limits.Rd new file mode 100644 index 0000000..afe6b95 --- /dev/null +++ b/man/validate_sample_size_limits.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validation.R +\name{validate_sample_size_limits} +\alias{validate_sample_size_limits} +\title{Check Sample Size Limits} +\usage{ +validate_sample_size_limits(lower, upper) +} +\arguments{ +\item{lower}{Numeric. The initial lower limit for the search range.} + +\item{upper}{Numeric. The initial upper limit for the search range.} +} +\value{ +NULL. If the checks pass, the function returns nothing. If the checks fail, it stops execution with an error message. +} +\description{ +Validates that the upper and lower limits are numeric and that the upper limit is greater than the lower limit. +} diff --git a/vignettes/sampleSize_Mielke.Rmd b/vignettes/sampleSize_Mielke.Rmd index 71accc0..583b6e3 100644 --- a/vignettes/sampleSize_Mielke.Rmd +++ b/vignettes/sampleSize_Mielke.Rmd @@ -55,7 +55,7 @@ ss <- sampleSize(power = 0.8, alpha = 0.05, lequi.tol = rep(log(0.80), 5), uequi.tol = rep(log(1.25), 5), dtype = "parallel", ctype = "DOM", lognorm = FALSE, - adjust = "no",ncores = 1, nsim = 10000, seed = 1234) + adjust = "no", ncores = 1, nsim = 10000, seed = 1234) ss ``` @@ -121,7 +121,7 @@ ss <- sampleSize(power = 0.8, alpha = 0.05, sigma_list = list("R" = sigma, "T" = sigma), lequi.tol = rep(log(0.80), 5), uequi.tol = rep(log(1.25), 5), - dtype ="2x2", ctype = "DOM", lognorm = FALSE, adjust = "no", + dtype = "2x2", ctype = "DOM", lognorm = FALSE, adjust = "no", ncores = 1, nsim = 10000, seed = 1234) ss ``` @@ -130,21 +130,21 @@ ss In the Zarzio example, the required sample size to demonstrate equivalence for 3 out of 5 tests is as follows: -```{r, eval = FALSE} +```{r, eval = TRUE} # No adjustment -N_Mielke(power = 0.8, Nmax = 1000, m = 5, k = 3, rho = 0, sigma = sigma, - true.diff = mu, equi.tol = log(1.25), design = "22co", alpha = 0.05, - adjust = "no", nsim = 10000) +sampleSize_Mielke(power = 0.8, Nmax = 1000, m = 5, k = 3, rho = 0, + sigma = sigma, true.diff = mu, equi.tol = log(1.25), + design = "22co", alpha = 0.05, adjust = "no", nsim = 10000) # k-adjustment -N_Mielke(power = 0.8, Nmax = 1000, m = 5, k = 3, rho = 0, sigma = sigma, - true.diff = mu, equi.tol = log(1.25), design = "22co", alpha = 0.05, - adjust = "k", nsim = 10000) +sampleSize_Mielke(power = 0.8, Nmax = 1000, m = 5, k = 3, rho = 0, + sigma = sigma, true.diff = mu, equi.tol = log(1.25), + design = "22co", alpha = 0.05, adjust = "k", nsim = 10000) # Bonferroni adjustment -N_Mielke(power = 0.8, Nmax = 1000, m = 5, k = 3, rho = 0, sigma = sigma, - true.diff = mu, equi.tol = log(1.25), design = "22co", alpha = 0.05, - adjust = "bon", nsim = 10000) +sampleSize_Mielke(power = 0.8, Nmax = 1000, m = 5, k = 3, rho = 0, + sigma = sigma, true.diff = mu, equi.tol = log(1.25), + design = "22co", alpha = 0.05, adjust = "bon", nsim = 10000) ``` Without any adjustment, we find 18 subjects per study (hence 90 for the complete trial). When adopting k-adjustment, we find 22 subjects per study (hence 110 in total). Finally, when adopting Bonferroni adjustment we find 34 subjects per study (and thus 170 in total).