Skip to content

Commit

Permalink
Updates to internals (#53)
Browse files Browse the repository at this point in the history
* Improved error messages when users provide private names

* Renamed columns in 'tt' data

* Adjusted names in tests

* Improved internals to deal with same column in true and elsewhere

* Updated NEWS

* Updates to testing code

* Testing platforms

* Updated {roxygen2}

* Improved error message when private names are used
  • Loading branch information
ellessenne authored Feb 29, 2024
1 parent 070526a commit ef59fe9
Show file tree
Hide file tree
Showing 30 changed files with 207 additions and 171 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ URL: https://ellessenne.github.io/rsimsum/
BugReports: https://github.com/ellessenne/rsimsum/issues
VignetteBuilder: knitr
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
LazyData: true
ByteCompile: true
Encoding: UTF-8
Expand Down
2 changes: 0 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ pre_submission_test:
R -e "devtools::check_win_oldrelease(quiet = TRUE)"
R -e "devtools::check_mac_release(quiet = TRUE)"
R -e "rhub::check_for_cran()"
R -e "rhub::check(platforms = 'macos-highsierra-release-cran')"
R -e "rhub::check(platforms = 'macos-highsierra-release')"
make style

docs:
Expand Down
8 changes: 7 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# rsimsum (development version)

* Fixed issues with nested loop plot when the simulation design is not fully-factorial (#47, thanks @mikesweeting).
* Fixed issues with nested loop plot when the simulation design is not fully-factorial (#47, thanks @mikesweeting);

* Fixed wrong calculations when the same column was used in `true` and in `by` or elsewhere (#48, thanks @mikesweeting);

* Updated columns names for confidence intervals in the `tt` dataset;

* Updated documentation regarding column names that are not allowed when calling `simsum()` or `multisimsum()`.

# rsimsum 0.12.0

Expand Down
2 changes: 1 addition & 1 deletion R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
#' @format A data frame with 4,000 rows and 8 variables:
#' * `diff` The difference in mean between groups estimated by the t-test;
#' * `se` Standard error of the estimated difference;
#' * `lower`, `upper` Confidence interval for the difference in mean as reported by the t-test;
#' * `conf.low`, `conf.high` Confidence interval for the difference in mean as reported by the t-test;
#' * `df` The number of degrees of freedom assumed by the t-test;
#' * `repno` Identifies each replication, between 1 and 500;
#' * `dgm` Identifies each data-generating mechanism: 1 corresponds to normal data with equal variance between the groups, 2 is normal data with unequal variance, 3 and 4 are skewed data (simulated from a Gamma distribution) with equal and unequal variance between groups, respectively;
Expand Down
51 changes: 11 additions & 40 deletions R/performance.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#' @keywords internal
.performance <- function(data, estvarname, se, true, empse_ref = NULL, rho = NULL, ci.limits, df, control) {
### Adjust 'true' to use internal column if not NULL
if (!is.null(true)) true <- ":true"

### Make object to return
obj <- list()

Expand All @@ -22,29 +25,17 @@
}
# Bias
if (!is.null(true)) {
if (is.character(true)) {
bias <- 1 / nsim * sum(data[[estvarname]] - data[[true]], na.rm = control$na.rm)
} else {
bias <- 1 / nsim * sum(data[[estvarname]] - true, na.rm = control$na.rm)
}
bias <- 1 / nsim * sum(data[[estvarname]] - data[[":true"]], na.rm = control$na.rm)
}
# Relative bias
if (!is.null(true)) {
if (is.character(true)) {
rbias <- 1 / nsim * sum((data[[estvarname]] - data[[true]]) / data[[true]], na.rm = control$na.rm)
} else {
rbias <- 1 / nsim * sum((data[[estvarname]] - true) / true, na.rm = control$na.rm)
}
rbias <- 1 / nsim * sum((data[[estvarname]] - data[[":true"]]) / data[[":true"]], na.rm = control$na.rm)
}
# Empirical standard error
empse <- sqrt(1 / (nsim - 1) * sum((data[[estvarname]] - mean(data[[estvarname]], na.rm = control$na.rm))^2, na.rm = control$na.rm))
# Mean squared error
if (!is.null(true)) {
if (is.character(true)) {
mse <- 1 / nsim * sum((data[[estvarname]] - data[[true]])^2, na.rm = control$na.rm)
} else {
mse <- 1 / nsim * sum((data[[estvarname]] - true)^2, na.rm = control$na.rm)
}
mse <- 1 / nsim * sum((data[[estvarname]] - data[[":true"]])^2, na.rm = control$na.rm)
}
# Relative change in precision
if (!is.null(empse_ref) & !is.null(rho)) {
Expand All @@ -68,26 +59,14 @@
# Coverage of a nominal (1 - level)% confidence interval
if (!is.null(true) & !is.null(se)) {
if (is.null(ci.limits)) {
if (is.character(true)) {
cover <- 1 / nsim * sum(data[[true]] >= data[[estvarname]] - crit * data[[se]] & data[[true]] <= data[[estvarname]] + crit * data[[se]], na.rm = control$na.rm)
} else {
cover <- 1 / nsim * sum(true >= data[[estvarname]] - crit * data[[se]] & true <= data[[estvarname]] + crit * data[[se]], na.rm = control$na.rm)
}
cover <- 1 / nsim * sum(data[[":true"]] >= data[[estvarname]] - crit * data[[se]] & data[[":true"]] <= data[[estvarname]] + crit * data[[se]], na.rm = control$na.rm)
} else {
if (is.character(ci.limits)) {
if (is.character(true)) {
cover <- 1 / nsim * sum(data[[true]] >= data[[ci.limits[1]]] & data[[true]] <= data[[ci.limits[2]]], na.rm = control$na.rm)
} else {
cover <- 1 / nsim * sum(true >= data[[ci.limits[1]]] & true <= data[[ci.limits[2]]], na.rm = control$na.rm)
}
cover <- 1 / nsim * sum(data[[":true"]] >= data[[ci.limits[1]]] & data[[":true"]] <= data[[ci.limits[2]]], na.rm = control$na.rm)
} else if (is.numeric(ci.limits)) {
data[["lower"]] <- ci.limits[1]
data[["upper"]] <- ci.limits[2]
if (is.character(true)) {
cover <- 1 / nsim * sum(data[[true]] >= data[["lower"]] & data[[true]] <= data[["upper"]], na.rm = control$na.rm)
} else {
cover <- 1 / nsim * sum(true >= data[["lower"]] & true <= data[["upper"]], na.rm = control$na.rm)
}
cover <- 1 / nsim * sum(data[[":true"]] >= data[["lower"]] & data[[":true"]] <= data[["upper"]], na.rm = control$na.rm)
}
}
}
Expand All @@ -112,20 +91,12 @@
if (control$mcse) {
if (!is.null(true)) {
bias_mcse <- sqrt(1 / (nsim * (nsim - 1)) * sum((data[[estvarname]] - mean(data[[estvarname]], na.rm = control$na.rm))^2, na.rm = control$na.rm))
if (is.character(true)) {
rbias_i <- (data[[estvarname]] - data[[true]]) / data[[true]]
} else {
rbias_i <- (data[[estvarname]] - true) / true
}
rbias_i <- (data[[estvarname]] - data[[":true"]]) / data[[":true"]]
rbias_mcse <- sd(rbias_i) / sqrt(nsim)
}
empse_mcse <- empse / sqrt(2 * (nsim - 1))
if (!is.null(true)) {
if (is.character(true)) {
mse_mcse <- sqrt(sum(((data[[estvarname]] - data[[true]])^2 - mse)^2, na.rm = control$na.rm) / (nsim * (nsim - 1)))
} else {
mse_mcse <- sqrt(sum(((data[[estvarname]] - true)^2 - mse)^2, na.rm = control$na.rm) / (nsim * (nsim - 1)))
}
mse_mcse <- sqrt(sum(((data[[estvarname]] - data[[":true"]])^2 - mse)^2, na.rm = control$na.rm) / (nsim * (nsim - 1)))
}
if (!is.null(empse_ref) & !is.null(rho)) {
relprec_mcse <- 200 * (empse_ref / empse)^2 * sqrt((1 - rho^2) / (nsim - 1))
Expand Down
50 changes: 26 additions & 24 deletions R/simsum.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,32 @@
#' @param data A `data.frame` in which variable names are interpreted.
#' It has to be in tidy format, e.g. each variable forms a column and each observation forms a row.
#' @param estvarname The name of the variable containing the point estimates.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param se The name of the variable containing the standard errors of the point estimates.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param true The true value of the parameter; this is used in calculations of bias, relative bias, coverage, and mean squared error and is required whenever these performance measures are requested.
#' `true` can be a numeric value or a string that identifies a column in `data`.
#' In the former setting, `simsum` will assume the same value for all replications; conversely, each replication will use a distinct value for `true` as identified by each row of `data`.
#' See `vignette("E-custom-inputs", package = "rsimsum")` for more details.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param methodvar The name of the variable containing the methods to compare.
#' For instance, methods could be the models compared within a simulation study.
#' Can be `NULL`.
#' If a vector of column names is passed to `simsum()`, those columns will be combined into a single column named `:methodvar` using the [base::interaction()] function before computing all performance measures.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param ref Specifies the reference method against which relative precision will be calculated.
#' Only useful if `methodvar` is specified.
#' @param by A vector of variable names to compute performance measures by a list of factors. Factors listed here are the (potentially several) data-generating mechanisms used to simulate data under different scenarios (e.g. sample size, true distribution of a variable, etc.).
#' Can be `NULL`.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param ci.limits Can be used to specify the limits (lower and upper) of confidence intervals used to calculate coverage and bias-eliminated coverage.
#' Useful for non-Wald type estimators (e.g. bootstrap).
#' Defaults to `NULL`, where Wald-type confidence intervals based on the provided SEs are calculated for coverage; otherwise, it can be a numeric vector (for fixed confidence intervals) or a vector of strings that identify columns in `data` with replication-specific lower and upper limits.
#' See `vignette("E-custom-inputs", package = "rsimsum")` for more details.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param df Can be used to specify that a column containing the replication-specific number of degrees of freedom that will be used to calculate confidence intervals for coverage (and bias-eliminated coverage) assuming t-distributed critical values (rather than normal theory intervals).
#' See `vignette("E-custom-inputs", package = "rsimsum")` for more details.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param dropbig Specifies that point estimates or standard errors beyond the maximum acceptable values should be dropped. Defaults to `FALSE`.
#' @param x Set to `TRUE` to include the `data` argument used to calculate summary statistics (i.e. after pre-processing the input dataset e.g. removing values deemed too large via the `dropbig` argument) as a slot. Calling `simsum` with `x = TRUE` is required to produce zipper plots. The downside is that the size of the returned object increases considerably, therefore it is set to `FALSE` by default.
#' @param control A list of parameters that control the behaviour of `simsum`.
Expand All @@ -43,7 +50,7 @@
#' @references Gasparini, A. 2018. rsimsum: Summarise results from Monte Carlo simulation studies. Journal of Open Source Software 3(26):739, \doi{10.21105/joss.00739}
#' @export
#' @details
#' The following names are not allowed for `estvarname`, `se`, `methodvar`, `by`: `stat`, `est`, `mcse`, `lower`, `upper`, `:methodvar`.
#' The following names are not allowed for any column in `data` that is passed to [simsum()]: `stat`, `est`, `mcse`, `lower`, `upper`, `:methodvar`, `:true`.
#'
#' @examples
#' data("MIsim", package = "rsimsum")
Expand All @@ -62,22 +69,6 @@ simsum <- function(data,
dropbig = FALSE,
x = FALSE,
control = list()) {
# data("nlp", package = "rsimsum")
# nlp.subset <- nlp %>%
# dplyr::filter(!(ss == 100 & esigma == 2))
# data <- nlp.subset
# estvarname <- "b"
# true <- 0
# se <- "se"
# methodvar <- "model"
# by <- c("baseline", "ss", "esigma")
# ci.limits <- NULL
# df <- NULL
# dropbig <- FALSE
# x <- FALSE
# control <- list()
# ref <- NULL

### Check arguments
arg_checks <- checkmate::makeAssertCollection()
# 'data' must be a data.frame
Expand Down Expand Up @@ -109,13 +100,13 @@ simsum <- function(data,
checkmate::assert_subset(x = by, choices = names(data), add = arg_checks)
checkmate::assert_subset(x = methodvar, choices = names(data), add = arg_checks)
checkmate::assert_subset(x = df, choices = names(data), add = arg_checks)
# 'estvarname', 'se', 'methodvar', 'by' , 'df' must not be any in ('stat', 'est', 'mcse', 'lower', 'upper', ':methodvar')
.private_names <- c("stat", "est", "mcse", "lower", "upper", ":methodvar")
checkmate::assert_false(x = (estvarname %in% .private_names), add = arg_checks)
if (!is.null(se)) checkmate::assert_false(x = (se %in% .private_names), add = arg_checks)
if (!is.null(methodvar)) checkmate::assert_false(x = any(methodvar %in% .private_names), add = arg_checks)
if (!is.null(by)) checkmate::assert_false(x = any(by %in% .private_names), add = arg_checks)
if (!is.null(df)) checkmate::assert_false(x = any(df %in% .private_names), add = arg_checks)
# 'estvarname', 'se', 'methodvar', 'by' , 'df' must not be any in ('stat', 'est', 'mcse', 'lower', 'upper', ':methodvar', ':true')
.private_names <- c("stat", "est", "mcse", "lower", "upper", ":methodvar", ":true")
.check_private(var = estvarname, label = "estvarname", private_names = .private_names)
.check_private(var = se, label = "se", private_names = .private_names)
.check_private(var = methodvar, label = "methodvar", private_names = .private_names)
.check_private(var = by, label = "by", private_names = .private_names)
.check_private(var = df, label = "df", private_names = .private_names)
# Process vector of 'methodvar' if a vector
user_methodvar <- NULL
if (length(methodvar) > 1) {
Expand All @@ -134,6 +125,7 @@ simsum <- function(data,
if (is.character(ci.limits)) {
checkmate::assert_character(x = ci.limits, len = 2, add = arg_checks)
checkmate::assert_true(x = all(ci.limits %in% names(data)), add = arg_checks)
lapply(X = ci.limits, FUN = function(x) .check_private(var = x, label = "ci.limits", private_names = .private_names))
}
if (is.numeric(ci.limits)) {
checkmate::assert_numeric(x = ci.limits, len = 2, add = arg_checks)
Expand Down Expand Up @@ -164,6 +156,15 @@ simsum <- function(data,
), recursive = FALSE)
control <- control.tmp

### Add hidden column with true values
if (!is.null(true)) {
if (is.character(true)) {
data[[":true"]] <- data[[true]]
} else {
data[[":true"]] <- true
}
}

### Factorise 'methodvar', 'by'
data <- .factorise(data = data, cols = c(methodvar, by))

Expand Down Expand Up @@ -256,6 +257,7 @@ simsum <- function(data,
obj$control <- control
if (x) {
obj$x <- .br(lapply(data, .br))
if (!is.null(true)) obj$x[[":true"]] <- NULL
rownames(obj$x) <- NULL
}

Expand Down
10 changes: 10 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,13 @@
data <- data[nrs > 0]
return(data)
}

### Check private names
.check_private <- function(var, label, private_names) {
if (!is.null(var)) {
if (any(var %in% private_names)) {
this <- which(var %in% private_names)
stop(paste0("'", var[this], "' is not an allowed name for '", label, "'; see help('simsum') for more details."), call. = FALSE)
}
}
}
2 changes: 2 additions & 0 deletions data-raw/tt-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ for (i in seq(B)) {
tt.df[[i]] <- out
}
tt <- do.call(rbind.data.frame, tt.df)
library(dplyr)
tt <- rename(tt, conf.low = lower, conf.high = upper)

### Export for use in the package
usethis::use_data(tt, overwrite = TRUE)
Binary file modified data/tt.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/articles/A-introduction.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/B-relhaz.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/C-plotting.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/D-nlp.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ef59fe9

Please sign in to comment.