Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to internals #53

Merged
merged 10 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ URL: https://ellessenne.github.io/rsimsum/
BugReports: https://github.com/ellessenne/rsimsum/issues
VignetteBuilder: knitr
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
LazyData: true
ByteCompile: true
Encoding: UTF-8
Expand Down
2 changes: 0 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ pre_submission_test:
R -e "devtools::check_win_oldrelease(quiet = TRUE)"
R -e "devtools::check_mac_release(quiet = TRUE)"
R -e "rhub::check_for_cran()"
R -e "rhub::check(platforms = 'macos-highsierra-release-cran')"
R -e "rhub::check(platforms = 'macos-highsierra-release')"
make style

docs:
Expand Down
8 changes: 7 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# rsimsum (development version)

* Fixed issues with nested loop plot when the simulation design is not fully-factorial (#47, thanks @mikesweeting).
* Fixed issues with nested loop plot when the simulation design is not fully-factorial (#47, thanks @mikesweeting);

* Fixed wrong calculations when the same column was used in `true` and in `by` or elsewhere (#48, thanks @mikesweeting);

* Updated columns names for confidence intervals in the `tt` dataset;

* Updated documentation regarding column names that are not allowed when calling `simsum()` or `multisimsum()`.

# rsimsum 0.12.0

Expand Down
2 changes: 1 addition & 1 deletion R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
#' @format A data frame with 4,000 rows and 8 variables:
#' * `diff` The difference in mean between groups estimated by the t-test;
#' * `se` Standard error of the estimated difference;
#' * `lower`, `upper` Confidence interval for the difference in mean as reported by the t-test;
#' * `conf.low`, `conf.high` Confidence interval for the difference in mean as reported by the t-test;
#' * `df` The number of degrees of freedom assumed by the t-test;
#' * `repno` Identifies each replication, between 1 and 500;
#' * `dgm` Identifies each data-generating mechanism: 1 corresponds to normal data with equal variance between the groups, 2 is normal data with unequal variance, 3 and 4 are skewed data (simulated from a Gamma distribution) with equal and unequal variance between groups, respectively;
Expand Down
51 changes: 11 additions & 40 deletions R/performance.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#' @keywords internal
.performance <- function(data, estvarname, se, true, empse_ref = NULL, rho = NULL, ci.limits, df, control) {
### Adjust 'true' to use internal column if not NULL
if (!is.null(true)) true <- ":true"

### Make object to return
obj <- list()

Expand All @@ -22,29 +25,17 @@
}
# Bias
if (!is.null(true)) {
if (is.character(true)) {
bias <- 1 / nsim * sum(data[[estvarname]] - data[[true]], na.rm = control$na.rm)
} else {
bias <- 1 / nsim * sum(data[[estvarname]] - true, na.rm = control$na.rm)
}
bias <- 1 / nsim * sum(data[[estvarname]] - data[[":true"]], na.rm = control$na.rm)
}
# Relative bias
if (!is.null(true)) {
if (is.character(true)) {
rbias <- 1 / nsim * sum((data[[estvarname]] - data[[true]]) / data[[true]], na.rm = control$na.rm)
} else {
rbias <- 1 / nsim * sum((data[[estvarname]] - true) / true, na.rm = control$na.rm)
}
rbias <- 1 / nsim * sum((data[[estvarname]] - data[[":true"]]) / data[[":true"]], na.rm = control$na.rm)
}
# Empirical standard error
empse <- sqrt(1 / (nsim - 1) * sum((data[[estvarname]] - mean(data[[estvarname]], na.rm = control$na.rm))^2, na.rm = control$na.rm))
# Mean squared error
if (!is.null(true)) {
if (is.character(true)) {
mse <- 1 / nsim * sum((data[[estvarname]] - data[[true]])^2, na.rm = control$na.rm)
} else {
mse <- 1 / nsim * sum((data[[estvarname]] - true)^2, na.rm = control$na.rm)
}
mse <- 1 / nsim * sum((data[[estvarname]] - data[[":true"]])^2, na.rm = control$na.rm)
}
# Relative change in precision
if (!is.null(empse_ref) & !is.null(rho)) {
Expand All @@ -68,26 +59,14 @@
# Coverage of a nominal (1 - level)% confidence interval
if (!is.null(true) & !is.null(se)) {
if (is.null(ci.limits)) {
if (is.character(true)) {
cover <- 1 / nsim * sum(data[[true]] >= data[[estvarname]] - crit * data[[se]] & data[[true]] <= data[[estvarname]] + crit * data[[se]], na.rm = control$na.rm)
} else {
cover <- 1 / nsim * sum(true >= data[[estvarname]] - crit * data[[se]] & true <= data[[estvarname]] + crit * data[[se]], na.rm = control$na.rm)
}
cover <- 1 / nsim * sum(data[[":true"]] >= data[[estvarname]] - crit * data[[se]] & data[[":true"]] <= data[[estvarname]] + crit * data[[se]], na.rm = control$na.rm)
} else {
if (is.character(ci.limits)) {
if (is.character(true)) {
cover <- 1 / nsim * sum(data[[true]] >= data[[ci.limits[1]]] & data[[true]] <= data[[ci.limits[2]]], na.rm = control$na.rm)
} else {
cover <- 1 / nsim * sum(true >= data[[ci.limits[1]]] & true <= data[[ci.limits[2]]], na.rm = control$na.rm)
}
cover <- 1 / nsim * sum(data[[":true"]] >= data[[ci.limits[1]]] & data[[":true"]] <= data[[ci.limits[2]]], na.rm = control$na.rm)
} else if (is.numeric(ci.limits)) {
data[["lower"]] <- ci.limits[1]
data[["upper"]] <- ci.limits[2]
if (is.character(true)) {
cover <- 1 / nsim * sum(data[[true]] >= data[["lower"]] & data[[true]] <= data[["upper"]], na.rm = control$na.rm)
} else {
cover <- 1 / nsim * sum(true >= data[["lower"]] & true <= data[["upper"]], na.rm = control$na.rm)
}
cover <- 1 / nsim * sum(data[[":true"]] >= data[["lower"]] & data[[":true"]] <= data[["upper"]], na.rm = control$na.rm)
}
}
}
Expand All @@ -112,20 +91,12 @@
if (control$mcse) {
if (!is.null(true)) {
bias_mcse <- sqrt(1 / (nsim * (nsim - 1)) * sum((data[[estvarname]] - mean(data[[estvarname]], na.rm = control$na.rm))^2, na.rm = control$na.rm))
if (is.character(true)) {
rbias_i <- (data[[estvarname]] - data[[true]]) / data[[true]]
} else {
rbias_i <- (data[[estvarname]] - true) / true
}
rbias_i <- (data[[estvarname]] - data[[":true"]]) / data[[":true"]]
rbias_mcse <- sd(rbias_i) / sqrt(nsim)
}
empse_mcse <- empse / sqrt(2 * (nsim - 1))
if (!is.null(true)) {
if (is.character(true)) {
mse_mcse <- sqrt(sum(((data[[estvarname]] - data[[true]])^2 - mse)^2, na.rm = control$na.rm) / (nsim * (nsim - 1)))
} else {
mse_mcse <- sqrt(sum(((data[[estvarname]] - true)^2 - mse)^2, na.rm = control$na.rm) / (nsim * (nsim - 1)))
}
mse_mcse <- sqrt(sum(((data[[estvarname]] - data[[":true"]])^2 - mse)^2, na.rm = control$na.rm) / (nsim * (nsim - 1)))
}
if (!is.null(empse_ref) & !is.null(rho)) {
relprec_mcse <- 200 * (empse_ref / empse)^2 * sqrt((1 - rho^2) / (nsim - 1))
Expand Down
50 changes: 26 additions & 24 deletions R/simsum.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,32 @@
#' @param data A `data.frame` in which variable names are interpreted.
#' It has to be in tidy format, e.g. each variable forms a column and each observation forms a row.
#' @param estvarname The name of the variable containing the point estimates.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param se The name of the variable containing the standard errors of the point estimates.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param true The true value of the parameter; this is used in calculations of bias, relative bias, coverage, and mean squared error and is required whenever these performance measures are requested.
#' `true` can be a numeric value or a string that identifies a column in `data`.
#' In the former setting, `simsum` will assume the same value for all replications; conversely, each replication will use a distinct value for `true` as identified by each row of `data`.
#' See `vignette("E-custom-inputs", package = "rsimsum")` for more details.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param methodvar The name of the variable containing the methods to compare.
#' For instance, methods could be the models compared within a simulation study.
#' Can be `NULL`.
#' If a vector of column names is passed to `simsum()`, those columns will be combined into a single column named `:methodvar` using the [base::interaction()] function before computing all performance measures.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param ref Specifies the reference method against which relative precision will be calculated.
#' Only useful if `methodvar` is specified.
#' @param by A vector of variable names to compute performance measures by a list of factors. Factors listed here are the (potentially several) data-generating mechanisms used to simulate data under different scenarios (e.g. sample size, true distribution of a variable, etc.).
#' Can be `NULL`.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param ci.limits Can be used to specify the limits (lower and upper) of confidence intervals used to calculate coverage and bias-eliminated coverage.
#' Useful for non-Wald type estimators (e.g. bootstrap).
#' Defaults to `NULL`, where Wald-type confidence intervals based on the provided SEs are calculated for coverage; otherwise, it can be a numeric vector (for fixed confidence intervals) or a vector of strings that identify columns in `data` with replication-specific lower and upper limits.
#' See `vignette("E-custom-inputs", package = "rsimsum")` for more details.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param df Can be used to specify that a column containing the replication-specific number of degrees of freedom that will be used to calculate confidence intervals for coverage (and bias-eliminated coverage) assuming t-distributed critical values (rather than normal theory intervals).
#' See `vignette("E-custom-inputs", package = "rsimsum")` for more details.
#' Note that some column names are forbidden: these are listed below in the _Details_ section.
#' @param dropbig Specifies that point estimates or standard errors beyond the maximum acceptable values should be dropped. Defaults to `FALSE`.
#' @param x Set to `TRUE` to include the `data` argument used to calculate summary statistics (i.e. after pre-processing the input dataset e.g. removing values deemed too large via the `dropbig` argument) as a slot. Calling `simsum` with `x = TRUE` is required to produce zipper plots. The downside is that the size of the returned object increases considerably, therefore it is set to `FALSE` by default.
#' @param control A list of parameters that control the behaviour of `simsum`.
Expand All @@ -43,7 +50,7 @@
#' @references Gasparini, A. 2018. rsimsum: Summarise results from Monte Carlo simulation studies. Journal of Open Source Software 3(26):739, \doi{10.21105/joss.00739}
#' @export
#' @details
#' The following names are not allowed for `estvarname`, `se`, `methodvar`, `by`: `stat`, `est`, `mcse`, `lower`, `upper`, `:methodvar`.
#' The following names are not allowed for any column in `data` that is passed to [simsum()]: `stat`, `est`, `mcse`, `lower`, `upper`, `:methodvar`, `:true`.
#'
#' @examples
#' data("MIsim", package = "rsimsum")
Expand All @@ -62,22 +69,6 @@ simsum <- function(data,
dropbig = FALSE,
x = FALSE,
control = list()) {
# data("nlp", package = "rsimsum")
# nlp.subset <- nlp %>%
# dplyr::filter(!(ss == 100 & esigma == 2))
# data <- nlp.subset
# estvarname <- "b"
# true <- 0
# se <- "se"
# methodvar <- "model"
# by <- c("baseline", "ss", "esigma")
# ci.limits <- NULL
# df <- NULL
# dropbig <- FALSE
# x <- FALSE
# control <- list()
# ref <- NULL

### Check arguments
arg_checks <- checkmate::makeAssertCollection()
# 'data' must be a data.frame
Expand Down Expand Up @@ -109,13 +100,13 @@ simsum <- function(data,
checkmate::assert_subset(x = by, choices = names(data), add = arg_checks)
checkmate::assert_subset(x = methodvar, choices = names(data), add = arg_checks)
checkmate::assert_subset(x = df, choices = names(data), add = arg_checks)
# 'estvarname', 'se', 'methodvar', 'by' , 'df' must not be any in ('stat', 'est', 'mcse', 'lower', 'upper', ':methodvar')
.private_names <- c("stat", "est", "mcse", "lower", "upper", ":methodvar")
checkmate::assert_false(x = (estvarname %in% .private_names), add = arg_checks)
if (!is.null(se)) checkmate::assert_false(x = (se %in% .private_names), add = arg_checks)
if (!is.null(methodvar)) checkmate::assert_false(x = any(methodvar %in% .private_names), add = arg_checks)
if (!is.null(by)) checkmate::assert_false(x = any(by %in% .private_names), add = arg_checks)
if (!is.null(df)) checkmate::assert_false(x = any(df %in% .private_names), add = arg_checks)
# 'estvarname', 'se', 'methodvar', 'by' , 'df' must not be any in ('stat', 'est', 'mcse', 'lower', 'upper', ':methodvar', ':true')
.private_names <- c("stat", "est", "mcse", "lower", "upper", ":methodvar", ":true")
.check_private(var = estvarname, label = "estvarname", private_names = .private_names)
.check_private(var = se, label = "se", private_names = .private_names)
.check_private(var = methodvar, label = "methodvar", private_names = .private_names)
.check_private(var = by, label = "by", private_names = .private_names)
.check_private(var = df, label = "df", private_names = .private_names)
# Process vector of 'methodvar' if a vector
user_methodvar <- NULL
if (length(methodvar) > 1) {
Expand All @@ -134,6 +125,7 @@ simsum <- function(data,
if (is.character(ci.limits)) {
checkmate::assert_character(x = ci.limits, len = 2, add = arg_checks)
checkmate::assert_true(x = all(ci.limits %in% names(data)), add = arg_checks)
lapply(X = ci.limits, FUN = function(x) .check_private(var = x, label = "ci.limits", private_names = .private_names))
}
if (is.numeric(ci.limits)) {
checkmate::assert_numeric(x = ci.limits, len = 2, add = arg_checks)
Expand Down Expand Up @@ -164,6 +156,15 @@ simsum <- function(data,
), recursive = FALSE)
control <- control.tmp

### Add hidden column with true values
if (!is.null(true)) {
if (is.character(true)) {
data[[":true"]] <- data[[true]]
} else {
data[[":true"]] <- true
}
}

### Factorise 'methodvar', 'by'
data <- .factorise(data = data, cols = c(methodvar, by))

Expand Down Expand Up @@ -256,6 +257,7 @@ simsum <- function(data,
obj$control <- control
if (x) {
obj$x <- .br(lapply(data, .br))
if (!is.null(true)) obj$x[[":true"]] <- NULL
rownames(obj$x) <- NULL
}

Expand Down
10 changes: 10 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,13 @@
data <- data[nrs > 0]
return(data)
}

### Check private names
.check_private <- function(var, label, private_names) {
if (!is.null(var)) {
if (any(var %in% private_names)) {
this <- which(var %in% private_names)
stop(paste0("'", var[this], "' is not an allowed name for '", label, "'; see help('simsum') for more details."), call. = FALSE)
}
}
}
2 changes: 2 additions & 0 deletions data-raw/tt-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ for (i in seq(B)) {
tt.df[[i]] <- out
}
tt <- do.call(rbind.data.frame, tt.df)
library(dplyr)
tt <- rename(tt, conf.low = lower, conf.high = upper)

### Export for use in the package
usethis::use_data(tt, overwrite = TRUE)
Binary file modified data/tt.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/articles/A-introduction.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/B-relhaz.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/C-plotting.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/D-nlp.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading