diff --git a/.github/workflows/check_on_branch.yml b/.github/workflows/check_on_branch.yml index bc635a0..2f09f44 100644 --- a/.github/workflows/check_on_branch.yml +++ b/.github/workflows/check_on_branch.yml @@ -17,4 +17,4 @@ jobs: permissions: contents: read steps: - - uses: inbo/actions/check_pkg@main + - uses: inbo/actions/check_pkg@checklist-0.3.6 diff --git a/.github/workflows/check_on_main.yml b/.github/workflows/check_on_main.yml index 6e8268d..d5a137b 100644 --- a/.github/workflows/check_on_main.yml +++ b/.github/workflows/check_on_main.yml @@ -16,4 +16,4 @@ jobs: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: inbo/actions/check_pkg@main + - uses: inbo/actions/check_pkg@checklist-0.3.6 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7d9b730..9b10b9f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,28 +10,21 @@ on: - completed jobs: - prepare: + publish: runs-on: ubuntu-latest - outputs: - tag: ${{ steps.gettag.outputs.tag }} - body: ${{ steps.gettag.outputs.body }} + permissions: + contents: write steps: - uses: actions/checkout@v3 - name: Get tag run: | git fetch --tags --force TAG=$(git tag --contains $(git rev-parse HEAD)) - echo "tag=$TAG" >> "$GITHUB_OUTPUT" - echo "body=$TAG_BODY" >> "$GITHUB_OUTPUT" - id: gettag - publish: - runs-on: ubuntu-latest - permissions: - contents: write - needs: prepare - steps: + TAG_BODY=$(git tag --contains $(git rev-parse HEAD) --format='%(contents)') + echo "tag=$TAG" >> $GITHUB_ENV + echo "$TAG_BODY" > body.md - uses: ncipollo/release-action@v1 with: - name: Release ${{needs.prepare.outputs.tag}} - tag: ${{needs.prepare.outputs.tag}} - body: ${{needs.prepare.outputs.body}} + name: Release ${{ env.tag }} + tag: ${{ env.tag }} + bodyFile: body.md diff --git a/.zenodo.json b/.zenodo.json index b365e3b..d9c67af 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,6 +1,6 @@ { "title": "multimput: Using Multiple Imputation to Address Missing Data", - "version": "0.2.12", + "version": "0.2.13", "license": "GPL-3.0", "upload_type": "software", "description": "
Accompanying package for the paper: Working with population totals in\nthe presence of missing data comparing imputation methods in terms of\nbias and precision. Published in 2017 in the Journal of Ornithology\nvolume 158 page 603–615 (doi:10.1007/s10336-016-1404-9).<\/p>", diff --git a/CITATION.cff b/CITATION.cff index 369fb7f..43a9cb9 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,6 +1,6 @@ cff-version: 1.2.0 message: If you use this software, please cite it using these metadata. -title: 'multimput: Using Multiple Imputation to Address Missing Data' +title: "multimput: Using Multiple Imputation to Address Missing Data" authors: - given-names: Thierry family-names: Onkelinx @@ -24,12 +24,12 @@ doi: 10.5281/zenodo.598331 license: GPL-3.0 repository-code: https://github.com/inbo/multimput/ type: software -abstract: 'Accompanying package for the paper: Working with population totals in the +abstract: "Accompanying package for the paper: Working with population totals in the presence of missing data comparing imputation methods in terms of bias and precision. - Published in 2017 in the Journal of Ornithology volume 158 page 603–615 (doi:10.1007/s10336-016-1404-9).' + Published in 2017 in the Journal of Ornithology volume 158 page 603–615 (doi:10.1007/s10336-016-1404-9)." identifiers: - type: doi value: 10.5281/zenodo.598331 - type: url value: https://inbo.github.io/multimput/ -version: 0.2.12 +version: 0.2.13 diff --git a/DESCRIPTION b/DESCRIPTION index 8b5e09b..15620e6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: multimput Title: Using Multiple Imputation to Address Missing Data -Version: 0.2.12 +Version: 0.2.13 Authors@R: c( person("Thierry", "Onkelinx", , "thierry.onkelinx@inbo.be", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8804-4216", affiliation = "Research Institute for Nature and Forest (INBO)")), diff --git a/NEWS.md b/NEWS.md index d65a3dc..d0e4624 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +# multimput 0.2.13 + +* `aggregate_impute()` handles the corner case when `join` results in an empty + dataset. +* The `model_fun` argument of `model_impute()` can be either a function or a + string containing the name of a function (like `"glm"`). + Include the package name in case the function is not available in base R (like + `"INLA::inla"`). + # multimput 0.2.12 * `impute()` gains an `extra` argument. diff --git a/R/aggregate_impute.R b/R/aggregate_impute.R index d66214f..e957335 100644 --- a/R/aggregate_impute.R +++ b/R/aggregate_impute.R @@ -92,7 +92,7 @@ setMethod( assert_that(is.list(join)) assert_that( - all(sapply(join, inherits, "data.frame")), + all(vapply(join, inherits, logical(1), "data.frame")), msg = "not all objects in join are data.frames" ) for (i in seq_along(join)) { @@ -105,6 +105,27 @@ setMethod( } } + if (nrow(data) == 0) { + return( + new( + "aggregatedImputed", + Covariate = data |> + select(!!!grouping) |> + as.data.frame(), + Imputation = ncol(imputation) |> + seq_len() |> + sprintf(fmt = "Imputation%04i") |> + list() |> + c(list(character(0))) |> + rev() |> + matrix( + data = NA_real_, nrow = 0, ncol = ncol(imputation), + byrow = FALSE + ) + ) + ) + } + imputation <- imputation[na.omit(data[[id_column]]), , drop = FALSE] missing_obs <- which(is.na(data[, response])) @@ -188,6 +209,27 @@ setMethod( } } + if (nrow(data) == 0) { + return( + new( + "aggregatedImputed", + Covariate = data |> + select(!!!grouping) |> + as.data.frame(), + Imputation = imputation |> + select(starts_with("Imputation")) |> + colnames() |> + list() |> + c(list(character(0))) |> + rev() |> + matrix( + data = NA_real_, nrow = 0, ncol = ncol(imputation) - 1, + byrow = FALSE + ) + ) + ) + } + data |> inner_join(imputation, by = id_column) |> group_by(!!!grouping) |> diff --git a/R/model_impute.R b/R/model_impute.R index 092e154..4e281fe 100644 --- a/R/model_impute.R +++ b/R/model_impute.R @@ -1,6 +1,10 @@ #' Model an imputed dataset #' @param object The imputed dataset. #' @param model_fun The function to apply on each imputation set. +#' Or a string with the name of the function. +#' Include the package name when the function is not in one of the base R +#' packages. +#' For example: `"glm"` or `"INLA::inla"`. #' @param rhs The right hand side of the model. #' @param model_args An optional list of arguments to pass to the model #' function. @@ -50,7 +54,7 @@ setMethod( #' @importFrom dplyr bind_rows filter group_by mutate n row_number select #' summarise transmute ungroup #' @importFrom purrr map -#' @importFrom rlang .data !! !!! := +#' @importFrom rlang .data !! !!! := parse_expr #' @importFrom tibble rownames_to_column #' @importFrom stats as.formula qnorm var #' @examples @@ -83,6 +87,13 @@ setMethod( extractor_args = "extractor.args" ) ) + if (is.string(model_fun) && noNA(model_fun)) { + package_name <- gsub("(.*)::(.*)", "\\1", model_fun) + if (package_name != model_fun) { + stopifnot(requireNamespace(package_name, quietly = TRUE)) + } + model_fun <- eval(parse_expr(model_fun)) + } assert_that( inherits(model_fun, "function"), inherits(extractor, "function"), is.character(rhs), inherits(model_args, "list"), @@ -100,7 +111,8 @@ setMethod( do.call(what = dplyr::mutate) -> object@Covariate object@Imputation <- object@Imputation[object@Covariate[[id_column]], ] - paste("Imputed", rhs, sep = "~") |> + gsub("\\s*~", "", rhs) |> + sprintf(fmt = "Imputed ~ %s") |> as.formula() -> form m <- lapply( seq_len(ncol(object@Imputation)), @@ -111,12 +123,13 @@ setMethod( silent = TRUE ) if (inherits(model, "try-error")) { - NULL - } else { - do.call(extractor, c(list(model), extractor_args)) |> - as.data.frame() |> - rownames_to_column("Variable") + return(NULL) } + list(model) |> + c(extractor_args) |> + do.call(what = extractor) |> + as.data.frame() |> + rownames_to_column("Variable") } ) failed <- vapply(m, is.null, logical(1)) diff --git a/inst/CITATION b/inst/CITATION index af95722..c7186af 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -2,12 +2,12 @@ citHeader("To cite `multimput` in publications please use:") # begin checklist entry bibentry( bibtype = "Manual", - title = "multimput: Using Multiple Imputation to Address Missing Data. Version 0.2.12", + title = "multimput: Using Multiple Imputation to Address Missing Data. Version 0.2.13", author = c( author = c(person(given = "Thierry", family = "Onkelinx"), person(given = "Koen", family = "Devos"), person(given = "Paul", family = "Quataert"))), year = 2023, url = "https://inbo.github.io/multimput/", abstract = "Accompanying package for the paper: Working with population totals in the presence of missing data comparing imputation methods in terms of bias and precision. Published in 2017 in the Journal of Ornithology volume 158 page 603–615 (doi:10.1007/s10336-016-1404-9).", - textVersion = "Onkelinx, Thierry; Devos, Koen; Quataert, Paul (2023) multimput: Using Multiple Imputation to Address Missing Data. Version 0.2.12. https://inbo.github.io/multimput/", + textVersion = "Onkelinx, Thierry; Devos, Koen; Quataert, Paul (2023) multimput: Using Multiple Imputation to Address Missing Data. Version 0.2.13. https://inbo.github.io/multimput/", keywords = "missing data, multiple imputation, Rubin", doi = "10.5281/zenodo.598331", ) diff --git a/man/model_impute.Rd b/man/model_impute.Rd index 090a026..296d26f 100644 --- a/man/model_impute.Rd +++ b/man/model_impute.Rd @@ -46,7 +46,11 @@ model_impute( \arguments{ \item{object}{The imputed dataset.} -\item{model_fun}{The function to apply on each imputation set.} +\item{model_fun}{The function to apply on each imputation set. +Or a string with the name of the function. +Include the package name when the function is not in one of the base R +packages. +For example: \code{"glm"} or \code{"INLA::inla"}.} \item{rhs}{The right hand side of the model.} diff --git a/tests/testthat/test_bbb_aggregate_impute.R b/tests/testthat/test_bbb_aggregate_impute.R index cc463fc..117cd50 100644 --- a/tests/testthat/test_bbb_aggregate_impute.R +++ b/tests/testthat/test_bbb_aggregate_impute.R @@ -96,6 +96,14 @@ test_that("aggregate_impute", { ), "aggregatedImputed" ) + + # handles empty datasets + empty_imputed <- impute(data = dataset[integer(0), ], model = model) + empty_aggr <- aggregate_impute(empty_imputed, grouping = grouping, fun = fun) + expect_is(empty_aggr, "aggregatedImputed") + expect_identical(colnames(empty_aggr@Covariate), grouping) + expect_identical(nrow(empty_aggr@Covariate), nrow(empty_aggr@Imputation)) + expect_identical(ncol(empty_imputed@Imputation), ncol(empty_aggr@Imputation)) }) test_that("aggregate_impute() works on aggregatedImputed objects", { @@ -112,4 +120,13 @@ test_that("aggregate_impute() works on aggregatedImputed objects", { aggr2 <- aggregate_impute(aggr, grouping = grouping2, fun = sum), "aggregatedImputed" ) + + # handles empty datasets + empty_imputed <- impute(data = dataset[integer(0), ], model = model) + empty_aggr <- aggregate_impute(empty_imputed, grouping = grouping, fun = fun) + empty_aggr2 <- aggregate_impute(empty_aggr, grouping = grouping2, fun = fun) + expect_is(empty_aggr2, "aggregatedImputed") + expect_identical(colnames(empty_aggr2@Covariate), grouping2) + expect_identical(nrow(empty_aggr2@Covariate), nrow(empty_aggr2@Imputation)) + expect_identical(ncol(empty_aggr@Imputation), ncol(empty_aggr2@Imputation)) }) diff --git a/tests/testthat/test_ccc_model_impute.R b/tests/testthat/test_ccc_model_impute.R index 0830934..f34081b 100644 --- a/tests/testthat/test_ccc_model_impute.R +++ b/tests/testthat/test_ccc_model_impute.R @@ -1,125 +1,87 @@ -context("model_impute") -describe("model_impute", { - dataset <- generate_data(n_year = 10, n_site = 10, n_run = 1) - it("has no effect when there are no missing values", { - model <- lm(Count ~ Year + factor(Period) + factor(Site), data = dataset) - imputed <- impute(data = dataset, model = model) - aggr <- aggregate_impute(imputed, grouping = c("Year", "Period"), fun = sum) - extractor <- function(model) { - summary(model)$coefficients[, c("Estimate", "Std. Error")] - } - model_aggr <- model_impute( - aggr, - model_fun = lm, - rhs = "0 + factor(Year)", - extractor = extractor - ) - aggr_base <- aggregate(Count ~ Year + Period, data = dataset, FUN = sum) - model_base <- lm(Count ~ 0 + factor(Year), data = aggr_base) - expect_equal( - unname(as.matrix(model_aggr[, 2:3])), - unname(extractor(model_base)) - ) - }) +dataset <- generate_data(n_year = 10, n_site = 10, n_run = 1) +test_that("model_impute has no effect when there are no missing values", { + model <- lm(Count ~ Year + factor(Period) + factor(Site), data = dataset) + imputed <- impute(data = dataset, model = model) + aggr <- aggregate_impute(imputed, grouping = c("Year", "Period"), fun = sum) + extractor <- function(model) { + summary(model)$coefficients[, c("Estimate", "Std. Error")] + } + model_aggr <- model_impute( + aggr, model_fun = lm, rhs = "0 + factor(Year)", extractor = extractor + ) + aggr_base <- aggregate(Count ~ Year + Period, data = dataset, FUN = sum) + model_base <- lm(Count ~ 0 + factor(Year), data = aggr_base) + expect_equal( + unname(as.matrix(model_aggr[, 2:3])), unname(extractor(model_base)) + ) +}) +test_that("model_impute, handles rawImputed", { + dataset$Count[sample(nrow(dataset), 10)] <- NA + model <- lm(Count ~ Year + factor(Period) + factor(Site), data = dataset) + imputed <- impute(data = dataset, model = model) + aggr <- aggregate_impute(imputed, grouping = c("Year", "Period"), fun = sum) + extractor <- function(model) { + summary(model)$coefficients[, c("Estimate", "Std. Error")] + } + expect_is( + model_imp <- model_impute( + aggr, model_fun = lm, rhs = "0 + factor(Year)", extractor = extractor + ), + "data.frame" + ) + expect_identical( + colnames(model_imp), c("Parameter", "Estimate", "SE", "LCL", "UCL") + ) +}) +test_that("model_impute checks the sanity of the arguments", { + expect_error( + model_impute(object = "junk"), "doesn't handle a 'character' object" + ) + dataset <- generate_data(n_year = 10, n_site = 10, n_run = 1) dataset$Count[sample(nrow(dataset), 10)] <- NA model <- lm(Count ~ Year + factor(Period) + factor(Site), data = dataset) imputed <- impute(data = dataset, model = model) aggr <- aggregate_impute(imputed, grouping = c("Year", "Period"), fun = sum) + expect_error( + model_impute(aggr, model_fun = list("junk"), rhs = "0 + factor(Year)"), + "model_fun does not inherit from class function" + ) + expect_error( + model_impute( + aggr, model_fun = lm, rhs = "0 + factor(Year)", extractor = "junk" + ), + "extractor does not inherit from class function" + ) extractor <- function(model) { summary(model)$coefficients[, c("Estimate", "Std. Error")] } - it("handles rawImputed", { - expect_is( - model_imp <- model_impute( - aggr, - model_fun = lm, - rhs = "0 + factor(Year)", - extractor = extractor - ), - "data.frame" - ) - expect_identical( - colnames(model_imp), - c("Parameter", "Estimate", "SE", "LCL", "UCL") - ) - }) - it("checks the sanity of the arguments", { - expect_error( - model_impute(object = "junk"), - "doesn't handle a 'character' object" - ) - dataset <- generate_data(n_year = 10, n_site = 10, n_run = 1) - dataset$Count[sample(nrow(dataset), 10)] <- NA - model <- lm(Count ~ Year + factor(Period) + factor(Site), data = dataset) - imputed <- impute(data = dataset, model = model) - aggr <- aggregate_impute(imputed, grouping = c("Year", "Period"), fun = sum) - expect_error( - model_impute( - aggr, - model_fun = "junk", - rhs = "0 + factor(Year)" - ), - "model_fun does not inherit from class function" - ) - expect_error( - model_impute( - aggr, - model_fun = lm, - rhs = "0 + factor(Year)", - extractor = "junk" - ), - "extractor does not inherit from class function" - ) - extractor <- function(model) { - summary(model)$coefficients[, c("Estimate", "Std. Error")] - } - expect_error( - model_impute( - aggr, - model_fun = lm, - rhs = "0 + factor(Year)", - model_args = "junk", - extractor = extractor - ), - "model_args does not inherit from class list" - ) - expect_error( - model_impute( - aggr, - model_fun = lm, - rhs = "0 + factor(Year)", - extractor_args = "junk", - extractor = extractor - ), - "extractor_args does not inherit from class list" - ) - expect_error( - model_impute( - aggr, - model_fun = lm, - rhs = NA, - extractor = extractor - ), - "rhs is not a character vector" - ) - expect_error( - model_impute( - aggr, - model_fun = lm, - rhs = ~factor(Year), - extractor = extractor - ), - "rhs is not a character vector" - ) - expect_error( - model_impute( - aggr, - model_fun = lm, - rhs = "junk", - extractor = extractor - ), - "model failed on all imputations" - ) - }) + expect_error( + model_impute( + aggr, model_fun = lm, rhs = "0 + factor(Year)", model_args = "junk", + extractor = extractor + ), + "model_args does not inherit from class list" + ) + expect_error( + model_impute( + aggr, model_fun = lm, rhs = "0 + factor(Year)", extractor_args = "junk", + extractor = extractor + ), + "extractor_args does not inherit from class list" + ) + expect_error( + model_impute(aggr, model_fun = lm, rhs = NA, extractor = extractor), + "rhs is not a character vector" + ) + expect_error( + model_impute( + aggr, model_fun = lm, rhs = ~factor(Year), extractor = extractor + ), + "rhs is not a character vector" + ) + expect_error( + model_impute(aggr, model_fun = lm, rhs = "junk", extractor = extractor), + "model failed on all imputations" + ) })