From f94c5b1808800ff1e44bd021e11d2b8e0555e435 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 24 Nov 2024 13:00:33 +0100 Subject: [PATCH 1/3] Harnonize arguments --- R/check_model.R | 80 ++++++++++++++--------------- man/check_model.Rd | 16 +++--- vignettes/check_model_practical.Rmd | 12 ++--- vignettes/simulate_residuals.Rmd | 8 +-- 4 files changed, 58 insertions(+), 58 deletions(-) diff --git a/R/check_model.R b/R/check_model.R index baa224feb..576f483e4 100644 --- a/R/check_model.R +++ b/R/check_model.R @@ -10,8 +10,8 @@ #' get hints about possible problems. #' #' @param x A model object. -#' @param dot_size,line_size Size of line and dot-geoms. -#' @param base_size,title_size,axis_title_size Base font size for axis and plot titles. +#' @param size_dot,size_line Size of line and dot-geoms. +#' @param base_size,size_title,size_axis_title Base font size for axis and plot titles. #' @param panel Logical, if `TRUE`, plots are arranged as panels; else, #' single plots for each diagnostic are returned. #' @param check Character vector, indicating which checks for should be performed @@ -23,7 +23,7 @@ #' `"linearity"`, and checks for non-constant variance, i.e. for #' heteroscedasticity, as well as the linear relationship. By default, all #' possible checks are performed and plotted. -#' @param alpha,dot_alpha The alpha level of the confidence bands and dot-geoms. +#' @param alpha,alpha_dot The alpha level of the confidence bands and dot-geoms. #' Scalar from 0 to 1. #' @param colors Character vector with color codes (hex-format). Must be of #' length 3. First color is usually used for reference lines, second color @@ -196,13 +196,13 @@ check_model.default <- function(x, type = "density", residual_type = NULL, show_dots = NULL, - dot_size = 2, - line_size = 0.8, - title_size = 12, - axis_title_size = base_size, + size_dot = 2, + size_line = 0.8, + size_title = 12, + size_axis_title = base_size, base_size = 10, alpha = 0.2, - dot_alpha = 0.8, + alpha_dot = 0.8, colors = c("#3aaf85", "#1b6ca8", "#cd201f"), theme = "see::theme_lucid", verbose = FALSE, @@ -277,14 +277,14 @@ check_model.default <- function(x, } attr(assumptions_data, "panel") <- panel - attr(assumptions_data, "dot_size") <- dot_size - attr(assumptions_data, "line_size") <- line_size + attr(assumptions_data, "dot_size") <- size_dot + attr(assumptions_data, "line_size") <- size_line attr(assumptions_data, "base_size") <- base_size - attr(assumptions_data, "axis_title_size") <- axis_title_size - attr(assumptions_data, "title_size") <- title_size + attr(assumptions_data, "axis_title_size") <- size_axis_title + attr(assumptions_data, "title_size") <- size_title attr(assumptions_data, "check") <- check attr(assumptions_data, "alpha") <- alpha - attr(assumptions_data, "dot_alpha") <- dot_alpha + attr(assumptions_data, "dot_alpha") <- alpha_dot attr(assumptions_data, "show_dots") <- isTRUE(show_dots) attr(assumptions_data, "detrend") <- detrend attr(assumptions_data, "colors") <- colors @@ -328,28 +328,28 @@ check_model.stanreg <- function(x, type = "density", residual_type = NULL, show_dots = NULL, - dot_size = 2, - line_size = 0.8, - title_size = 12, - axis_title_size = base_size, + size_dot = 2, + size_line = 0.8, + size_title = 12, + size_axis_title = base_size, base_size = 10, alpha = 0.2, - dot_alpha = 0.8, + alpha_dot = 0.8, colors = c("#3aaf85", "#1b6ca8", "#cd201f"), theme = "see::theme_lucid", verbose = FALSE, ...) { check_model(bayestestR::bayesian_as_frequentist(x), - dot_size = dot_size, - line_size = line_size, + size_dot = size_dot, + size_line = size_line, panel = panel, check = check, alpha = alpha, - dot_alpha = dot_alpha, + alpha_dot = alpha_dot, colors = colors, theme = theme, base_size = base_size, - axis_title_size = axis_title_size, + size_axis_title = size_axis_title, detrend = detrend, show_dots = show_dots, bandwidth = bandwidth, @@ -374,26 +374,26 @@ check_model.model_fit <- function(x, type = "density", residual_type = NULL, show_dots = NULL, - dot_size = 2, - line_size = 0.8, - title_size = 12, - axis_title_size = base_size, + size_dot = 2, + size_line = 0.8, + size_title = 12, + size_axis_title = base_size, base_size = 10, alpha = 0.2, - dot_alpha = 0.8, + alpha_dot = 0.8, colors = c("#3aaf85", "#1b6ca8", "#cd201f"), theme = "see::theme_lucid", verbose = FALSE, ...) { check_model( x$fit, - dot_size = dot_size, - line_size = line_size, + size_dot = size_dot, + size_line = size_line, panel = panel, check = check, alpha = alpha, - axis_title_size = axis_title_size, - dot_alpha = dot_alpha, + size_axis_title = size_axis_title, + alpha_dot = alpha_dot, colors = colors, theme = theme, base_size = base_size, @@ -417,26 +417,26 @@ check_model.performance_simres <- function(x, type = "density", residual_type = NULL, show_dots = NULL, - dot_size = 2, - line_size = 0.8, - title_size = 12, - axis_title_size = base_size, + size_dot = 2, + size_line = 0.8, + size_title = 12, + size_axis_title = base_size, base_size = 10, alpha = 0.2, - dot_alpha = 0.8, + alpha_dot = 0.8, colors = c("#3aaf85", "#1b6ca8", "#cd201f"), theme = "see::theme_lucid", verbose = FALSE, ...) { check_model( x$fittedModel, - dot_size = dot_size, - line_size = line_size, + size_dot = size_dot, + size_line = size_line, panel = panel, check = check, alpha = alpha, - dot_alpha = dot_alpha, - axis_title_size = axis_title_size, + alpha_dot = alpha_dot, + size_axis_title = size_axis_title, colors = colors, theme = theme, base_size = base_size, diff --git a/man/check_model.Rd b/man/check_model.Rd index c07add5f2..5d06a7192 100644 --- a/man/check_model.Rd +++ b/man/check_model.Rd @@ -16,13 +16,13 @@ check_model(x, ...) type = "density", residual_type = NULL, show_dots = NULL, - dot_size = 2, - line_size = 0.8, - title_size = 12, - axis_title_size = base_size, + size_dot = 2, + size_line = 0.8, + size_title = 12, + size_axis_title = base_size, base_size = 10, alpha = 0.2, - dot_alpha = 0.8, + alpha_dot = 0.8, colors = c("#3aaf85", "#1b6ca8", "#cd201f"), theme = "see::theme_lucid", verbose = FALSE, @@ -79,11 +79,11 @@ time-consuming. By default, \code{show_dots = NULL}. In this case \code{check_mo tries to guess whether performance will be poor due to a very large model and thus automatically shows or hides dots.} -\item{dot_size, line_size}{Size of line and dot-geoms.} +\item{size_dot, size_line}{Size of line and dot-geoms.} -\item{base_size, title_size, axis_title_size}{Base font size for axis and plot titles.} +\item{base_size, size_title, size_axis_title}{Base font size for axis and plot titles.} -\item{alpha, dot_alpha}{The alpha level of the confidence bands and dot-geoms. +\item{alpha, alpha_dot}{The alpha level of the confidence bands and dot-geoms. Scalar from 0 to 1.} \item{colors}{Character vector with color codes (hex-format). Must be of diff --git a/vignettes/check_model_practical.Rmd b/vignettes/check_model_practical.Rmd index 53ca7ab58..5b6673910 100644 --- a/vignettes/check_model_practical.Rmd +++ b/vignettes/check_model_practical.Rmd @@ -1,6 +1,6 @@ --- title: "How to arrive at the best model fit" -output: +output: rmarkdown::html_vignette: toc: true tags: [r, performance] @@ -8,7 +8,7 @@ vignette: > \usepackage[utf8]{inputenc} %\VignetteIndexEntry{How to arrive at the best model fit} %\VignetteEngine{knitr::rmarkdown} -editor_options: +editor_options: chunk_output_type: console --- @@ -82,7 +82,7 @@ In *performance*, we can conduct a comprehensive visual inspection of our model For now, we want to focus on the _posterior predictive checks_, _dispersion and zero-inflation_ as well as the Q-Q plot (_uniformity of residuals_). ```{r fig.height=12, fig.width=10} -check_model(model1, dot_size = 1.2) +check_model(model1, size_dot = 1.2) ``` Note that unlike `plot()`, which is a base R function to create diagnostic plots, `check_model()` relies on *simulated residuals* for the Q-Q plot, which is more accurate for non-Gaussian models. See [this vignette](https://easystats.github.io/performance/articles/simulate_residuals.html) and the documentation of `simulate_residuals()` for further details. @@ -108,7 +108,7 @@ model2 <- glmmTMB::glmmTMB( family = poisson, data = glmmTMB::Salamanders ) -check_model(model2, dot_size = 1.2) +check_model(model2, size_dot = 1.2) ``` Looking at the above plots, the zero-inflation seems to be addressed properly (see especially _posterior predictive checks_ and _uniformity of residuals_, the Q-Q plot). However, the overdispersion still could be present. We can check for these problems using `check_overdispersion()` and `check_zeroinflation()` again. @@ -132,7 +132,7 @@ model3 <- glmmTMB::glmmTMB( family = glmmTMB::nbinom1, data = glmmTMB::Salamanders ) -check_model(model3, dot_size = 1.2) +check_model(model3, size_dot = 1.2) ``` Now we see that the plot showing _misspecified dispersion and zero-inflation_ suggests that the overdispersion is better addressed than before. Let us check again: @@ -195,7 +195,7 @@ model4 <- glmmTMB::glmmTMB( family = glmmTMB::nbinom2, data = glmmTMB::Salamanders ) -check_model(model4, dot_size = 1.2) +check_model(model4, size_dot = 1.2) check_overdispersion(model4) diff --git a/vignettes/simulate_residuals.Rmd b/vignettes/simulate_residuals.Rmd index 89f826ba6..8842205a8 100644 --- a/vignettes/simulate_residuals.Rmd +++ b/vignettes/simulate_residuals.Rmd @@ -1,6 +1,6 @@ --- title: "Checking simulated residuals" -output: +output: rmarkdown::html_vignette: toc: true tags: [r, performance] @@ -8,7 +8,7 @@ vignette: > \usepackage[utf8]{inputenc} %\VignetteIndexEntry{Checking simulated residuals} %\VignetteEngine{knitr::rmarkdown} -editor_options: +editor_options: chunk_output_type: console --- @@ -93,10 +93,10 @@ check_outliers(simulated_residuals) The above three functions internally call `simulate_residuals()` for more complex models automatically, so you don't need to call `simulate_residuals()` yourself. Simulated residuals are usually more reliable than the standard residuals, especially for complex models. -Finally, you can even perform a visual check for the entire model, either by passing the model object directly, or the object returned from `simulate_residuals()`. +Finally, you can even perform a visual check for the entire model, either by passing the model object directly, or the object returned from `simulate_residuals()`. ```{r fig.height=12, fig.width=10} -check_model(simulated_residuals, dot_size = 1.5) +check_model(simulated_residuals, size_dot = 1.5) ``` The `check_model()` function is the main reason we don't want to prematurely extract the residuals in `simulate_residuals()`, because if we do then the simulated residual won't contain the model fit (`fittedModel` in the output below), so we won't be able to do all of the checks we would want to do using the model (e.g., posterior predictive checks). From 0b2002e3a6f079b91e7c6f8eecdbbf10b97f47f7 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 24 Nov 2024 17:12:42 +0100 Subject: [PATCH 2/3] rename --- R/check_model.R | 26 +++++++++++++------------- R/check_model_diagnostics.R | 16 ++++++++-------- R/check_normality.R | 2 +- R/check_outliers.R | 10 +--------- R/check_overdispersion.R | 2 +- 5 files changed, 24 insertions(+), 32 deletions(-) diff --git a/R/check_model.R b/R/check_model.R index 576f483e4..11562a351 100644 --- a/R/check_model.R +++ b/R/check_model.R @@ -462,35 +462,35 @@ check_model.DHARMa <- check_model.performance_simres # multicollinearity -------------- if (any(c("all", "vif") %in% check)) { - dat$VIF <- .diag_vif(model, verbose = verbose) + dat$VIF <- .model_diagnostic_vif(model, verbose = verbose) } # Q-Q plot (normality/uniformity of residuals) -------------- if (any(c("all", "qq") %in% check)) { dat$QQ <- switch(residual_type, simulated = .safe(simulate_residuals(model, ...)), - .diag_qq(model, model_info = model_info, verbose = verbose) + .model_diagnostic_qq(model, model_info = model_info, verbose = verbose) ) } # Random Effects Q-Q plot (normality of BLUPs) -------------- if (any(c("all", "reqq") %in% check)) { - dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose) + dat$REQQ <- .model_diagnostic_ranef_qq(model, level = 0.95, model_info = model_info, verbose = verbose) } # normal-curve plot (normality of residuals) -------------- if (any(c("all", "normality") %in% check)) { - dat$NORM <- .diag_norm(model, verbose = verbose) + dat$NORM <- .model_diagnostic_normality(model, verbose = verbose) } # non-constant variance (heteroskedasticity, liniearity) -------------- if (any(c("all", "ncv", "linearity") %in% check)) { - dat$NCV <- .diag_ncv(model, verbose = verbose) + dat$NCV <- .model_diagnostic_ncv(model, verbose = verbose) } # homogeneity of variance -------------- if (any(c("all", "homogeneity") %in% check)) { - dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose) + dat$HOMOGENEITY <- .model_diagnostic_homogeneity(model, verbose = verbose) } # outliers -------------- @@ -501,7 +501,7 @@ check_model.DHARMa <- check_model.performance_simres } else { threshold <- attributes(dat$OUTLIERS)$threshold$cook } - dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold) + dat$INFLUENTIAL <- .safe(.model_diagnostic_outlier(model, threshold = threshold)) } # posterior predictive checks -------------- @@ -523,25 +523,25 @@ check_model.DHARMa <- check_model.performance_simres # multicollinearity -------------- if (any(c("all", "vif") %in% check)) { - dat$VIF <- .diag_vif(model, verbose = verbose) + dat$VIF <- .model_diagnostic_vif(model, verbose = verbose) } # Q-Q plot (normality/uniformity of residuals) -------------- if (any(c("all", "qq") %in% check)) { dat$QQ <- switch(residual_type, simulated = .safe(simulate_residuals(model, ...)), - .diag_qq(model, model_info = model_info, verbose = verbose) + .model_diagnostic_qq(model, model_info = model_info, verbose = verbose) ) } # homogeneity of variance -------------- if (any(c("all", "homogeneity") %in% check)) { - dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose) + dat$HOMOGENEITY <- .model_diagnostic_homogeneity(model, verbose = verbose) } # Random Effects Q-Q plot (normality of BLUPs) -------------- if (any(c("all", "reqq") %in% check)) { - dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose) + dat$REQQ <- .model_diagnostic_ranef_qq(model, level = 0.95, model_info = model_info, verbose = verbose) } # outliers -------------- @@ -552,7 +552,7 @@ check_model.DHARMa <- check_model.performance_simres } else { threshold <- attributes(dat$OUTLIERS)$threshold$cook } - dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold) + dat$INFLUENTIAL <- .safe(.model_diagnostic_outlier(model, threshold = threshold)) } # posterior predictive checks -------------- @@ -567,7 +567,7 @@ check_model.DHARMa <- check_model.performance_simres # misspecified dispersion and zero-inflation -------------- if (isTRUE(model_info$is_count) && any(c("all", "overdispersion") %in% check)) { - dat$OVERDISPERSION <- .diag_overdispersion(model) + dat$OVERDISPERSION <- .model_diagnostic_overdispersion(model) } dat <- insight::compact_list(dat) diff --git a/R/check_model_diagnostics.R b/R/check_model_diagnostics.R index 8897c685e..9595ed968 100644 --- a/R/check_model_diagnostics.R +++ b/R/check_model_diagnostics.R @@ -1,6 +1,6 @@ # prepare data for VIF plot ---------------------------------- -.diag_vif <- function(model, verbose = TRUE) { +.model_diagnostic_vif <- function(model, verbose = TRUE) { out <- check_collinearity(model, verbose = verbose) dat <- insight::compact_list(out) if (is.null(dat)) { @@ -35,7 +35,7 @@ # prepare data for QQ plot ---------------------------------- -.diag_qq <- function(model, model_info = NULL, verbose = TRUE) { +.model_diagnostic_qq <- function(model, model_info = NULL, verbose = TRUE) { if (inherits(model, c("lme", "lmerMod", "merMod", "gam"))) { res_ <- stats::residuals(model) } else if (inherits(model, "geeglm")) { @@ -98,7 +98,7 @@ # prepare data for random effects QQ plot ---------------------------------- -.diag_reqq <- function(model, level = 0.95, model_info = NULL, verbose = TRUE) { +.model_diagnostic_ranef_qq <- function(model, level = 0.95, model_info = NULL, verbose = TRUE) { # check if we have mixed model if (is.null(model_info) || !model_info$is_mixed) { return(NULL) @@ -161,7 +161,7 @@ # prepare data for normality of residuals plot ---------------------------------- -.diag_norm <- function(model, verbose = TRUE) { +.model_diagnostic_normality <- function(model, verbose = TRUE) { r <- try(as.numeric(stats::residuals(model)), silent = TRUE) if (inherits(r, "try-error")) { @@ -181,7 +181,7 @@ # prepare data for influential obs plot ---------------------------------- -.diag_influential_obs <- function(model, threshold = NULL) { +.model_diagnostic_outlier <- function(model, threshold = NULL) { s <- summary(model) if (inherits(model, "lm", which = TRUE) == 1) { @@ -220,7 +220,7 @@ # prepare data for non-constant variance plot ---------------------------------- -.diag_ncv <- function(model, verbose = TRUE) { +.model_diagnostic_ncv <- function(model, verbose = TRUE) { ncv <- tryCatch( data.frame( x = as.numeric(stats::fitted(model)), @@ -248,7 +248,7 @@ # prepare data for homogeneity of variance plot ---------------------------------- -.diag_homogeneity <- function(model, verbose = TRUE) { +.model_diagnostic_homogeneity <- function(model, verbose = TRUE) { faminfo <- insight::model_info(model) r <- tryCatch( if (inherits(model, "merMod")) { @@ -367,7 +367,7 @@ -.diag_overdispersion <- function(model, ...) { +.model_diagnostic_overdispersion <- function(model, ...) { faminfo <- insight::model_info(model) # data for poisson models diff --git a/R/check_normality.R b/R/check_normality.R index 7ea6970ec..ffd4ba824 100644 --- a/R/check_normality.R +++ b/R/check_normality.R @@ -231,7 +231,7 @@ check_normality.merMod <- function(x, effects = c("fixed", "random"), ...) { p.val <- c(p.val, .check_normality(re[[i]][[j]], x, "random effects")) } } - attr(p.val, "re_qq") <- .diag_reqq(x, level = 0.95, model_info = info) + attr(p.val, "re_qq") <- .model_diagnostic_ranef_qq(x, level = 0.95, model_info = info) attr(p.val, "type") <- "random effects" attr(p.val, "re_groups") <- re_groups } diff --git a/R/check_outliers.R b/R/check_outliers.R index 071bd39a3..7729f9931 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -588,7 +588,7 @@ check_outliers.default <- function(x, attr(outlier, "threshold") <- thresholds attr(outlier, "method") <- method attr(outlier, "text_size") <- 3 - attr(outlier, "influential_obs") <- .influential_obs(x, threshold = unlist(thresholds)) + attr(outlier, "influential_obs") <- .safe(.model_diagnostic_outlier(x, threshold = unlist(thresholds))) # nolint attr(outlier, "variables") <- "(Whole model)" attr(outlier, "raw_data") <- my_data attr(outlier, "outlier_var") <- outlier_var @@ -2036,14 +2036,6 @@ check_outliers.DHARMa <- check_outliers.performance_simres -# influential observations data -------- - -.influential_obs <- function(x, threshold = NULL) { - .safe(.diag_influential_obs(x, threshold = threshold)) -} - - - # Non-supported model classes --------------------------------------- #' @export diff --git a/R/check_overdispersion.R b/R/check_overdispersion.R index 0d877b0ad..fc8e25e32 100644 --- a/R/check_overdispersion.R +++ b/R/check_overdispersion.R @@ -101,7 +101,7 @@ plot.check_overdisp <- function(x, ...) { } } if (!is.null(model)) { - x <- .diag_overdispersion(model) + x <- .model_diagnostic_overdispersion(model) class(x) <- c("see_check_overdisp", "data.frame") attr(x, "colors") <- list(...)$colors attr(x, "line_size") <- list(...)$size_line From 5a519ee11d1255df4b11fbe64598158142452e22 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 24 Nov 2024 17:24:30 +0100 Subject: [PATCH 3/3] news, desc --- DESCRIPTION | 2 +- NEWS.md | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 89c9a9d57..b1ce379bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.4.6 +Version: 0.12.4.7 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 9eb5ded26..1bddce2ce 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,11 @@ * Deprecated arguments and alias-function-names have been removed. +* Argument names in `check_model()` that refer to plot-aesthetics (like + `dot_size`) are now harmonized across *easystats* packages, meaning that + these have been renamed. They now follow the pattern `aesthetic_type`, e.g. + `size_dot` (instead of `dot_size`). + ## Changes * Increased accuracy for `check_convergence()` for *glmmTMB* models.