From 1e8bc0199d8e8feb665147c557ef4f18c75c45a6 Mon Sep 17 00:00:00 2001 From: flozanoisla Date: Wed, 10 Jul 2024 18:12:19 -0500 Subject: [PATCH] new functions --- NEWS.md | 4 ++-- R/remove_outliers.R | 51 +++++++++++++++++++++++++----------------- man/remove_outliers.Rd | 1 + 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/NEWS.md b/NEWS.md index 267e9aa..ac37938 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,8 +9,8 @@ # inti 0.6.6 - Package - - Update function to outliers_remove => "`remove_outliers`" - - Update function `plot_diag()` + - New function related `outliers_remove()` => "`remove_outliers`" to work with formula + - New function related `plot_diag()` => "`plot_diagnostic`" to work with formula - Rticles - Fix Tables and Figures order in final document diff --git a/R/remove_outliers.R b/R/remove_outliers.R index 810d0da..14b2f77 100644 --- a/R/remove_outliers.R +++ b/R/remove_outliers.R @@ -34,6 +34,7 @@ #' remove_outliers(data = . #' , formula = stemdw ~ 0 + (1|bloque) + treat*geno #' , plot_diag = FALSE +#' , drop_na = FALSE #' ) #' #' rmout @@ -45,7 +46,7 @@ remove_outliers <- function(data , plot_diag = FALSE ) { - # data = potato; drop_na = TRUE; plot_diag = TRUE + # data = potato; drop_na = F; plot_diag = T # formula = stemdw ~ 0 + (1|bloque) + treat*geno out_flag <- bholm <- NULL @@ -55,10 +56,16 @@ remove_outliers <- function(data trait <- factors[1] mdfct <- factors[-1] - model <- lme4::lmer(formula, data) + rawdt <- data %>% + tibble::rownames_to_column("index") %>% + tidyr::drop_na({{trait}}) %>% + dplyr::select("index", {{factors}}) %>% + dplyr::relocate({{trait}}, .after = last_col()) + + modeli <- lme4::lmer(formula, rawdt) # re-scaled MAD - resi <- cbind(residuals(model, type = "response")) + resi <- cbind(residuals(modeli, type = "response")) medi <- median(resi, na.rm = TRUE) MAD <- median((abs(resi-medi)), na.rm = TRUE) re_MAD <- MAD*1.4826 @@ -71,10 +78,7 @@ remove_outliers <- function(data # Calculate adjusted p-values rawp.BHStud <- 2 * (1 - pnorm(abs(res_MAD))) - newdt <- data %>% - select({{factors}}) %>% - relocate({{trait}}, .after = last_col()) %>% - drop_na() %>% # fix model test bug? + newdt <- rawdt %>% cbind.data.frame(., resi, res_MAD, rawp.BHStud) # Produce a Bonferroni-Holm tests for the adjusted p-values @@ -87,30 +91,35 @@ remove_outliers <- function(data rownames_to_column("index") %>% mutate(out_flag = ifelse(bholm <0.05, "OUTLIER", ".")) - outliers <- cbind(newdt, BHStud_test) %>% + outliers <- merge(newdt, BHStud_test) %>% dplyr::filter(out_flag %in% "OUTLIER") - nwdt <- cbind(newdt, BHStud_test) %>% - mutate({{trait}} := case_when( + cleandt <- merge(newdt, BHStud_test) %>% + dplyr::mutate({{trait}} := case_when( !out_flag %in% "OUTLIER" ~ as.character(.data[[trait]]) , TRUE ~ NA_character_ )) %>% - mutate(across({{trait}}, as.numeric)) %>% + dplyr::mutate(across({{trait}}, as.numeric)) %>% {if (isTRUE(drop_na)) {drop_na(data = ., any_of({{trait}}))} else {.}} %>% - select({{factors}}) %>% - relocate({{trait}}, .after = last_col()) - - modelf <- lme4::lmer(formula = formula, data = nwdt) + dplyr::select(1:{{trait}}) %>% + dplyr::mutate(across(.data$index, ~ as.numeric(.))) %>% + dplyr::arrange(.data$index) + + modelf <- lme4::lmer(formula, cleandt) diagplot <- if(isTRUE(plot_diag)) { - raw <- data %>% + raw <- rawdt %>% + tidyr::drop_na({{trait}}) %>% plot_diagnostic(formula) %>% - cowplot::plot_grid(nrow = 1, plotlist = ., labels = "Raw data") + cowplot::plot_grid(nrow = 1, plotlist = . + , labels = paste("Raw data:", {{trait}})) - clean <- nwdt %>% + clean <- cleandt %>% + tidyr::drop_na({{trait}}) %>% plot_diagnostic(formula) %>% - cowplot::plot_grid(nrow = 1, plotlist = ., labels = "Clean data") + cowplot::plot_grid(nrow = 1, plotlist = . + , labels = paste("Clean data:", trait)) list(raw, clean) %>% cowplot::plot_grid(nrow = 2, plotlist = .) @@ -119,10 +128,10 @@ remove_outliers <- function(data list( - data = list(raw = data, clean = nwdt) + data = list(raw = rawdt, clean = cleandt) , outliers = outliers , diagplot = diagplot - , model = list(raw = model, clean = modelf) + , model = list(raw = modeli, clean = modelf) ) } diff --git a/man/remove_outliers.Rd b/man/remove_outliers.Rd index ccf5eae..b9de3ea 100644 --- a/man/remove_outliers.Rd +++ b/man/remove_outliers.Rd @@ -34,6 +34,7 @@ rmout <- potato \%>\% remove_outliers(data = . , formula = stemdw ~ 0 + (1|bloque) + treat*geno , plot_diag = FALSE + , drop_na = FALSE ) rmout