From 083ea313c36d61ca10b86f35d80d89c70b47f316 Mon Sep 17 00:00:00 2001 From: gbganalyst Date: Tue, 2 Apr 2024 14:12:48 +0100 Subject: [PATCH 1/2] added pipe function --- R/utils-pipe.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/utils-pipe.R b/R/utils-pipe.R index e7ea19f..fd0b1d1 100644 --- a/R/utils-pipe.R +++ b/R/utils-pipe.R @@ -1,12 +1,14 @@ #' Pipe operator #' -#' See \code{magrittr::\link[magrittr]{\%>\%}} for details. +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. #' #' @name %>% #' @rdname pipe #' @keywords internal -#' @return No return value #' @export #' @importFrom magrittr %>% #' @usage lhs \%>\% rhs +#' @param lhs A value or the magrittr placeholder. +#' @param rhs A function call using the magrittr semantics. +#' @return The result of calling `rhs(lhs)`. NULL From f39db9e35a2fd2bd1e599bc5d1d4430ef21d1766 Mon Sep 17 00:00:00 2001 From: gbganalyst Date: Wed, 10 Apr 2024 21:11:36 +0100 Subject: [PATCH 2/2] Updated code repo --- DESCRIPTION | 2 +- R/fill_missing_values.R | 39 +++++++++++++++++++--------------- man/fill_missing_values.Rd | 17 +++++++++------ man/pipe.Rd | 9 ++++++-- man/read_csv_files_from_dir.Rd | 2 +- vignettes/other-functions.Rmd | 2 +- 6 files changed, 43 insertions(+), 28 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 594b328..20f8787 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -55,4 +55,4 @@ VignetteBuilder: Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 diff --git a/R/fill_missing_values.R b/R/fill_missing_values.R index 1344d89..0ba709c 100644 --- a/R/fill_missing_values.R +++ b/R/fill_missing_values.R @@ -16,12 +16,13 @@ #' which missing values should be imputed. If `NULL` (default), imputation is #' applied to all variables in the data frame. #' -#' @param method A character string specifying the imputation method for continuous -#' variables. Supported methods are "min", "max", "mean", "median", "harmonic", -#' and "geometric". The default method is "mean". For categorical variables, the -#' mode is always used. +#' @param method A character string specifying the imputation method for +#' continuous variables. Supported methods are "min", "max", "mean", "median", +#' "harmonic", and "geometric". The default method is "mean". For categorical +#' variables, the mode is always used. #' -#' @return A data frame with missing values imputed according to the specified `method`. +#' @return A data frame with missing values imputed according to the specified +#' `method`. #' #' @export #' @@ -71,17 +72,21 @@ #' map_df(fill_missing_values, method = "median") #' #' -fill_missing_values <- function(df, selected_variables = NULL, method = "mean") { +fill_missing_values <- function(df, selected_variables = NULL, method = c("mean", "min", "max", "median", "harmonic", "geometric")) { + + # Check if df is a dataframe + + if (!is.data.frame(df)) { + stop("fill_missing_values() is designed to operate exclusively on objects of the class 'dataframe'") + } if (missing(df)) { stop("argument 'df' is missing, with no default") } # Validate method input for continuous variables - valid_methods <- c("min", "max", "mean", "median", "harmonic", "geometric") - if (!(method %in% valid_methods)) { - stop("Invalid method. Choose from 'min', 'max', 'mean', 'median', 'harmonic', 'geometric'") - } + + method <- rlang::arg_match(method) # Calculate the replacement value based on the specified method @@ -91,13 +96,13 @@ fill_missing_values <- function(df, selected_variables = NULL, method = "mean") } # Skip non-numeric columns replacement_value <- switch(method, - min = min(x, na.rm = TRUE), - max = max(x, na.rm = TRUE), - mean = mean(x, na.rm = TRUE), - median = median(x, na.rm = TRUE), - harmonic = harmonic_mean(x), - geometric = geometric_mean(x), - x + min = min(x, na.rm = TRUE), + max = max(x, na.rm = TRUE), + mean = mean(x, na.rm = TRUE), + median = median(x, na.rm = TRUE), + harmonic = harmonic_mean(x), + geometric = geometric_mean(x), + x ) # Default to return x as is diff --git a/man/fill_missing_values.Rd b/man/fill_missing_values.Rd index cb730c9..fc5bd86 100644 --- a/man/fill_missing_values.Rd +++ b/man/fill_missing_values.Rd @@ -4,7 +4,11 @@ \alias{fill_missing_values} \title{Fill missing values in a data frame} \usage{ -fill_missing_values(df, selected_variables = NULL, method = "mean") +fill_missing_values( + df, + selected_variables = NULL, + method = c("mean", "min", "max", "median", "harmonic", "geometric") +) } \arguments{ \item{df}{A dataframe to process for missing value imputation.} @@ -13,13 +17,14 @@ fill_missing_values(df, selected_variables = NULL, method = "mean") which missing values should be imputed. If \code{NULL} (default), imputation is applied to all variables in the data frame.} -\item{method}{A character string specifying the imputation method for continuous -variables. Supported methods are "min", "max", "mean", "median", "harmonic", -and "geometric". The default method is "mean". For categorical variables, the -mode is always used.} +\item{method}{A character string specifying the imputation method for +continuous variables. Supported methods are "min", "max", "mean", "median", +"harmonic", and "geometric". The default method is "mean". For categorical +variables, the mode is always used.} } \value{ -A data frame with missing values imputed according to the specified \code{method}. +A data frame with missing values imputed according to the specified +\code{method}. } \description{ \code{fill_missing_values()} is an efficient function that addresses missing diff --git a/man/pipe.Rd b/man/pipe.Rd index c4525a0..a648c29 100644 --- a/man/pipe.Rd +++ b/man/pipe.Rd @@ -6,10 +6,15 @@ \usage{ lhs \%>\% rhs } +\arguments{ +\item{lhs}{A value or the magrittr placeholder.} + +\item{rhs}{A function call using the magrittr semantics.} +} \value{ -No return value +The result of calling \code{rhs(lhs)}. } \description{ -See \code{magrittr::\link[magrittr]{\%>\%}} for details. +See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. } \keyword{internal} diff --git a/man/read_csv_files_from_dir.Rd b/man/read_csv_files_from_dir.Rd index 9c4540e..10af1c5 100644 --- a/man/read_csv_files_from_dir.Rd +++ b/man/read_csv_files_from_dir.Rd @@ -39,7 +39,7 @@ character represents one column: By default, reading a file without a column specification will print a message showing what \code{readr} guessed they were. To remove this message, -set \code{show_col_types = FALSE} or set `options(readr.show_col_types = FALSE).} +set \code{show_col_types = FALSE} or set \code{options(readr.show_col_types = FALSE)}.} \item{.id}{The name of a column in which to store the file path. This is useful when reading multiple input files and there is data in the file diff --git a/vignettes/other-functions.Rmd b/vignettes/other-functions.Rmd index b0a5f08..30f3c05 100644 --- a/vignettes/other-functions.Rmd +++ b/vignettes/other-functions.Rmd @@ -94,7 +94,7 @@ airquality %>% ## fill_missing_values() -`fill_missing_values()` is an efficient function that addresses missing values in a data frame. It uses imputation by function, also known as column-based imputation, to impute the missing values. It supports various imputation methods for continuous variables, including minimum, maximum, mean, median, harmonic mean, and geometric mean. For categorical variables, missing values are replaced with the mode of the column. This approach ensures accurate and consistent replacements derived from individual columns, resulting in a complete and reliable dataset for improved analysis and decision-making. +`fill_missing_values()` is an efficient function that addresses missing values in a data frame. It uses imputation by function, also known as column-based imputation, to impute the missing values. It supports various imputation methods for continuous variables, including `minimum`, `maximum`, `mean`, `median`, `harmonic mean`, and `geometric mean`. For categorical variables, missing values are replaced with the `mode` of the column. This approach ensures accurate and consistent replacements derived from individual columns, resulting in a complete and reliable dataset for improved analysis and decision-making. ```{r example 6}