From 443b0eca76b401c7e0ea1fa8544af70de6d8d911 Mon Sep 17 00:00:00 2001 From: Philippine Louail Date: Thu, 11 Jan 2024 14:35:12 +0100 Subject: [PATCH] fix: naming system --- ...ction-filtering.R => quality-assessment.R} | 33 ++++++++++--------- ...ringFunctions.Rd => quality_assessment.Rd} | 22 ++++++------- vignettes/MetaboCoreUtils.Rmd | 12 ++++--- 3 files changed, 35 insertions(+), 32 deletions(-) rename R/{function-filtering.R => quality-assessment.R} (84%) rename man/{filteringFunctions.Rd => quality_assessment.Rd} (84%) diff --git a/R/function-filtering.R b/R/quality-assessment.R similarity index 84% rename from R/function-filtering.R rename to R/quality-assessment.R index 19038f1..4cbaa2d 100644 --- a/R/function-filtering.R +++ b/R/quality-assessment.R @@ -1,12 +1,12 @@ -#' @title Basic filtering functions for metabolomics +#' @title Basic quality assessment functions for metabolomics #' #' @description #' -#' When dealing with metabolomics results, it is often necessary to filter -#' features based on certain criteria. These criteria are typically derived -#' from statistical formulas applied to full rows of data, where each row -#' represents a feature. The following functions provide basic filtering -#' methods commonly used in the analysis of metabolomics data. +#' The following functions allow to calculate basic quality assessment estimates +#' typically employed in the analysis of metabolomics data. These functions are +#' designed to be applied to entire rows of data, where each row corresponds to +#' a feature. Subsequently, these estimates can serve as a foundation for +#' feature filtering. #' #' - `rsd` and `rowRsd` are convenience functions to calculate the relative #' standard deviation (i.e. coefficient of variation) of a numerical vector @@ -45,9 +45,9 @@ #' for non-gaussian distributed data. #' #' @note -#' For `rsd` and `rowRsd` the feature abundances are expected to be provided -#' in natural scale and not e.g. 
log2 scale as it may lead to incorrect -#' interpretations. +#' For `rsd` and `rowRsd` the feature abundances are expected to be provided in +#' natural scale and not e.g. log2 scale as it may lead to incorrect +#' interpretations. #' #' @return See individual function description above for details. #' @@ -57,7 +57,7 @@ #' #' @importFrom stats sd mad median #' -#' @name filteringFunctions +#' @name quality_assessment #' #' @references #' @@ -97,7 +97,7 @@ NULL #' @export -#' @rdname filteringFunctions +#' @rdname quality_assessment rsd <- function(x, na.rm = TRUE, mad = FALSE) { if (mad) mad(x, na.rm = na.rm) / abs(median(x, na.rm = na.rm)) @@ -106,12 +106,12 @@ rsd <- function(x, na.rm = TRUE, mad = FALSE) { } #' @export -#' @rdname filteringFunctions +#' @rdname quality_assessment rowRsd <- function(x, na.rm = TRUE, mad = FALSE) apply(x, MARGIN = 1, rsd, na.rm = na.rm, mad = mad) #' @export -#' @rdname filteringFunctions +#' @rdname quality_assessment rowDratio <- function(x, y, na.rm = TRUE, mad = FALSE){ if (mad) vec <- apply(y, 1, mad, na.rm = na.rm) / @@ -122,19 +122,20 @@ rowDratio <- function(x, y, na.rm = TRUE, mad = FALSE){ } #' @export -#' @rdname filteringFunctions +#' @rdname quality_assessment percentMissing <- function(x){ ((sum(is.na(x))) / length(x))*100 } #' @export -#' @rdname filteringFunctions +#' @rdname quality_assessment rowPercentMissing <- function(x){ apply(x, MARGIN = 1, percentMissing) } #' @export -#' @rdname filteringFunctions +#' @rdname quality_assessment + rowBlank <- function(x, y, na.rm = TRUE){ m_samples <- apply(x, 1, mean, na.rm = na.rm) m_blank <- apply(y, 1, mean, na.rm = na.rm) diff --git a/man/filteringFunctions.Rd b/man/quality_assessment.Rd similarity index 84% rename from man/filteringFunctions.Rd rename to man/quality_assessment.Rd index 38ee320..c706dcf 100644 --- a/man/filteringFunctions.Rd +++ b/man/quality_assessment.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in 
R/function-filtering.R -\name{filteringFunctions} -\alias{filteringFunctions} +% Please edit documentation in R/quality-assessment.R +\name{quality_assessment} +\alias{quality_assessment} \alias{rsd} \alias{rowRsd} \alias{rowDratio} \alias{percentMissing} \alias{rowPercentMissing} \alias{rowBlank} -\title{Basic filtering functions for metabolomics} +\title{Basic quality assessment functions for metabolomics} \usage{ rsd(x, na.rm = TRUE, mad = FALSE) @@ -42,11 +42,11 @@ respectively.} See individual function description above for details. } \description{ -When dealing with metabolomics results, it is often necessary to filter -features based on certain criteria. These criteria are typically derived -from statistical formulas applied to full rows of data, where each row -represents a feature. The following functions provide basic filtering -methods commonly used in the analysis of metabolomics data. +The following functions allow to calculate basic quality assessment estimates +typically employed in the analysis of metabolomics data. These functions are +designed to be applied to entire rows of data, where each row corresponds to +a feature. Subsequently, these estimates can serve as a foundation for +feature filtering. \itemize{ \item \code{rsd} and \code{rowRsd} are convenience functions to calculate the relative standard deviation (i.e. coefficient of variation) of a numerical vector @@ -68,8 +68,8 @@ literature, and they are implemented to assist in preprocessing metabolomics data. } \note{ -For \code{rsd} and \code{rowRsd} the feature abundances are expected to be provided -in natural scale and not e.g. log2 scale as it may lead to incorrect +For \code{rsd} and \code{rowRsd} the feature abundances are expected to be provided in +natural scale and not e.g. log2 scale as it may lead to incorrect interpretations. 
} \examples{ diff --git a/vignettes/MetaboCoreUtils.Rmd b/vignettes/MetaboCoreUtils.Rmd index c06b829..b8eec43 100644 --- a/vignettes/MetaboCoreUtils.Rmd +++ b/vignettes/MetaboCoreUtils.Rmd @@ -546,18 +546,19 @@ Generally, injecting study samples in random order can reduce (or even avoid) influence of any related technical bias in the downstream analysis and is highly suggested to improve and assure data quality. -## Filtering data: Identifying measurement error +## Basic quality assessment and pre-filtering of metabolomics data When dealing with metabolomics results, it is often necessary to filter features based on certain criteria. These criteria are typically derived from statistical formulas applied to full rows of data, where each row represents a feature. In this tutorial, we'll explore a set of functions -designed for filtering metabolomics data. +designed to calculate basic quality assessment metrics on which +metabolomics data can subsequently be filtered. First, to get more information on the available function you can check the documentation ```{r} -?filteringFunctions +?quality_assessment ``` We will use a matrix representing metabolomics measurements from different @@ -581,8 +582,9 @@ cv_result <- rowRsd(metabolomics_data) print(cv_result) ``` -Next, we will compute the D-ratio, a measure of dispersion, by comparing the -standard deviation of QC samples to that of biological test samples. +Next, we will compute the D-ratio [@broadhurst_guidelines_2018], a measure of +dispersion, by comparing the standard deviation of QC samples to that of +biological test samples. ```{r} # Generate QC samples