From 1edc232afc5b622e2a1905a20e5a5b2bec4f2047 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 4 Aug 2023 22:31:32 +0200 Subject: [PATCH] refactor: countElements returns NA if an element is invalid instead of dropping it --- R/adducts.R | 5 +++- R/chemFormula.R | 50 +++++++++++++++++-------------- tests/testthat/test_chemFormula.R | 9 ++++++ 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/R/adducts.R b/R/adducts.R index 65e4694..fc0c1b7 100644 --- a/R/adducts.R +++ b/R/adducts.R @@ -233,6 +233,9 @@ adductFormula <- function(formulas, adduct = "[M+H]+", standardize = TRUE) { ionMatrix <- lapply(formulas, function(formula, adduct) { formulaAdduct <- apply(adduct, 1, function(x) { current_f <- formula + + if (is.null(current_f)) return(NULL) + multiplicity <- round(as.numeric(x["mass_multi"]) * as.numeric(x["charge"])) if (multiplicity != 1) { @@ -264,7 +267,7 @@ adductFormula <- function(formulas, adduct = "[M+H]+", standardize = TRUE) { return(formulaAdduct) }, adduct = adduct) ionMatrix <- do.call(rbind, ionMatrix) - rownames(ionMatrix) <- formulas + rownames(ionMatrix) <- formulas[as.logical(lengths(formulas))] colnames(ionMatrix) <- rownames(adduct) return(ionMatrix) } diff --git a/R/chemFormula.R b/R/chemFormula.R index 948f859..8936fba 100644 --- a/R/chemFormula.R +++ b/R/chemFormula.R @@ -25,25 +25,25 @@ countElements <- function(x) { "(?", paste0("[0-9]*", c( - "[A][cglmrstu]|", - "[B][aehikr]?|", - "[C][adeflmnorsu]?|", - "[D][bsy]|", - "[E][rsu]|", - "[F][elmr]?|", - "[G][ade]|", - "[H][efgos]?|", - "[I][nr]?|", - "[K][r]?|", - "[L][airuv]|", - "[M][cdgnot]|", - "[N][abdehiop]?|", - "[O][gs]?|", - "[P][abdmortu]?|", - "[R][abefghnu]|", - "[S][bcegimnr]?|", - "[T][abcehilms]|", - "[U]|[V]|[W]|[X][e]|[Y][b]?|[Z][nr]" + "A[cglmrstu]|", + "B[aehikr]?|", + "C[adeflmnorsu]?|", + "D[bsy]|", + "E[rsu]|", + "F[elmr]?|", + "G[ade]|", + "H[efgos]?|", + "I[nr]?|", + "K[r]?|", + "L[airuv]|", + "M[cdgnot]|", + "N[abdehiop]?|", + "O[gs]?|", + "P[abdmortu]?|", + "R[abefghnu]|", + "S[bcegimnr]?|", + "T[abcehilms]|", + "U|V|W|Xe|Yb?|Z[nr]" ), collapse = "" ), @@ -58,6 +58,10 @@ countElements <- function(x) { if (is.na(xx)) return(NA_integer_) + if (sum(attr(rr, "match.length")) != nchar(gsub("\\[|\\]", "", xx))) { + warning("The given formula '", xx, "' contains invalid symbols.") + return(NA_integer_) + } start <- attr(rr, "capture.start") end <- start + attr(rr, "capture.length") - 1L @@ -175,10 +179,10 @@ standardizeFormula <- function(x) { #' @examples #' .sum_elements(c(H = 6, C = 3, O = 6, C = 3, H = 6)) .sum_elements <- function(x) { - if (!is.character(names(x))) - stop("element names missing") if (anyNA(x)) return(NA_integer_) + if (!is.character(names(x))) + stop("element names missing") unlist(lapply(split(x, names(x)), sum)) } @@ -336,7 +340,9 @@ calculateMass <- function(x) { stop("x must be either a character or a list with element counts.") vapply(x, function(z) { isotopes <- names(z) - if (!length(z) || !all(isotopes %in% names(.ISOTOPES))) { + if (!length(z) || + is.null(isotopes) || + !all(isotopes %in% names(.ISOTOPES))) { message("not for all isotopes a mass is found") return(NA_real_) } diff --git a/tests/testthat/test_chemFormula.R b/tests/testthat/test_chemFormula.R index c6abbb0..f6349e3 100644 --- a/tests/testthat/test_chemFormula.R +++ b/tests/testthat/test_chemFormula.R @@ -53,6 +53,15 @@ test_that("countElements", { ), names = c("C6H12O6", NA, "H2O") ) ) + expect_warning(countElements("Foo"), "invalid") + expect_identical( + suppressWarnings(countElements(c("C6H12O6", "Foo", "H2O"))), + list( + "C6H12O6" = c(C = 6L, H = 12L, O = 6L), + Foo = NA_integer_, + "H2O" = c(H = 2L, O = 1L) + ) + ) ## heavy isotopes expect_identical(