diff --git a/R/fragments-calculate.R b/R/fragments-calculate.R index 4b1cec6..3594832 100644 --- a/R/fragments-calculate.R +++ b/R/fragments-calculate.R @@ -24,7 +24,8 @@ #' @return The methods with `oject = "missing"` returns a #' `data.frame`. #' -#' @param sequence character() providing a peptide sequence. +#' @param sequence character() providing a peptide sequence. ProForma delta +#' masses are supported (e.g. `"EM[+15.9949]EVEES[-79.9663]PEK"`). #' #' @param type `character` vector of target ions; possible values: #' `c("a", "b", "c", "x", "y", "z")`. Default is `type = c("b", @@ -63,6 +64,9 @@ #' #' @exportMethod calculateFragments #' +#' @references +#' [HUPO-PSI ProForma specification](http://www.psidev.info/proforma) +#' #' @examples #' #' ## calculate fragments for ACE with default modification @@ -93,6 +97,9 @@ #' #' ## disable neutral loss completely #' calculateFragments("PQR", neutralLoss=NULL) +#' +#' ## ProForma encoded delta masses +#' calculateFragments("EM[+15.9949]EVEES[-79.9663]PEK") setMethod("calculateFragments", c("character", "missing"), function(sequence, type = c("b", "y"), z = 1, modifications = c(C = 57.02146), @@ -172,14 +179,17 @@ setMethod("calculateFragments", c("character", "missing"), message("Modifications used: ", mods) } + clean_sequence <- .proforma_clean_sequences(sequence)[[1]] + delta_mass <- .proforma_delta_masses(sequence)[[1]] + ## split peptide sequence into aa - fragment.seq <- strsplit(sequence, "")[[1]] + fragment.seq <- strsplit(clean_sequence, "")[[1]] fn <- length(fragment.seq) ## calculate cumulative mass starting at the amino-terminus (for a, b, c) - amz <- cumsum(aamass[fragment.seq[-fn]]) + amz <- cumsum(aamass[fragment.seq[-fn]] + delta_mass[-fn]) ## calculate cumulative mass starting at the carboxyl-terminus (for x, y, z) - cmz <- cumsum(aamass[rev(fragment.seq[-1L])]) + cmz <- cumsum(aamass[rev(fragment.seq[-1L])] + rev(delta_mass[-1L])) ## calculate fragment mass (amino-terminus) tn <- length(amz) @@ -202,11 +212,11 @@ setMethod("calculateFragments", c("character", "missing"), cmz <- cmz + mass["p"] ## fragment seq (amino-terminus) - aseq <- rep(rep(substring(sequence, rep(1L, fn - 1L), + aseq <- rep(rep(substring(clean_sequence, rep(1L, fn - 1L), 1L:(fn - 1L)), each = zn), nat) ## fragment seq (carboxyl-terminus) - cseq <- rep(rep(rev(substring(sequence, 2L:fn, + cseq <- rep(rep(rev(substring(clean_sequence, 2L:fn, rep(fn, fn - 1L))), each=zn), nct) ## fragment str (amino-terminus) diff --git a/R/proforma-parser.R b/R/proforma-parser.R new file mode 100644 index 0000000..26fc250 --- /dev/null +++ b/R/proforma-parser.R @@ -0,0 +1,60 @@ +#' ProForma parser +#' +#' This helper functions provide a basic implementation of the PSI ProForma +#' notation. +#' +#' @author Sebastian Gibb +#' +#' @references http://www.psidev.info/proforma +#' @name proforma-parser + +#' Remove all modifications +#' +#' @name proforma-parser +#' @param x `character`, ProForma sequence. +#' @return `character`, a `character` cleaned of all modifications. +#' @noRd +#' @examples +#' .proforma_clean_sequences( +#' c("EM[+15.9949]EVEES[+79.9663]PEK", +#' "EM[+15.995]EVEES[-18.01]PEK") +#' ) +.proforma_clean_sequences <- function(x) { + gsub(pattern = "\\[[^]]*\\]|<[^>]*>", "", x) +} + +#' Extract delta masses +#' +#' @name proforma-parser +#' @param x `character`, ProForma sequence. +#' @return `list`, a `list` of `doubles` representing the delta masses for each +#' sequence. +#' @noRd +#' @examples +#' .proforma_delta_masses( +#' c("EM[+15.9949]EVEES[+79.9663]PEK", +#' "EM[+15.995]EVEES[-18.01]PEK") +#' ) +.proforma_delta_masses <- function(x) { + rx <- gregexpr( + pattern = + "(?[A-Z])(?:\\[[GMURX]?:?)?(?[+-][0-9.]+)?(?:\\])?", + text = x, + perl = TRUE + ) + + mapply(function(sequence, start, matched_length) { + mod <- as.double( + substring(sequence, start, start + matched_length - 1L) + ) + mod[is.na(mod)] <- 0 + mod + }, + sequence = x, + start = + lapply(rx, function(r)attr(r, "capture.start")[, "DeltaMass"]), + matched_length = + lapply(rx, function(r)attr(r, "capture.length")[, "DeltaMass"]), + SIMPLIFY = FALSE, USE.NAMES = FALSE + ) +} diff --git a/man/calculateFragments.Rd b/man/calculateFragments.Rd index a669f31..37f9413 100644 --- a/man/calculateFragments.Rd +++ b/man/calculateFragments.Rd @@ -16,7 +16,8 @@ ) } \arguments{ -\item{sequence}{character() providing a peptide sequence.} +\item{sequence}{character() providing a peptide sequence. ProForma delta +masses are supported (e.g. \code{"EM[+15.9949]EVEES[-79.9663]PEK"}).} \item{type}{\code{character} vector of target ions; possible values: \code{c("a", "b", "c", "x", "y", "z")}. Default is \code{type = c("b", "y")}.} @@ -98,6 +99,12 @@ calculateFragments("PQR", ## disable neutral loss completely calculateFragments("PQR", neutralLoss=NULL) + +## ProForma encoded delta masses +calculateFragments("EM[+15.9949]EVEES[-79.9663]PEK") +} +\references{ +\href{http://www.psidev.info/proforma}{HUPO-PSI ProForma specification} } \author{ Sebastian Gibb \href{mailto:mail@sebastiangibb.de}{mail@sebastiangibb.de} diff --git a/tests/testthat/test_proforma-parser.R b/tests/testthat/test_proforma-parser.R new file mode 100644 index 0000000..dae5a67 --- /dev/null +++ b/tests/testthat/test_proforma-parser.R @@ -0,0 +1,42 @@ +test_that("proforma clean sequences works", { + expect_identical( + .proforma_clean_sequences( + c("EM[+15.9949]EVEES[+79.9663]PEK", "EM[+15.995]EVEES[-18.01]PEK") + ), + c("EMEVEESPEK", "EMEVEESPEK") + ) +}) + +test_that("proforma delta masses without prefixes are supported", { + expect_equal( + .proforma_delta_masses( + c("EMEVEESPEK", "EM[+15.995]EVEESPEK") + ), + list( + rep(0, 10), + c(0, 15.995, 0, 0, 0, 0, 0, 0, 0, 0) + ) + ) + expect_equal( + .proforma_delta_masses( + c("EM[+15.9949]EVEES[+79.9663]PEK", "EM[+15.995]EVEES[-18.01]PEK") + ), + list( + c(0, 15.9949, 0, 0, 0, 0, 79.9663, 0, 0, 0), + c(0, 15.995, 0, 0, 0, 0, -18.01, 0, 0, 0) + ) + ) +}) + +test_that("proforma delta masses with prefixes are supported", { + expect_equal( + .proforma_delta_masses( + c("EM[U:+15.9949]EVEES[M:+79.9663]PEK", + "EM[X:+15.995]EVEES[R:-18.01]PEK") + ), + list( + c(0, 15.9949, 0, 0, 0, 0, 79.9663, 0, 0, 0), + c(0, 15.995, 0, 0, 0, 0, -18.01, 0, 0, 0) + ) + ) +})