From 6845b2723166c9cb1813db272ef42c31daaac802 Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Thu, 31 Oct 2024 10:01:12 +0100 Subject: [PATCH 01/20] doc: update Description and SystemRequirements --- DESCRIPTION | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 270b79a..7315cc2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rawrr Type: Package Title: Direct Access to Orbitrap Data and Beyond -Version: 1.15.1 +Version: 1.15.2.x Authors@R: c(person("Christian", "Panse", email = "cp@fgcz.ethz.ch", role = c("aut", "cre"), @@ -25,16 +25,16 @@ Suggests: rmarkdown, tartare (>= 1.5), testthat -Description: This package wraps the functionality - of the RawFileReader .NET assembly. Within the R environment, +Description: This package wraps the functionality of the + Thermo Fisher Scientific RawFileReader .NET 8.0 assembly. + Within the R environment, spectra and chromatograms are represented by S3 objects. The package provides basic functions to download and install the required third-party libraries. The package is developed, tested, and used at the Functional Genomics Center Zurich, Switzerland. License: GPL-3 -SystemRequirements: mono-runtime 4.x or higher (including System.Data library) - on Linux/macOS, .Net Framework (>= 4.5.1) on Microsoft Windows. 
+SystemRequirements: .NET 8.0 URL: https://github.com/fgcz/rawrr/ BugReports: https://github.com/fgcz/rawrr/issues Encoding: UTF-8 From ad53e703ca497f986f9015e0767649c23164d07a Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Thu, 31 Oct 2024 10:17:20 +0100 Subject: [PATCH 02/20] refactor: remove .checkDllInMonoPath --- R/dotNetAssembly.R | 59 ++++++++++------------------------------------ 1 file changed, 12 insertions(+), 47 deletions(-) diff --git a/R/dotNetAssembly.R b/R/dotNetAssembly.R index 46ac310..1398745 100644 --- a/R/dotNetAssembly.R +++ b/R/dotNetAssembly.R @@ -3,16 +3,6 @@ # Test if \code{rawrr.exe} .NET assembly is working .isAssemblyWorking <- function(FUN = stop, exe = .rawrrAssembly()){ - if (Sys.info()['sysname'] %in% c("Darwin", "Linux")){ - if (Sys.which('mono') == ""){ - msg <- c("The cross platform, open source .NET framework (mono) is not available.\n", - "Consider to install 'apt-get install mono-runtime' on Linux\n", - "or download/install from https://www.mono-project.com/.") - FUN(msg) - } - } - - .checkRawFileReaderDLLs(FUN) if (isFALSE(file.exists(exe))){ msg <- c("'rawrr.exe' not found.\n", @@ -24,15 +14,8 @@ # execute rawrr.exe assembly and keep output string rvs <- "?" 
- if (Sys.info()['sysname'] %in% c("Darwin", "Linux") && !exists('RAWRRDOTNET')){ - if (file.exists(exe) && Sys.which('mono') != ""){ - rvs <- system2(Sys.which('mono'), args = c(shQuote(exe)), - stdout = TRUE) - } - }else{ - if (file.exists(exe)){ - rvs <- system2(exe, stdout = TRUE) - } + if (file.exists(exe)){ + rvs <- system2(exe, stdout = TRUE) } # expect that output string @@ -41,11 +24,12 @@ FUN(msg) } - if(interactive() && isFALSE(.checkDllInMonoPath())){ stopifnot(.isRawFileReaderLicenseAccepted()) } + if(interactive()){ stopifnot(.isRawFileReaderLicenseAccepted()) } TRUE } +## TODO: recator .rawfileReaderDLLs <- function(){ # 'ThermoFisher.CommonCore.BackgroundSubtraction.dll', c( @@ -76,20 +60,6 @@ rawrrAssemblyPath <- function(){ return(d) } -#' Check if a file is contained in the environment variable \code{MONO_PATH}. -#' -#' @param dll a file name. -#' -#' @return a boolean -#' @export -.checkDllInMonoPath <- function(dll="ThermoFisher.CommonCore.Data.dll"){ - monoPath <- Sys.getenv("MONO_PATH", names=TRUE) - monoPath <- strsplit(monoPath, .Platform$path.sep)[[1]] - any(vapply(monoPath, function(d){ - file.exists(file.path(d, dll)) - }, FALSE)) -} - .checkRawFileReaderDLLs <- function(FUN=stop){ rv <- vapply(.rawfileReaderDLLs(), function(dll){ @@ -112,20 +82,15 @@ rawrrAssemblyPath <- function(){ .rawrrAssembly <- function(){ - f <- file.path(rawrrAssemblyPath(), 'rawrr.exe') - - if (exists('RAWRRDOTNET')){ - if (Sys.info()['sysname'] == "Darwin"){ - file.path(rawrr::rawrrAssemblyPath(), 'osx-x64', 'rawrr') -> f - } else if (Sys.info()['sysname'] == "Linux"){ - file.path(rawrr::rawrrAssemblyPath(), 'linux-x64', 'rawrr') -> f - } else { - file.path(rawrr::rawrrAssemblyPath(), 'win-x64', 'rawrr.exe') -> f - } - message("Using '", f, "' ...") + if (Sys.info()['sysname'] == "Darwin"){ + file.path(rawrr::rawrrAssemblyPath(), 'osx-x64', 'rawrr') -> f + } else if (Sys.info()['sysname'] == "Linux"){ + file.path(rawrr::rawrrAssemblyPath(), 'linux-x64', 
'rawrr') -> f } else { file.path(rawrr::rawrrAssemblyPath(), 'win-x64', 'rawrr.exe') -> f } - - return(f) + message("Using '", f, "' ...") + return(f) } From 83d161e9c2c46f7e81da94210c02c4823cc4f93c Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Thu, 31 Oct 2024 11:09:27 +0100 Subject: [PATCH 03/20] doc: refactor --- INSTALL | 77 +++++++++++++++++++++++---------------------------------- 1 file changed, 31 insertions(+), 46 deletions(-) diff --git a/INSTALL b/INSTALL index 7dc148e..e10ee0e 100644 --- a/INSTALL +++ b/INSTALL @@ -1,83 +1,68 @@ # System requirements -## Linux (debian:10/ubuntu:20.04) +The `rawrr` executable will run out of the box. -In case you prefer to compile `rawrr.exe` from C# source code, please install -the mono compiler and xbuild by installing the following Linux packages: +If you want to build on your own, follow the text below. -```{sh} -sudo apt-get install mono-mcs mono-xbuild -``` +## Compile and Link yourself -Otherwise, to execute the precompiled code, the following Linux packages are -sufficient: +In case you prefer to compile `rawrr.exe` from C# source code, please install +the .NET 8.0 SDK. -```{sh} -sudo apt-get install mono-runtime libmono-system-data4.0-cil -y -``` +### Linux (debian:10/ubuntu:20.04) (debian:12/ubuntu:24) -## macOS (Catalina/BigSur) +```{sh} +## DEPRECATED: sudo apt-get install mono-mcs mono-xbuild +sudo apt-get install dotnet-sdk-8.0 ``` -brew install mono -``` - -or install from - -https://www.mono-project.com/ -## Microsoft Windows +### macOS (Catalina/BigSur/.../Sequoia) -Running the `rawrr.exe` will run out of the box. 
+https://dotnet.microsoft.com/en-us/download -If the native C# compiler is not available install mono from: +### Microsoft Windows -https://www.mono-project.com/ +https://dotnet.microsoft.com/en-us/download - -# Install the .NET assemblies +## Install the .NET assemblies assemblies aka Common Intermediate Language bytecode -the following files are required in -`r tools::R_user_dir("rawrr", which='cache')` -or in the `MONO_PATH` +In general, ThermoFisher.CommonCore dlls can be obtained through: -``` -ThermoFisher.CommonCore.Data.dll -ThermoFisher.CommonCore.MassPrecisionEstimator.dll -ThermoFisher.CommonCore.RawFileReader.dll -``` +https://github.com/thermofisherlsms/RawFileReader -The download and install can be done on all platforms using the command: -`r rawrr::installRawFileReaderDLLs()` +or -The in the package included C# source code that can compile into a rawrr.exe -file by calling `r rawrr:::buildRawrrExe()`. (is executed when the package -is loaded) +by contacting Jim Shofstahl using -if no C# compile and build tool is available, run +jim.Shofstahl@thermofisher.com -`r rawrr::installRawrrExe()` to download the rawrr.exe assembly. -On Windows, the decimal symbol has to be configured as a '.'! 
+## build -In general, ThermoFisher.CommonCore dlls can be obtained through: +* source nuget pkgs -https://github.com/thermofisherlsms/RawFileReader +* add libraries -or +* (cross-)compile and link -by contacting Jim Shofstahl using +``` +dotnet publish rawrr-dotnet.csproj --os osx -a x64 --output /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/osx-x64 +dotnet publish rawrr-dotnet.csproj --os win -a x64 --output /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/win-x64 +dotnet publish rawrr-dotnet.csproj --os linux -a x64 --output /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/linux-x64 +``` -jim.Shofstahl@thermofisher.com +## Docker The `Dockerfile` performing `R CMD build` and `R CMD check` requires ``` from bioconductor/bioconductor_docker:devel run apt-get update && apt-get install mono-mcs mono-xbuild -y -run apt-get install texlive-base texlive-latex-extra texinfo texlive-fonts-extra -y +run apt-get install dotnet-sdk-8.0 -y run R -q -e "BiocManager::install(c('BiocStyle', 'ExperimentHub', 'knitr', 'protViz', 'rmarkdown', 'tartare', 'testthat'))" ``` + From eaf9590e55c8571d0e37c909ea17245966cbfc2c Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Thu, 31 Oct 2024 11:10:37 +0100 Subject: [PATCH 04/20] doc: replace mono by dotnet in code snippets --- vignettes/rawrr.Rmd | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/vignettes/rawrr.Rmd b/vignettes/rawrr.Rmd index a170fcc..7f74c3f 100644 --- a/vignettes/rawrr.Rmd +++ b/vignettes/rawrr.Rmd @@ -59,10 +59,7 @@ If the environment variable `MONO_PATH` does not include a directory containing the RawFileReader .NET assemblies are installed in a directory derived by the `rawrr::rawrrAssemblyPath()` function. 
-```{r installAssemblies, echo=TRUE} -if (isFALSE(rawrr::.checkDllInMonoPath())){ - rawrr::installRawFileReaderDLLs() -} +```{r installRuntime, echo=TRUE} rawrr::installRawrrExe() ``` @@ -353,7 +350,7 @@ See also [#fgcz/rawDiag/issues/33](https://github.com/fgcz/rawDiag/issues/33). } ``` -# Session information {-} +# System and session information {-} ```{r sessioninfo, echo=FALSE} sessionInfo() @@ -361,12 +358,12 @@ sessionInfo() # Mono information {-} -```{bash monoinfo, echo=TRUE, error=TRUE} -mono --version +```{bash dotnet--info, echo=TRUE, error=TRUE} +dotnet --info ``` ```{bash pkginfo, echo=TRUE, error=TRUE} -pkg-config --cflags --libs mono-2 +dotnet nuget list source ``` # References {-} From f1b4362e3a496e77c8a4470f9277f9163a0c5030 Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Thu, 31 Oct 2024 18:09:00 +0100 Subject: [PATCH 05/20] refactor: adapt test cases for dotnet runtime --- inst/rawrrassembly/rawrr-dotnet.csproj | 17 ----------------- inst/rawrrassembly/rawrr.csproj | 25 +++++++++++-------------- tests/testthat/test-EH4547.R | 2 +- tests/testthat/test-spectrum.R | 13 ++++++++----- 4 files changed, 20 insertions(+), 37 deletions(-) delete mode 100644 inst/rawrrassembly/rawrr-dotnet.csproj diff --git a/inst/rawrrassembly/rawrr-dotnet.csproj b/inst/rawrrassembly/rawrr-dotnet.csproj deleted file mode 100644 index 5480680..0000000 --- a/inst/rawrrassembly/rawrr-dotnet.csproj +++ /dev/null @@ -1,17 +0,0 @@ - - - - Exe - net8.0 - rawrr - true - true - - - - - - - - - diff --git a/inst/rawrrassembly/rawrr.csproj b/inst/rawrrassembly/rawrr.csproj index 69ea68a..5480680 100644 --- a/inst/rawrrassembly/rawrr.csproj +++ b/inst/rawrrassembly/rawrr.csproj @@ -1,20 +1,17 @@ - + + + Exe + net8.0 rawrr - bin\ + true + true - - - - - - - - - - - - + + + + + diff --git a/tests/testthat/test-EH4547.R b/tests/testthat/test-EH4547.R index 038fa24..9ef4f93 100644 --- a/tests/testthat/test-EH4547.R +++ b/tests/testthat/test-EH4547.R @@ -22,6 +22,6 @@ 
test_that("check tic of EH4547", { expect_equal(length(x$times), length(x$intensities)) expect_equal(length(x$intensities), 995) - expect_true(sum(x$intensities) == 63682815178) + expect_equal(sum(x$intensities), 63682815178, tolerance = 10) } }) diff --git a/tests/testthat/test-spectrum.R b/tests/testthat/test-spectrum.R index 35cde2f..c0b4daf 100644 --- a/tests/testthat/test-spectrum.R +++ b/tests/testthat/test-spectrum.R @@ -50,12 +50,15 @@ test_that("check readSpectrum scan 23.", { package = 'rawrr') |> read.table(sep="\t", header=TRUE) - expect_true(sum(S$mZ %in% DF$m.z) >= 720) - expect_true(sum(S$intensity %in% DF$Intensity) >= 720) + ## mono + expect_true(sum(round(S$mZ, 3) %in% round(DF$m.z, 3)) >= 720) + expect_true(sum(round(S$intensity, 3) %in% round(DF$Intensity, 3)) >= 720) - lapply(DF$m.z[DF$Flags == "F"] %in% S$mZ, expect_true) - lapply(DF$m.z[DF$Flags == "M"] %in% S$mZ, expect_true) - lapply(DF$m.z[DF$Flags == "E"] %in% S$mZ, expect_false) + + mZ <- round(S$mZ, 3) + lapply(round(DF$m.z[DF$Flags == "F"], 3) %in% mZ, FUN = expect_true) + lapply(round(DF$m.z[DF$Flags == "M"], 3) %in% mZ, FUN = expect_true) + lapply(round(DF$m.z[DF$Flags == "E"], 3) %in% mZ, FUN = expect_false) }) From 8c6290ae4e92df4e25afc02f40d9caefd056b3a3 Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Thu, 31 Oct 2024 18:09:58 +0100 Subject: [PATCH 06/20] refactor: replace mono by dotnet; add benchmark --- vignettes/rawrr.Rmd | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/vignettes/rawrr.Rmd b/vignettes/rawrr.Rmd index 7f74c3f..4ecac76 100644 --- a/vignettes/rawrr.Rmd +++ b/vignettes/rawrr.Rmd @@ -252,9 +252,6 @@ fit <- lm(rtFittedAPEX ~ iRTscore) The fitted model can then be inspected using standard procedures. Figure 6, shows a visual inspection by plotting observed RTs as a function of iRT score together with the fitted model regression line. The corresponding R-squared indicates that the RTs behave highly linear. 
This is expected since the iRT peptides were separated on a 20 min linear gradient from 5% buffer B to 35% buffer B using C18 reversed-phase material (the change rate is therefore constant at 1.5% per minute). The magnitude of the slope parameter (b) is a direct equivalent of this gradient change rate. The intercept (a) is equal to the predicted RT of iRT peptide `GAGSSEPVTGLDAK` since it was defined to have a zero score on the iRT scale. - - - ```{r iRTscoreFitPlot, fig.small=TRUE, echo=FALSE, fig.cap="iRT regression. Plot shows observed peptide RTs as a function of iRT scores and fitted regression line of corresponding linear model obtained by ordinary least squares (OLS) regression."} # iRTscoreFitPlot plot(rtFittedAPEX ~ iRTscore, @@ -277,6 +274,15 @@ An extended and dynamic version of the above use cases can be found at (https:// The R script that renders the html page is also available as supplementary information. The correspoding [R markdown file](https://github.com/fgcz/rawrr/blob/master/vignettes/JPR_supplement.Rmd) is part of the `rawrr` package and can be processed locally after downloading a snapshot of the above described input data from [MSV000086542](https://massive.ucsd.edu/ProteoSAFe/dataset.jsp?accession=MSV000086542) [@MSV000086542]. In summary, this shows how scalable analysis pipelines can be constructed starting from basic building blocks. It demonstrates that `rawrr`'s data access mechanism works for all types of Orbitrap instrument models. +## Benchmark + +```{r benchmark, message = FALSE, fig.cap="Runtime versus n Random selected scans.", fig.small=TRUE} +1:4 |> + lapply(FUN=function(x){rawrr::sampleFilePath() |> rawrr:::.benchmark()}) |> + Reduce(f=rbind) -> S +boxplot(S$runTimeInSec ~ S$count, log='y') +``` + # Conclusions Our R package `rawrr` provides direct access to spectral data stored in Thermo Fisher Scientific raw-formatted binary files, thereby eliminating the need for unfavorable conversion to exchange formats. 
Within the `R` environment, spectral data is presented by using only two non-standard objects representing data items well known to analytical scientists (mass spectrum and mass chromatogram). This design choice makes data handling relatively easy and intuitive and requires little knowledge about internal/technical details of the implementation. By using vendor API methods whenever possible, we nevertheless made sure that ease-of-use does not impair performance. We also emphasize that our implementation aligns well with common `R` conventions and styles. From 10b54e20653011e3c990d2d11f9338729a76fc1a Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Thu, 31 Oct 2024 18:10:54 +0100 Subject: [PATCH 07/20] refactor: make download for dotnet work --- R/dotNetAssembly.R | 171 +++++++++++++++++++++------------------------ 1 file changed, 81 insertions(+), 90 deletions(-) diff --git a/R/dotNetAssembly.R b/R/dotNetAssembly.R index 1398745..15610f2 100644 --- a/R/dotNetAssembly.R +++ b/R/dotNetAssembly.R @@ -29,7 +29,7 @@ } -## TODO: recator +## TODO: refactor .rawfileReaderDLLs <- function(){ # 'ThermoFisher.CommonCore.BackgroundSubtraction.dll', c( @@ -61,10 +61,11 @@ rawrrAssemblyPath <- function(){ } +## TODO: refactor .checkRawFileReaderDLLs <- function(FUN=stop){ rv <- vapply(.rawfileReaderDLLs(), function(dll){ userFileDllPath <- file.path(rawrrAssemblyPath(), dll) - dllExists <- file.exists(userFileDllPath) || .checkDllInMonoPath(dll) + dllExists <- file.exists(userFileDllPath) if (isFALSE(dllExists)){ message(sprintf("'%s' is missing.", dll)) } @@ -89,7 +90,6 @@ rawrrAssemblyPath <- function(){ } else { file.path(rawrr::rawrrAssemblyPath(), 'win-x64', 'rawrr.exe') -> f } - message("Using '", f, "' ...") return(f) } @@ -97,109 +97,91 @@ rawrrAssemblyPath <- function(){ #' URL for Thermo Fisher .NET assemblies #' #' @return an URL -#' @export .thermofisherlsmsUrl <- function(){ - "https://github.com/thermofisherlsms/RawFileReader/raw/main/Libs/Net471/" + # 
"https://github.com/thermofisherlsms/RawFileReader/tree/main/Libs/NetCore/Net8/" + "https://github.com/thermofisherlsms/RawFileReader/raw/refs/heads/main/Libs/NetCore/Net8/" } -#' Download and install the New RawFileReader from Thermo Fisher Scientific .Net -#' assemblies i +#' Download and install the Thermo Fisher Scientific .NET 8.0 nupkgs #' -#' @description -#' Download and install the New RawFileReader from Thermo Fisher Scientific .Net -#' assemblies in -#' the directory provided by \code{rawrrAssemblyPath()}. -#' -#' -#' @param ... other parameter for \code{download.file} -#' @param sourceUrl url of New RawFileReader from Thermo Fisher Scientific -#' assemblies. +#' @param sourceUrl url of nupkgs. +#' @param force if \code{TRUE} it will overwrite the pkgs. #' #' @aliases Thermo #' @aliases ThermoFisher #' @aliases ThermoFisherScientific #' #' @details -#' The console application assembly \code{rawrr.exe} requires three -#' assemplies: +#' The console application assembly \code{rawrr.exe} requires: #' \itemize{ #' \item {\code{ThermoFisher.CommonCore.Data.dll}, } #' \item{\code{ThermoFisher.CommonCore.MassPrecisionEstimator.dll}, and} #' \item{ThermoFisher.CommonCore.RawFileReader.dll} #' }. #' -#' The \code{rawrr.exe} assembly can be built from C# source code by using the -#' \code{msbuild} tool shipped by the \url{https://www.mono-project.com} or by -#' Microsoft's .NET SDK \url{https://dotnet.microsoft.com} on Linux, Microsoft, -#' and macOS. -#' -#' If no build tool and C# compiler (\code{csc} or \code{msc}) are available or -#' the build process fails, you can download \code{rawrr.exe} assembly from the -#' authors' site. 
-#' -#' @seealso \link{buildRawrrExe} and \link{installRawrrExe} -#' #' @references \itemize{ #' \item{\url{https://www.mono-project.com/docs/advanced/assemblies-and-the-gac/}} #' \item{\url{https://planetorbitrap.com/rawfilereader}} #' \item{\doi{10.1021/acs.jproteome.0c00866}} #' } #' -#' @author Christian Panse , 2021 +#' @author Christian Panse , 2021, 2024 #' #' @return An (invisible) vector of integer code, 0 for success and non-zero for #' failure. For the "wget" and "curl" methods this is the status code returned #' by the external program. #' -#' @export installRawFileReaderDLLs #' @importFrom utils download.file -#' -#' @examples -#' # to install all assemblies -#' \donttest{ -#' rawrr::installRawFileReaderDLLs() -#' rawrr::buildRawrrExe() || rawrr::installRawrrExe() -#' } -# TODO(cp): rename installThermoFisherScientificRawFileReaderAssemblyDLLs() -installRawFileReaderDLLs <- - function(sourceUrl = .thermofisherlsmsUrl(), ...){ +.downloadNupkgs <- function(sourceUrl = .thermofisherlsmsUrl(), force = TRUE){ rawfileReaderDLLsPath <- rawrrAssemblyPath() + + if (isFALSE(dir.exists(rawfileReaderDLLsPath))){ + dir.create(rawfileReaderDLLsPath, recursive = TRUE) + } if (isTRUE(dir.exists(rawfileReaderDLLsPath))){ - msg <- sprintf("removing DLL files in directory '%s'", rawfileReaderDLLsPath) + msg <- sprintf("removing nupkgs files in directory '%s'", rawfileReaderDLLsPath) message(msg) file.remove(file.path(rawrrAssemblyPath(), - list.files(rawrrAssemblyPath(), pattern="\\.dll$"))) + list.files(rawrrAssemblyPath(), pattern="\\.nupkg$"))) } - if (isFALSE(dir.exists(rawfileReaderDLLsPath))){ - dir.create(rawfileReaderDLLsPath, recursive = TRUE) - } - - if(interactive()){ stopifnot(.isRawFileReaderLicenseAccepted()) } - - rv <- vapply(.rawfileReaderDLLs(), function(dll){ - destfile <- file.path(rawfileReaderDLLsPath, dll) - download.file(file.path(sourceUrl, dll), - destfile=destfile, mode='wb', ...) 
- }, 0) + c('ThermoFisher.CommonCore.BackgroundSubtraction.8.0.6.nupkg', + 'ThermoFisher.CommonCore.RandomAccessReaderPlugin.8.0.6.nupkg', + 'ThermoFisher.CommonCore.Data.8.0.6.nupkg', + 'ThermoFisher.CommonCore.RawfileReader.8.0.6.nupkg', + 'ThermoFisher.CommonCore.MassPrecisionEstimator.8.0.6.nupkg') |> vapply(FUN = function(nupkg){ + destfile <- file.path(rawfileReaderDLLsPath, nupkg) + download.file(file.path(sourceUrl, nupkg), + destfile = destfile, mode='wb') + }, 0) -> rv + rv } +#' dotnet nuget add source /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/ +#' dotnet nuget remove source "Package source 1" +#' dotnet nuget list source +#' dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator +.addNupkgSource <- function(){ +} +.addPackages <- function(){ +} #' Download and install the \code{rawrr.exe} console application #' -#' @description downloads and installs the \code{rawrr.exe} .Net assembly in +#' @description downloads and installs the \code{rawrr.exe} .NET assembly in #' the directory provided by \code{rawrrAssemblyPath()}. #' #' @details The console application \code{rawrr.exe} is used by the package's #' reader functions through a \link{system2} call. #' #' @param sourceUrl url of \code{rawrr.exe} assembly. +#' @param force if \code{TRUE} it will overwrite the assembly #' @param ... other parameter for \code{download.file}. #' #' @return An integer code, 0 for success and non-zero for @@ -210,24 +192,52 @@ installRawFileReaderDLLs <- #' @aliases rawrr.exe #' @export installRawrrExe installRawrrExe <- - function (sourceUrl = "https://github.com/fgcz/rawrr/releases/download/1.9.2/rawrr.1.9.2.exe", - ...) - { - - - if (isFALSE(dir.exists(rawrrAssemblyPath()))){ - dir.create(rawrrAssemblyPath(), recursive = TRUE) - } - - rawrrAssembly <- .rawrrAssembly() - - rv = download.file(sourceUrl, destfile = rawrrAssembly, mode='wb', ...) 
- - message(sprintf("MD5 %s %s", tools::md5sum(rawrrAssembly), rawrrAssembly)) - rv + function (sourceUrl = "https://fgcz-ms.uzh.ch/~cpanse/rawrr/dotnet/", + force = FALSE, + ...) { + rawrrAssembly <- .rawrrAssembly() + if (file.exists(rawrrAssembly) && isFALSE(force)){ + ## TODO: if interactive ask to override + if (interactive()){ + response <- readline(prompt = sprintf("Assembly exists. Do you want to overwrite it? [Y/n]: ")) + if (tolower(response) == "y"){ + + }else{ + return() + } + } + } + + if (isFALSE(dir.exists(rawrrAssemblyPath()))) { + dir.create(rawrrAssemblyPath(), recursive = TRUE) } + if (Sys.info()["sysname"] == "Darwin") { + sourceUrl <- file.path(sourceUrl, "osx-x64", "rawrr") + } + else if (Sys.info()["sysname"] == "Linux") { + sourceUrl <- file.path(sourceUrl, "linux-x64", "rawrr") + } + else { + sourceUrl <- file.path(sourceUrl, "win-x64", "rawrr.exe") + } + message("Overwrite sourceUrl to ", sourceUrl) + + + dir.create(dirname(rawrrAssembly), recursive = TRUE, showWarnings = FALSE) + rv = download.file(sourceUrl, destfile = rawrrAssembly, mode = "wb", + ...) + Sys.chmod(rawrrAssembly, mode = "0777", use_umask = TRUE) + + message(sprintf("MD5 %s %s", tools::md5sum(rawrrAssembly), rawrrAssembly)) + + rv +} + .buildOnLoad <- function(){ + return() + + ## TODO: # nothing to do if (file.exists(.rawrrAssembly())){ @@ -252,26 +262,7 @@ installRawrrExe <- } -.determineAdditionalLibPath <- function(){ - monoPaths <- strsplit(Sys.getenv("MONO_PATH"), .Platform$path.sep)[[1]] - pgkPath <- rawrrAssemblyPath() - - dlls <- .rawfileReaderDLLs() - - rv <- lapply(c(pgkPath, monoPaths, '/usr/local/lib'), function(d){ - if(all(vapply(dlls, function(x){file.exists(file.path(d, x))}, TRUE))){ - return(d) - }else{NULL} - }) - - rv <- rv[!vapply(rv, is.null, TRUE)] - - if(length(rv) > 0) - return (rv[[1]]) - - NULL -} - +## TODO: make it work for dotnet #' Build \code{rawrr.exe} console application. 
#' #' @description builds \code{rawrr.exe} file from C# source code requiring From bf4a914aceb0daf3f14bd8e70c3f5c07ccbbe525 Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Thu, 31 Oct 2024 18:11:15 +0100 Subject: [PATCH 08/20] cosmetics --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7315cc2..d1a5371 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rawrr Type: Package Title: Direct Access to Orbitrap Data and Beyond -Version: 1.15.2.x +Version: 1.15.2.0 Authors@R: c(person("Christian", "Panse", email = "cp@fgcz.ethz.ch", role = c("aut", "cre"), @@ -39,6 +39,6 @@ URL: https://github.com/fgcz/rawrr/ BugReports: https://github.com/fgcz/rawrr/issues Encoding: UTF-8 NeedsCompilation: no -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 biocViews: MassSpectrometry, Proteomics, Metabolomics, Infrastructure, Software VignetteBuilder: knitr From abaf911592aee48f7435cb67e552e225f8e42f5f Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Thu, 31 Oct 2024 18:12:14 +0100 Subject: [PATCH 09/20] refactor: clean up code by removing mono runtime snippets --- R/rawrr.R | 79 +++++++------------------------------------------------ 1 file changed, 10 insertions(+), 69 deletions(-) diff --git a/R/rawrr.R b/R/rawrr.R index f26734f..50c2ada 100644 --- a/R/rawrr.R +++ b/R/rawrr.R @@ -1,6 +1,7 @@ .monoInfo <-function(){ # system2("mcs", "--version", stdout = TRUE) - system2("mono", "-V", stdout = TRUE) + # system2("mono", "-V", stdout = TRUE) + system2("dotnet", "--version", stdout = TRUE) } .checkReaderFunctions <- function(rawfile = sampleFilePath()){ @@ -85,11 +86,8 @@ function(rawfile, input, rawrrArgs="scans", tmpdir=tempdir(), removeTempfile=TRUE){ - mono <- if(Sys.info()['sysname'] %in% c("Darwin", "Linux")) TRUE else FALSE exe <- .rawrrAssembly() - - tfi <- tempfile(tmpdir=tmpdir, fileext = ".txt") tfo <- tempfile(tmpdir=tmpdir, fileext = ".R") tfstdout <- tempfile(tmpdir=tmpdir, fileext = 
".stdout") @@ -100,23 +98,7 @@ stop(paste0("No input file '", tfi, "' available!")) } - - if (mono && !exists('RAWRRDOTNET')){ - if (system2(command = "/usr/bin/which", args = c("mono"), - stderr = FALSE, stdout = FALSE) != 0){ - stop("mono is not available; please check https://www.mono-project.com/") - } - rvs <- system2(Sys.which("mono"), args = c(shQuote(exe), - shQuote(rawfile), - rawrrArgs, shQuote(tfi), - shQuote(tfo)), - stdout = tfstdout, - stderr = tfstderr) - }else{ - rvs <- system2(exe, args = c( shQuote(rawfile), - rawrrArgs, shQuote(tfi), - shQuote(tfo)), ) - } + rvs <- system2(exe, args = c( shQuote(rawfile), rawrrArgs, shQuote(tfi), shQuote(tfo)), ) if (isFALSE(file.exists(tfo))){ errmsg <- sprintf("Rcode file to parse does not exist. '%s' failed for an unknown reason. @@ -251,21 +233,10 @@ readIndex <- function (rawfile) .isAssemblyWorking() rawfile <- normalizePath(rawfile) .checkRawFile(rawfile) - mono <- if (Sys.info()["sysname"] %in% c("Darwin", "Linux")) - TRUE - else FALSE exe <- .rawrrAssembly() - if (mono && !exists('RAWRRDOTNET')){ - con <- textConnection(system2(Sys.which("mono"), - args = c(shQuote(exe), - shQuote(rawfile), "index"), - stdout = TRUE)) - } - else { - con <- textConnection(system2(exe, args = c(shQuote(rawfile), - "index"), stdout = TRUE)) - } + con <- textConnection(system2(exe, args = c(shQuote(rawfile), "index"), stdout = TRUE)) + DF <- read.table(con, header = TRUE, comment.char = "#", sep = ";", na.strings = "-1", colClasses = c("integer", "character", "numeric", "numeric", "character", "integer", "integer", "integer", "numeric")) @@ -285,7 +256,7 @@ filter <- function(rawfile, filter = "ms", precision = 10, tmpdir=tempdir()){ .isAssemblyWorking() rawfile <- normalizePath(rawfile) .checkRawFile(rawfile) - mono <- if(Sys.info()['sysname'] %in% c("Darwin", "Linux")) TRUE else FALSE + exe <- .rawrrAssembly() @@ -295,21 +266,11 @@ filter <- function(rawfile, filter = "ms", precision = 10, tmpdir=tempdir()){ cmd <- exe - 
if (exists('RAWRRDOTNET')){ mono <<- FALSE} - if (mono){ - rvs <- system2(Sys.which("mono"), - args = c(shQuote(exe), shQuote(rawfile), - "filter", shQuote(filter), shQuote(precision), - shQuote(tfo)), - stderr = tfstderr, - stdout=tfstdout) - }else{ - rvs <- system2(exe, + rvs <- system2(exe, args = c( shQuote(rawfile), "filter", shQuote(filter), shQuote(precision), shQuote(tfo)), stderr = tfstderr, stdout=tfstdout) - } if (isFALSE(file.exists(tfo))){ errmsg <- sprintf("Output file to read does not exist. '%s' failed for an unknown reason. @@ -625,9 +586,7 @@ readSpectrum <- function(rawfile, scan = NULL, tmpdir = tempdir(), filter = "ms", type='tic', tmpdir = tempdir()){ - mono <- if(Sys.info()['sysname'] %in% c("Darwin", "Linux")) TRUE else FALSE exe <- .rawrrAssembly() - tfstdout <- tempfile(fileext = ".stdout", tmpdir = tmpdir) tfstderr <- tempfile(fileext = ".stderr", tmpdir = tmpdir) @@ -636,11 +595,7 @@ readSpectrum <- function(rawfile, scan = NULL, tmpdir = tempdir(), system2args <- c(shQuote(rawfile), "chromatogram", shQuote(filter), tfcsv) - if (mono && !exists('RAWRRDOTNET')){ - rvs <- system2("mono", args = c(shQuote(exe), system2args), stdout=tfstdout, stderr=tfstderr) - }else{ - rvs <- system2(exe, args = system2args, stdout=tfstdout, stderr=tfstderr) - } + rvs <- system2(exe, args = system2args, stdout=tfstdout, stderr=tfstderr) if (isFALSE(file.exists(tfcsv))) { @@ -1594,7 +1549,6 @@ readTrailer <- function(rawfile, label = NULL) { rawfile <- normalizePath(rawfile) .checkRawFile(rawfile) - mono <- if(Sys.info()['sysname'] %in% c("Darwin", "Linux")) TRUE else FALSE exe <- .rawrrAssembly() @@ -1602,27 +1556,14 @@ readTrailer <- function(rawfile, label = NULL) { if (is.null(label)){ # should return all available trailer label - if (mono && !exists('RAWRRDOTNET')){ - con <- textConnection(system2(Sys.which("mono"), - args = c(shQuote(exe), shQuote(rawfile), "trailer"), - stdout = TRUE)) - }else{ - con <- textConnection(system2(exe, + con <- 
textConnection(system2(exe, args = c( shQuote(rawfile), "trailer"), stdout = TRUE)) - } }else{ # use case for providing a trailer label - if (mono && !exists('RAWRRDOTNET')){ - con <- textConnection(system2(Sys.which("mono"), - args = c(shQuote(exe), shQuote(rawfile), - "trailer", shQuote(label)), - stdout = TRUE)) - }else{ - con <- textConnection(system2(exe, + con <- textConnection(system2(exe, args = c(shQuote(rawfile), "trailer", shQuote(label)), stdout = TRUE)) - } } scan(con, what=character(), From c8e3e58b3ad7191e5abe99728c9b43f7ad173baf Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Thu, 31 Oct 2024 18:12:49 +0100 Subject: [PATCH 10/20] feat: add benchmark function --- R/benchmark.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 R/benchmark.R diff --git a/R/benchmark.R b/R/benchmark.R new file mode 100644 index 0000000..5da179d --- /dev/null +++ b/R/benchmark.R @@ -0,0 +1,21 @@ +#R + +#' f <- "/Users/cp/Library/Caches/org.R-project.R/R/ExperimentHub/46314c3933e2_4590.raw" +.benchmark <- function(f){ + stopifnot(file.exists(f)) + + rawrr::readFileHeader(f)$`Number of scans` -> n + + 2**(seq(0, floor(log(n, 2)))) |> + lapply(FUN = function(i){ + sample(n, size = i) |> sort() -> idx + message("Reading ", i, "random scans from ", f) + start.time <- Sys.time() + rawrr::readSpectrum(f, scan = idx) -> S + end.time <- Sys.time() + message("in ", end.time - start.time) + data.frame(count = i, + size = object.size(S) |> as.integer(), + runTimeInSec = as.double(difftime(end.time, start.time, units='secs'))) + }) |> Reduce(f = rbind) +} From 97d00ffc8ebc25fc0d895c41616b1b732d3e8b9a Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Thu, 31 Oct 2024 18:13:26 +0100 Subject: [PATCH 11/20] doc: run roxygen2::roxygenize() --- NAMESPACE | 2 - man/dot-benchmark.Rd | 11 ++++++ man/dot-checkDllInMonoPath.Rd | 17 --------- man/dot-downloadNupkgs.Rd | 42 +++++++++++++++++++++ man/installRawFileReaderDLLs.Rd | 66 
--------------------------------- man/installRawrrExe.Rd | 7 +++- man/rawrr-package.Rd | 7 +++- 7 files changed, 64 insertions(+), 88 deletions(-) create mode 100644 man/dot-benchmark.Rd delete mode 100644 man/dot-checkDllInMonoPath.Rd create mode 100644 man/dot-downloadNupkgs.Rd delete mode 100644 man/installRawFileReaderDLLs.Rd diff --git a/NAMESPACE b/NAMESPACE index 0b41dc5..20df44c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,13 +6,11 @@ S3method(plot,rawrrSpectrum) S3method(print,rawrrSpectrum) S3method(summary,rawrrChromatogram) S3method(summary,rawrrSpectrum) -export(.checkDllInMonoPath) export(.thermofisherlsmsUrl) export(basePeak) export(buildRawrrExe) export(dependentScan) export(faimsVoltageOn) -export(installRawFileReaderDLLs) export(installRawrrExe) export(is.rawrrChromatogram) export(is.rawrrSpectrum) diff --git a/man/dot-benchmark.Rd b/man/dot-benchmark.Rd new file mode 100644 index 0000000..6242a17 --- /dev/null +++ b/man/dot-benchmark.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark.R +\name{.benchmark} +\alias{.benchmark} +\title{f <- "/Users/cp/Library/Caches/org.R-project.R/R/ExperimentHub/46314c3933e2_4590.raw"} +\usage{ +.benchmark(f) +} +\description{ +f <- "/Users/cp/Library/Caches/org.R-project.R/R/ExperimentHub/46314c3933e2_4590.raw" +} diff --git a/man/dot-checkDllInMonoPath.Rd b/man/dot-checkDllInMonoPath.Rd deleted file mode 100644 index d7063b7..0000000 --- a/man/dot-checkDllInMonoPath.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dotNetAssembly.R -\name{.checkDllInMonoPath} -\alias{.checkDllInMonoPath} -\title{Check if a file is contained in the environment variable \code{MONO_PATH}.} -\usage{ -.checkDllInMonoPath(dll = "ThermoFisher.CommonCore.Data.dll") -} -\arguments{ -\item{dll}{a file name.} -} -\value{ -a boolean -} -\description{ -Check if a file is contained in the environment variable 
\code{MONO_PATH}. -} diff --git a/man/dot-downloadNupkgs.Rd b/man/dot-downloadNupkgs.Rd new file mode 100644 index 0000000..076487f --- /dev/null +++ b/man/dot-downloadNupkgs.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dotNetAssembly.R +\name{.downloadNupkgs} +\alias{.downloadNupkgs} +\alias{Thermo} +\alias{ThermoFisher} +\alias{ThermoFisherScientific} +\title{Download and install the Thermo Fisher Scientific .NET 8.0 nupkgs} +\usage{ +.downloadNupkgs(sourceUrl = .thermofisherlsmsUrl(), force = TRUE) +} +\arguments{ +\item{sourceUrl}{url of nupkgs.} + +\item{force}{if \code{TRUE} it will overwrite the pkgs.} +} +\value{ +An (invisible) vector of integer code, 0 for success and non-zero for +failure. For the "wget" and "curl" methods this is the status code returned +by the external program. +} +\description{ +Download and install the Thermo Fisher Scientific .NET 8.0 nupkgs +} +\details{ +The console application assembly \code{rawrr.exe} requires: +\itemize{ +\item {\code{ThermoFisher.CommonCore.Data.dll}, } +\item{\code{ThermoFisher.CommonCore.MassPrecisionEstimator.dll}, and} +\item{ThermoFisher.CommonCore.RawFileReader.dll} +}. 
+} +\references{ +\itemize{ + \item{\url{https://www.mono-project.com/docs/advanced/assemblies-and-the-gac/}} + \item{\url{https://planetorbitrap.com/rawfilereader}} + \item{\doi{10.1021/acs.jproteome.0c00866}} +} +} +\author{ +Christian Panse , 2021, 2024 +} diff --git a/man/installRawFileReaderDLLs.Rd b/man/installRawFileReaderDLLs.Rd deleted file mode 100644 index a1c953d..0000000 --- a/man/installRawFileReaderDLLs.Rd +++ /dev/null @@ -1,66 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dotNetAssembly.R -\name{installRawFileReaderDLLs} -\alias{installRawFileReaderDLLs} -\alias{Thermo} -\alias{ThermoFisher} -\alias{ThermoFisherScientific} -\title{Download and install the New RawFileReader from Thermo Fisher Scientific .Net -assemblies i} -\usage{ -installRawFileReaderDLLs(sourceUrl = .thermofisherlsmsUrl(), ...) -} -\arguments{ -\item{sourceUrl}{url of New RawFileReader from Thermo Fisher Scientific -assemblies.} - -\item{...}{other parameter for \code{download.file}} -} -\value{ -An (invisible) vector of integer code, 0 for success and non-zero for -failure. For the "wget" and "curl" methods this is the status code returned -by the external program. -} -\description{ -Download and install the New RawFileReader from Thermo Fisher Scientific .Net -assemblies in -the directory provided by \code{rawrrAssemblyPath()}. -} -\details{ -The console application assembly \code{rawrr.exe} requires three -assemplies: -\itemize{ -\item {\code{ThermoFisher.CommonCore.Data.dll}, } -\item{\code{ThermoFisher.CommonCore.MassPrecisionEstimator.dll}, and} -\item{ThermoFisher.CommonCore.RawFileReader.dll} -}. - -The \code{rawrr.exe} assembly can be built from C# source code by using the -\code{msbuild} tool shipped by the \url{https://www.mono-project.com} or by -Microsoft's .NET SDK \url{https://dotnet.microsoft.com} on Linux, Microsoft, -and macOS. 
- -If no build tool and C# compiler (\code{csc} or \code{msc}) are available or -the build process fails, you can download \code{rawrr.exe} assembly from the -authors' site. -} -\examples{ -# to install all assemblies -\donttest{ -rawrr::installRawFileReaderDLLs() -rawrr::buildRawrrExe() || rawrr::installRawrrExe() -} -} -\references{ -\itemize{ - \item{\url{https://www.mono-project.com/docs/advanced/assemblies-and-the-gac/}} - \item{\url{https://planetorbitrap.com/rawfilereader}} - \item{\doi{10.1021/acs.jproteome.0c00866}} -} -} -\seealso{ -\link{buildRawrrExe} and \link{installRawrrExe} -} -\author{ -Christian Panse , 2021 -} diff --git a/man/installRawrrExe.Rd b/man/installRawrrExe.Rd index 4a3b1cc..f2e8cfb 100644 --- a/man/installRawrrExe.Rd +++ b/man/installRawrrExe.Rd @@ -6,13 +6,16 @@ \title{Download and install the \code{rawrr.exe} console application} \usage{ installRawrrExe( - sourceUrl = "https://github.com/fgcz/rawrr/releases/download/1.9.2/rawrr.1.9.2.exe", + sourceUrl = "https://fgcz-ms.uzh.ch/~cpanse/rawrr/dotnet/", + force = FALSE, ... ) } \arguments{ \item{sourceUrl}{url of \code{rawrr.exe} assembly.} +\item{force}{if \code{TRUE} it will overwrite the assembly} + \item{...}{other parameter for \code{download.file}.} } \value{ @@ -21,7 +24,7 @@ failure. For the "wget" and "curl" methods this is the status code returned by the external program. } \description{ -downloads and installs the \code{rawrr.exe} .Net assembly in +downloads and installs the \code{rawrr.exe} .NET assembly in the directory provided by \code{rawrrAssemblyPath()}. } \details{ diff --git a/man/rawrr-package.Rd b/man/rawrr-package.Rd index 12e2922..b3afee3 100644 --- a/man/rawrr-package.Rd +++ b/man/rawrr-package.Rd @@ -5,7 +5,7 @@ \alias{rawrr-package} \title{rawrr: Direct Access to Orbitrap Data and Beyond} \description{ -This package wraps the functionality of the RawFileReader .NET assembly. Within the R environment, spectra and chromatograms are represented by S3 objects. 
The package provides basic functions to download and install the required third-party libraries. The package is developed, tested, and used at the Functional Genomics Center Zurich, Switzerland. +This package wraps the functionality of the Thermo Fisher Scientic RawFileReader .NET 8.0 assembly. Within the R environment, spectra and chromatograms are represented by S3 objects. The package provides basic functions to download and install the required third-party libraries. The package is developed, tested, and used at the Functional Genomics Center Zurich, Switzerland. } \seealso{ Useful links: @@ -23,5 +23,10 @@ Authors: \item Tobias Kockmann \email{tobias.kockmann@fgcz.ethz.ch} (\href{https://orcid.org/0000-0002-1847-885X}{ORCID}) } +Other contributors: +\itemize{ + \item Leonardo Schwarz \email{Leonardo.Schwarz@fgcz.ethz.ch} (\href{https://orcid.org/0009-0003-1828-6924}{ORCID}) [contributor] +} + } \keyword{internal} From e3fe190787e1da047954e43c96c5edb7c267344b Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Fri, 1 Nov 2024 15:45:42 +0100 Subject: [PATCH 12/20] doc: add benchmark xlab --- vignettes/rawrr.Rmd | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vignettes/rawrr.Rmd b/vignettes/rawrr.Rmd index 4ecac76..57c0f53 100644 --- a/vignettes/rawrr.Rmd +++ b/vignettes/rawrr.Rmd @@ -280,7 +280,11 @@ The R script that renders the html page is also available as supplementary infor 1:4 |> lapply(FUN=function(x){rawrr::sampleFilePath() |> rawrr:::.benchmark()}) |> Reduce(f=rbind) -> S -boxplot(S$runTimeInSec ~ S$count, log='y') +boxplot(runTimeInSec ~ count, + data = S, + log='y', + xlab = 'number of random generated scan ids') +legend("topleft", Sys.info()['nodename'], cex = 1) ``` # Conclusions From 729fd5f471af35f04a85322b0c45e8cbf21ef224 Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Fri, 1 Nov 2024 15:46:21 +0100 Subject: [PATCH 13/20] doc: add man pages for benchmark and nupkgs function --- man/dot-addNupkgSource.Rd | 17 
+++++++++++++++++ man/dot-benchmark.Rd | 6 +++--- 2 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 man/dot-addNupkgSource.Rd diff --git a/man/dot-addNupkgSource.Rd b/man/dot-addNupkgSource.Rd new file mode 100644 index 0000000..59f078a --- /dev/null +++ b/man/dot-addNupkgSource.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dotNetAssembly.R +\name{.addNupkgSource} +\alias{.addNupkgSource} +\title{dotnet nuget add source /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/ +dotnet nuget remove source "Package source 1" +dotnet nuget list source +dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator} +\usage{ +.addNupkgSource() +} +\description{ +dotnet nuget add source /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/ +dotnet nuget remove source "Package source 1" +dotnet nuget list source +dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator +} diff --git a/man/dot-benchmark.Rd b/man/dot-benchmark.Rd index 6242a17..96c356c 100644 --- a/man/dot-benchmark.Rd +++ b/man/dot-benchmark.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/benchmark.R \name{.benchmark} \alias{.benchmark} -\title{f <- "/Users/cp/Library/Caches/org.R-project.R/R/ExperimentHub/46314c3933e2_4590.raw"} +\title{benchmark execution time} \usage{ -.benchmark(f) +.benchmark(rawfile) } \description{ -f <- "/Users/cp/Library/Caches/org.R-project.R/R/ExperimentHub/46314c3933e2_4590.raw" +benchmark execution time } From 522cb6f5eb64ea9e5a2cff26ac062a771b01488f Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Sat, 2 Nov 2024 15:13:34 +0100 Subject: [PATCH 14/20] doc: figure cosmetics --- vignettes/rawrr.Rmd | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/vignettes/rawrr.Rmd b/vignettes/rawrr.Rmd index 57c0f53..73a3668 100644 --- a/vignettes/rawrr.Rmd +++ b/vignettes/rawrr.Rmd @@ -276,13 +276,16 @@ The R script that renders the html page is also 
available as supplementary infor ## Benchmark -```{r benchmark, message = FALSE, fig.cap="Runtime versus n Random selected scans.", fig.small=TRUE} -1:4 |> - lapply(FUN=function(x){rawrr::sampleFilePath() |> rawrr:::.benchmark()}) |> +```{r benchmark, message = FALSE, fig.cap="Runtime benchmark using `rawrr::readSpectrum`.", fig.small=TRUE} +seq(1, 4) |> + lapply(FUN=function(x){rawrr::sampleFilePath() |> + rawrr:::.benchmark()}) |> Reduce(f=rbind) -> S + boxplot(runTimeInSec ~ count, data = S, - log='y', + log ='y', + sub = paste0("Overall runtime took ", sum(runTimeInSec), "seconds."), xlab = 'number of random generated scan ids') legend("topleft", Sys.info()['nodename'], cex = 1) ``` From 39038d20dde61ad821094038ad18371393f4d2aa Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Sat, 2 Nov 2024 15:43:25 +0100 Subject: [PATCH 15/20] doc: substitute rawrr with Bioc link --- vignettes/rawrr.Rmd | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/vignettes/rawrr.Rmd b/vignettes/rawrr.Rmd index 73a3668..a123a6e 100644 --- a/vignettes/rawrr.Rmd +++ b/vignettes/rawrr.Rmd @@ -42,11 +42,11 @@ knitr::opts_chunk$set(fig.wide = TRUE, fig.retina = 3, error=FALSE) Mass spectrometry-based proteomics and metabolomics are the preferred technologies to study the protein and metabolite landscape of complex biological systems. The Orbitrap mass analyzer is one of the key innovations that propelled the field by providing high-resolution accurate mass (HRAM) data on a chromatographic time scale. Driven by the need to analyze the resulting LC-MS data, several specialized software tools have been developed in the last decade. In the academic environment, [MaxQuant](https://maxquant.org/)[@Cox2008] and [Skyline](https://skyline.ms/project/home/begin.view?)[@MacLean2010] are by far the most popular ones.
These software tools usually offer GUIs that control running predefined analysis templates/workflows, including free parameters that need to be defined by the user. In parallel, projects like [OpenMS](https://www.openms.de/)[@Rst2016] or `r BiocStyle::Githubpkg("levitsky/pyteomics")`[@Goloborodko2013] chose a fundamentally different approach. They aim at providing software libraries bound to specific programming languages like `C++` or `Python`. Naturally, these offer greater analytical flexibility but require programming skills from the end-user and have therefore not reached the popularity of their GUI counterparts. Proteomics and metabolomics specific libraries have also been developed for the [`R`](https://www.r-project.org/) statistical environment, but these mainly support high-level statistical analysis once the raw measurement data has undergone extensive preprocessing and aggregation by external software tools (often the GUI-based ones listed above). A typical example is the `R` package [MSstats](http://msstats.org/)[@Choi2014] for the statistical analysis of LC-MS experiments with complex designs or `r BiocStyle::Githubpkg("statOmics/MSqRob")`[@Goeminne2015]. MSstats can process MaxQuant or Skyline outputs and creates protein/peptide level estimates for whether the biological system shows statistically significant regulation. In a nutshell, these tools provide statistical postprocessing. Libraries that support working with the spectral data in `R` also exist, for instance, the Bioconductor package `r BiocStyle::Biocpkg('MSnbase')` [@Gatto2011]. However, they require conversion of raw data to exchange formats like [mzML](http://www.psidev.info/mzML), which is primarily supported by the [ProteoWizard](http://proteowizard.sourceforge.net/)[@Chambers2012] or `r BiocStyle::Githubpkg("compomics/ThermoRawFileParser")`[@ThermoRawFileParser] projects and its software tool `MSconvert`. 
-We strongly believe that a library providing raw data reading would finally close the gap and facilitate modular end-to-end analysis pipeline development in `R`. This could be of special interest to research environments/projects dealing with either big data analytics or scientists interested in code prototyping without formal computer science education. Another key aspect regarding multi-omics integration is the fact that high-throughput genomic data analysis is already done mostly in `R`. This is primarily due to the [Bioconductor project](https://www.bioconductor.org/)[@Huber2015] that currently provides >1900 open-source software packages, training and teaching, and a very active user and developer community. Having these thoughts in mind, we decided to implement our `R` package named `rawrr`. `rawrr` utilizes a vendor-provided API named `RawFileReader` [@rawfilereader] to access spectral data logged in proprietary Thermo Fisher Scientific raw files. These binary files are among others written by all Orbitrap mass spectrometers, unlocking an incredible amount of the recent global LC-MS data, also stored in public repositories like [ProteomeExchange](http://www.proteomexchange.org/). This manuscript presents a first package version/release and showcases its usage for bottom-up proteomics data analysis with a focus on Orbitrap data. +We strongly believe that a library providing raw data reading would finally close the gap and facilitate modular end-to-end analysis pipeline development in `R`. This could be of special interest to research environments/projects dealing with either big data analytics or scientists interested in code prototyping without formal computer science education. Another key aspect regarding multi-omics integration is the fact that high-throughput genomic data analysis is already done mostly in `R`. 
This is primarily due to the [Bioconductor project](https://www.bioconductor.org/)[@Huber2015] that currently provides >1900 open-source software packages, training and teaching, and a very active user and developer community. Having these thoughts in mind, we decided to implement our `R` package named `r BiocStyle::Biocpkg('rawrr')`. `r BiocStyle::Biocpkg('rawrr')` utilizes a vendor-provided API named `RawFileReader` [@rawfilereader] to access spectral data logged in proprietary Thermo Fisher Scientific raw files. These binary files are among others written by all Orbitrap mass spectrometers, unlocking an incredible amount of the recent global LC-MS data, also stored in public repositories like [ProteomeXchange](http://www.proteomexchange.org/). This manuscript presents a first package version/release and showcases its usage for bottom-up proteomics data analysis with a focus on Orbitrap data. # Implementation -Our implementation consists of two language layers, the top `R` layer and the hidden `C#` layer. Specifically, `R` functions requesting access to data stored in binary raw files (reader family functions listed in Table 1) invoke compiled `C#` wrapper methods using a system call. Calling a wrapper method typically results in the execution of methods defined in the `RawFileReader` dynamic link library provided by Thermo Fisher Scientific. Our precompiled wrapper methods are bundled in the `rawrr` executable file and shipped with the released `R` package. Running `rawrr.exe` requires the [Mono](https://www.mono-project.com/) environment on non-Microsoft operating systems. Mono is a cross platform, open source .NET framework. On Microsoft Windows the Microsoft .NET framework is typically already installed and sufficient. Our package also contains the `C#` source code (rawrr.cs), hopefully allowing other developers to follow and improve our code (open source). In order to return extracted data back to the `R` layer we use file I/O.
More specifically, the extracted information is written to a temporary location on the harddrive, read back into memory and parsed into `R` objects. +Our implementation consists of two language layers, the top `R` layer and the hidden `C#` layer. Specifically, `R` functions requesting access to data stored in binary raw files (reader family functions listed in Table 1) invoke compiled `C#` wrapper methods using a system call. Calling a wrapper method typically results in the execution of methods defined in the `RawFileReader` dynamic link library provided by Thermo Fisher Scientific. Our precompiled wrapper methods are bundled in the `r BiocStyle::Biocpkg('rawrr')` executable file and shipped with the released `R` package. Running `rawrr.exe` requires the [Mono](https://www.mono-project.com/) environment on non-Microsoft operating systems. Mono is a cross platform, open source .NET framework. On Microsoft Windows the Microsoft .NET framework is typically already installed and sufficient. Our package also contains the `C#` source code (rawrr.cs), hopefully allowing other developers to follow and improve our code (open source). In order to return extracted data back to the `R` layer we use file I/O. More specifically, the extracted information is written to a temporary location on the harddrive, read back into memory and parsed into `R` objects. Since mass spectrometry typically uses two basic data items, the mass spectrum and the mass chromatogram, we decided to implement corresponding objects following `R`'s `S3` OOP system [@newS] named `rawrrSpectrum` and `rawrrChromatogram`. These objects function as simplistic interface to almost all data stored in raw-formatted files. The package provides functions to create and validate class instances. 
While class constructors primarily exist for (unit) testing purposes, instances are typically generated by the reader family of functions enumerated in Table 1 and returned as object sets (`rawrrSpectrumSet`, `rawrrChromatogramSet`). The names of objects encapsulated within `rawrrSpectrum` instances are keys returned by the `RawFileReader` API and the corresponding values become data parts of the objects, typically vectors of type `numeric`, `logical` or `character`. It needs to be mentioned that the `rawrrSpectrum` content partially depends on the instrument model and installed instrument control software version. For instance, the keys `FAIMS Voltage On:` and `FAIMS CV:` are only written by instruments that support FAIMS acquisition. We also implemented basic generics for printing and plotting of objects in base `R` to minimize dependencies. @@ -94,7 +94,7 @@ The following sections are inspired by real-world research/infrastructure projec H <- rawrr::readFileHeader(rawfile = rawfile) ``` -The Orbitrap detector has been a tremendous success story in MS, since it offers HRAM data on a time scale that is compatible with chromatographic analysis (LC-MS)[@Makarov2000] and is therefore heavily used in bottom-up proteomics. However, analyzing Orbitrap data in `R` has so far only been possible after raw data conversion to exchange formats like mz(X)ML. Unfortunately, conversion is accompanied by a loss of Orbitrap-specific information. This use case shows how easy it is to work directly with raw-formated Orbitrap data after installing our `R` package `rawrr` which applies vendor APIs for data access. We use a complete LC-MS run recorded on a `r H$"Instrument model"` by parallel reaction monitoring (PRM)[@Gallien2012] for demonstration purposes (File name: `r H$"RAW file"`). The `r H$"Time range"[2]` min run resulted in `r format(H$"Number of scans")` scans that were written to the file. 
Already typesetting the above lines uses `rawrr` functionality, since instrument model, file name, time range of data acquisition, and number of scans is extracted from the binary file header (Note: This manuscript was written in `R markdown` and combines `R` code with narration). The respective function is called `readFileHeader()` and returns a simple `R` object of type `list` (see Table 1). +The Orbitrap detector has been a tremendous success story in MS, since it offers HRAM data on a time scale that is compatible with chromatographic analysis (LC-MS)[@Makarov2000] and is therefore heavily used in bottom-up proteomics. However, analyzing Orbitrap data in `R` has so far only been possible after raw data conversion to exchange formats like mz(X)ML. Unfortunately, conversion is accompanied by a loss of Orbitrap-specific information. This use case shows how easy it is to work directly with raw-formatted Orbitrap data after installing our `R` package `r BiocStyle::Biocpkg('rawrr')` which applies vendor APIs for data access. We use a complete LC-MS run recorded on a `r H$"Instrument model"` by parallel reaction monitoring (PRM)[@Gallien2012] for demonstration purposes (File name: `r H$"RAW file"`). The `r H$"Time range"[2]` min run resulted in `r format(H$"Number of scans")` scans that were written to the file. Already typesetting the above lines uses `r BiocStyle::Biocpkg('rawrr')` functionality, since instrument model, file name, time range of data acquisition, and number of scans are extracted from the binary file header (Note: This manuscript was written in `R markdown` and combines `R` code with narration). The respective function is called `readFileHeader()` and returns a simple `R` object of type `list` (see Table 1).
|Function Name |Description |Return value | |:-------------------|----------------------------------------------------|:------------------------| @@ -104,7 +104,7 @@ The Orbitrap detector has been a tremendous success story in MS, since it offers |`readTrailer()` |Reads trailer values for each scan event |`vector` | |`readChromatogram()`|Reads chromatographic data from a raw file |`rawrrChromatogram(Set)` | -Table: lists `rawrr` package functions connected to reading functionality. More details can be found in the package documentation (see supporting information, S-20 onwards [@Kockmann2021]). +Table: lists `r BiocStyle::Biocpkg('rawrr')` package functions connected to reading functionality. More details can be found in the package documentation (see supporting information, S-20 onwards [@Kockmann2021]). Individual scans or scan collections (sets) can be read by the function `readSpectrum()` which returns a `rawrrSpectrum` object or `rawrrSpectrumSet`. Our package also provides generics for printing and plotting these objects. The following code chunk depicts how a set of scans is read from the raw file (scan numbers were selected based on a database search). The corresponding Figure 1 shows the resulting plot for scan `9594` (USI: [mzspec:MSV000086542:20181113_010_autoQC01:scan:9594:LGGNEQVTR/2](http://massive.ucsd.edu/ProteoSAFe/usi.jsp#{%22usi%22:%22mzspec:MSV000086542:20181113_010_autoQC01:scan:9594:LGGNEQVTR/2%22})) assigned to the doubly-charged iRT peptide LGGNEQVTR by MS-GF+ (Score: 144, SpecProb: 1.9e-12, DB E-Value: 4.4e-4, see [MassIVE RMSV000000336.1](https://massive.ucsd.edu/ProteoSAFe/dataset.jsp?task=575538e190e84cbfbf6c17aa1219e403#reanalyses_header) for details of the search): @@ -171,11 +171,11 @@ maxIonTime <- rawrr::makeAccessor(key = "Max. Ion Time (ms):", returnType = "dou maxIonTime(S[[1]]) ``` -More sophisticated analysis workflows applying `rawrr` functionalities have also been demonstrated recently. 
For example, `rawrr` was used to annotate and compare marker ions found in HCD MS2 spectra for ADP-ribosylated peptides at different collision energies [@Gehrig2020], as well as for the annotation of small molecule spectra after UVPD dissociation [@Panse2020]. Such information can be conveniently extracted, since the `rawrrSpectrum` object provides easy access to normalized and absolute HCD energies. +More sophisticated analysis workflows applying `r BiocStyle::Biocpkg('rawrr')` functionalities have also been demonstrated recently. For example, `r BiocStyle::Biocpkg('rawrr')` was used to annotate and compare marker ions found in HCD MS2 spectra for ADP-ribosylated peptides at different collision energies [@Gehrig2020], as well as for the annotation of small molecule spectra after UVPD dissociation [@Panse2020]. Such information can be conveniently extracted, since the `rawrrSpectrum` object provides easy access to normalized and absolute HCD energies. ## Use Case II - iRT Regression for System Suitability Monitoring -By applying linear regression, one can convert observed peptide retention times (RTs) into dimensionless scores termed iRT values and *vice versa* [@Escher2012]. This can be used for retention time calibration/prediction. In addition, fitted iRT regression models provide highly valuable information about LC-MS run performance. This example shows how easy it is to perform iRT regression in `R` by just using the raw measurement data, our package `rawrr`, and well known `base R` functions supporting linear modeling. To get a first impression of the data we calculate a total ion chromatogram (TIC) using the `readChromatogram()` function. Plotting the TIC shows chromatographic peaks between 15 and 28 min that could be of peptidic origin (see Figure 3). 
Of note, there is also a `type = "bpc"` option if you prefer a base peak chromatogram (BPC): +By applying linear regression, one can convert observed peptide retention times (RTs) into dimensionless scores termed iRT values and *vice versa* [@Escher2012]. This can be used for retention time calibration/prediction. In addition, fitted iRT regression models provide highly valuable information about LC-MS run performance. This example shows how easy it is to perform iRT regression in `R` by just using the raw measurement data, our package `r BiocStyle::Biocpkg('rawrr')`, and well known `base R` functions supporting linear modeling. To get a first impression of the data we calculate a total ion chromatogram (TIC) using the `readChromatogram()` function. Plotting the TIC shows chromatographic peaks between 15 and 28 min that could be of peptidic origin (see Figure 3). Of note, there is also a `type = "bpc"` option if you prefer a base peak chromatogram (BPC): ```{r TIC, fig.cap="Total ion chromatogram (TIC) calculated from all MS1-level scans contained in 20181113_010_autoQC01.raw."} message(rawfile) @@ -270,9 +270,9 @@ text(iRTscore, rt, iRTmz, pos=1,cex=0.5) ## Extension -An extended and dynamic version of the above use cases can be found at (https://fgcz-ms.uzh.ch/~cpanse/rawrr/test/functional_test.html). The web page displays spectra and iRT regression models obtained over a set of raw files recorded approximately every 12 hours on different Orbitrap mass spectrometers at the FGCZ (some systems have gone out of service in the meantime). The original purpose of these injections is automated longitudinal system suitability monitoring and quality control. We re-use the resulting raw files to showcase `rawrr`'s functionality across different Orbitrap instrument models/generations. 
In order to find the highest-scoring MS2 scan for iRT peptides we now use a simple scoring function implemented in `R` (it counts the number of matching y-ions), instead of running an external search engine. The web page automatically updates every 30 minutes using the most recent two files per system as input data. Be aware that the code is executed in a full parallel fashion (each core processes one raw file) on a Linux server with network-attached storage. +An extended and dynamic version of the above use cases can be found at (https://fgcz-ms.uzh.ch/~cpanse/rawrr/test/functional_test.html). The web page displays spectra and iRT regression models obtained over a set of raw files recorded approximately every 12 hours on different Orbitrap mass spectrometers at the FGCZ (some systems have gone out of service in the meantime). The original purpose of these injections is automated longitudinal system suitability monitoring and quality control. We re-use the resulting raw files to showcase `r BiocStyle::Biocpkg('rawrr')`'s functionality across different Orbitrap instrument models/generations. In order to find the highest-scoring MS2 scan for iRT peptides we now use a simple scoring function implemented in `R` (it counts the number of matching y-ions), instead of running an external search engine. The web page automatically updates every 30 minutes using the most recent two files per system as input data. Be aware that the code is executed in a full parallel fashion (each core processes one raw file) on a Linux server with network-attached storage. -The R script that renders the html page is also available as supplementary information. The correspoding [R markdown file](https://github.com/fgcz/rawrr/blob/master/vignettes/JPR_supplement.Rmd) is part of the `rawrr` package and can be processed locally after downloading a snapshot of the above described input data from [MSV000086542](https://massive.ucsd.edu/ProteoSAFe/dataset.jsp?accession=MSV000086542) [@MSV000086542]. 
In summary, this shows how scalable analysis pipelines can be constructed starting from basic building blocks. It demonstrates that `rawrr`'s data access mechanism works for all types of Orbitrap instrument models. +The R script that renders the html page is also available as supplementary information. The corresponding [R markdown file](https://github.com/fgcz/rawrr/blob/master/vignettes/JPR_supplement.Rmd) is part of the `r BiocStyle::Biocpkg('rawrr')` package and can be processed locally after downloading a snapshot of the above described input data from [MSV000086542](https://massive.ucsd.edu/ProteoSAFe/dataset.jsp?accession=MSV000086542) [@MSV000086542]. In summary, this shows how scalable analysis pipelines can be constructed starting from basic building blocks. It demonstrates that `r BiocStyle::Biocpkg('rawrr')`'s data access mechanism works for all types of Orbitrap instrument models. ## Benchmark @@ -285,19 +285,19 @@ seq(1, 4) |> boxplot(runTimeInSec ~ count, data = S, log ='y', - sub = paste0("Overall runtime took ", sum(runTimeInSec), "seconds."), + sub = paste0("Overall runtime took ", round(sum(S$runTimeInSec), 3), " seconds."), xlab = 'number of random generated scan ids') legend("topleft", Sys.info()['nodename'], cex = 1) ``` # Conclusions -Our R package `rawrr` provides direct access to spectral data stored in Thermo Fisher Scientific raw-formatted binary files, thereby eliminating the need for unfavorable conversion to exchange formats. Within the `R` environment, spectral data is presented by using only two non-standard objects representing data items well known to analytical scientists (mass spectrum and mass chromatogram). This design choice makes data handling relatively easy and intuitive and requires little knowledge about internal/technical details of the implementation. By using vendor API methods whenever possible, we nevertheless made sure that ease-of-use does not impair performance.
We also emphasize that our implementation aligns well with common `R` conventions and styles. -Soon, we plan to align further efforts with the R for Mass Spectrometry initiative. We hope to extend `rawrr` towards the concept of exchangeable `r Biocpkg("Spectra")` backends, in particular, the `r Biocpkg("MsBackendRawFileReader")`, for data access and parallel computation. These would be necessary next steps towards big computational proteomics in `R`. +Our R package `r BiocStyle::Biocpkg('rawrr')` provides direct access to spectral data stored in Thermo Fisher Scientific raw-formatted binary files, thereby eliminating the need for unfavorable conversion to exchange formats. Within the `R` environment, spectral data is presented by using only two non-standard objects representing data items well known to analytical scientists (mass spectrum and mass chromatogram). This design choice makes data handling relatively easy and intuitive and requires little knowledge about internal/technical details of the implementation. By using vendor API methods whenever possible, we nevertheless made sure that ease-of-use does not impair performance. We also emphasize that our implementation aligns well with common `R` conventions and styles. +Soon, we plan to align further efforts with the R for Mass Spectrometry initiative. We hope to extend `r BiocStyle::Biocpkg('rawrr')` towards the concept of exchangeable `r Biocpkg("Spectra")` backends, in particular, the `r Biocpkg("MsBackendRawFileReader")`, for data access and parallel computation. These would be necessary next steps towards big computational proteomics in `R`. # Acknowledgements -We thank Lilly van de Venn for designing the `rawrr` package logo. We are grateful to Jim Shofstahl (Thermo Fisher Scientific) for providing the `RawFileReader` .NET assembly, `C#` example code, and for answering questions during the development process of `rawrr`. 
We are grateful to Antje Dittmann for carefully reading our manuscript and suggesting corrections and improvements. TK would like to thank Hadley Wickham for his inspiring books on advanced `R` and package development, especially for keeping those freely accessible. +We thank Lilly van de Venn for designing the `r BiocStyle::Biocpkg('rawrr')` package logo. We are grateful to Jim Shofstahl (Thermo Fisher Scientific) for providing the `RawFileReader` .NET assembly, `C#` example code, and for answering questions during the development process of `r BiocStyle::Biocpkg('rawrr')`. We are grateful to Antje Dittmann for carefully reading our manuscript and suggesting corrections and improvements. TK would like to thank Hadley Wickham for his inspiring books on advanced `R` and package development, especially for keeping those freely accessible. The package authors thank Hervé Pagès for very detailed and constructive feedback during the Bioconductor package review process. # Abbreviations From fe2c17e9ac5631d05021e2f46078d90c15cbc123 Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Sun, 3 Nov 2024 09:18:03 +0100 Subject: [PATCH 16/20] feat: build dotnet works --- R/dotNetAssembly.R | 204 ++++++++++++++++++++++++++------------------- 1 file changed, 118 insertions(+), 86 deletions(-) diff --git a/R/dotNetAssembly.R b/R/dotNetAssembly.R index 15610f2..f54cb4c 100644 --- a/R/dotNetAssembly.R +++ b/R/dotNetAssembly.R @@ -42,11 +42,11 @@ #' Derives the path where all .NET assemblies are stored. 
#' #' @return path -#' @export rawrrAssemblyPath #' @seealso \code{installRawFileReaderDLLs} and \code{installRawrrExe} #' #' @examples #' rawrrAssemblyPath() +#' @export rawrrAssemblyPath <- function(){ libdir <- tools::R_user_dir("rawrr", which='cache') d <- file.path(libdir, 'rawrrassembly') @@ -83,13 +83,16 @@ rawrrAssemblyPath <- function(){ .rawrrAssembly <- function(){ - if (Sys.info()['sysname'] == "Darwin"){ - file.path(rawrr::rawrrAssemblyPath(), 'osx-x64', 'rawrr') -> f - } else if (Sys.info()['sysname'] == "Linux"){ - file.path(rawrr::rawrrAssemblyPath(), 'linux-x64', 'rawrr') -> f - } else { - file.path(rawrr::rawrrAssemblyPath(), 'win-x64', 'rawrr.exe') -> f - } + libdir <- tools::R_user_dir("rawrr", which='cache') + d <- file.path(libdir, 'rawrrassembly') + + if (Sys.info()['sysname'] == "Darwin"){ + file.path(d, 'osx-x64', 'rawrr') -> f + } else if (Sys.info()['sysname'] == "Linux"){ + file.path(d, 'linux-x64', 'rawrr') -> f + } else { + file.path(d, 'win-x64', 'rawrr.exe') -> f + } return(f) } @@ -112,20 +115,6 @@ rawrrAssemblyPath <- function(){ #' @aliases ThermoFisher #' @aliases ThermoFisherScientific #' -#' @details -#' The console application assembly \code{rawrr.exe} requires: -#' \itemize{ -#' \item {\code{ThermoFisher.CommonCore.Data.dll}, } -#' \item{\code{ThermoFisher.CommonCore.MassPrecisionEstimator.dll}, and} -#' \item{ThermoFisher.CommonCore.RawFileReader.dll} -#' }. 
-#' -#' @references \itemize{ -#' \item{\url{https://www.mono-project.com/docs/advanced/assemblies-and-the-gac/}} -#' \item{\url{https://planetorbitrap.com/rawfilereader}} -#' \item{\doi{10.1021/acs.jproteome.0c00866}} -#' } -#' #' @author Christian Panse , 2021, 2024 #' #' @return An (invisible) vector of integer code, 0 for success and non-zero for @@ -157,7 +146,7 @@ rawrrAssemblyPath <- function(){ destfile <- file.path(rawfileReaderDLLsPath, nupkg) download.file(file.path(sourceUrl, nupkg), destfile = destfile, mode='wb') - }, 0) -> rv + }, FUN.VALUE = 0) -> rv rv } @@ -165,11 +154,49 @@ rawrrAssemblyPath <- function(){ #' dotnet nuget add source /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/ #' dotnet nuget remove source "Package source 1" #' dotnet nuget list source -#' dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator .addNupkgSource <- function(){ + system2('dotnet', args = c('nuget', 'add', 'source', rawrrAssemblyPath())) } -.addPackages <- function(){ +#' dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator +.addPackages <- function(dir){ + setwd(dir) + c('ThermoFisher.CommonCore.BackgroundSubtraction', + 'ThermoFisher.CommonCore.RandomAccessReaderPlugin', + 'ThermoFisher.CommonCore.Data', + 'ThermoFisher.CommonCore.RawfileReader', + 'ThermoFisher.CommonCore.MassPrecisionEstimator') |> + vapply(FUN = function(nupkg){ + system2('dotnet', args = c('add', 'package', nupkg)) + }, FUN.VALUE = 0) +} + +.build <- function(dir){ +################################################################################ + setwd(dir) + + tempOut <- tempfile(pattern = "rawrr.build.stdout.", tmpdir = dir, fileext = ".txt") + tempErr <- tempfile(pattern = "rawrr.build.stderr.", tmpdir = dir, fileext = ".txt") + + message("Running build ...") + message("Write stdout to", tempOut) + message("Write stderr to", tempErr) + + system2('dotnet', args = c('publish', '-c', 'Release', '-a', 'x64', '-p', + 'PublishReadyToRun=true', '-o', 
dirname(rawrr:::.rawrrAssembly())), + stdout = tempOut, + stderr = tempErr) -> rv + + + if (rv == 0){ + message("Build succesfully done.") + }else{ + message("Build error.") + if (interactive()){ + file.show(tempOut) + file.show(tempErr) + } + } } #' Download and install the \code{rawrr.exe} console application @@ -235,23 +262,14 @@ installRawrrExe <- } .buildOnLoad <- function(){ - return() - - ## TODO: - # nothing to do - if (file.exists(.rawrrAssembly())){ + if (file.exists(rawrr:::.rawrrAssembly())){ return() } - # check Thermo DLLs - if(isFALSE(.checkRawFileReaderDLLs(message))){ - return() - } - - if (Sys.which("msbuild") == "" && Sys.which("xbuild") == "") + if (Sys.which("dotnet") == "") { - msg <- c("Could not find 'msbuild' or 'xbuild' in the path. Therefore, ", + msg <- c("Could not find 'dotnet' in the path. Therefore, ", "it is not possible to build the 'rawrr.exe' assembly from", " source code.\nTry to run rawrr::installRawrrExe().") message(msg) @@ -262,12 +280,12 @@ installRawrrExe <- } -## TODO: make it work for dotnet #' Build \code{rawrr.exe} console application. #' #' @description builds \code{rawrr.exe} file from C# source code requiring -#' xbuild or msbuild tools. The console application \code{rawrr.exe} -#' is used by the package's reader functions through a \link{system2} call. +#' .NET SDK. The console application \code{rawrr.exe} +#' is used by the package's reader functions through a \link{system2} call +#' or a \link{textConnection}. #' #' @details The rawrr package implementation consists of two language layers, #' the top R layer and the hidden C# layer. Specifically, R functions requesting @@ -276,85 +294,99 @@ installRawrrExe <- #' execution of methods defined in the RawFileReader dynamic link library #' provided by Thermo Fisher Scientific. Our precompiled wrapper methods are #' bundled in the \code{rawrr.exe} executable file (.NET assembly) and shipped -#' with the released R package. 
Running \code{rawrr.exe} requires the -#' \url{https://www.mono-project.com/} environment on non-Microsoft -#' operating systems. Mono is a cross platform, open source .NET framework. -#' On Microsoft Windows the Microsoft .NET framework is typically already -#' installed and sufficient. Our package also contains the C# source code -#' \code{rawrr.cs}. +#' with the released R package. +#' Our package also contains the C# source code \code{rawrr.cs}. #' In order to return extracted data back to the R layer we use file I/O. #' More specifically, the extracted information is written to a temporary -#' location on the harddrive, read back into memory and parsed into R objects. +#' location on the harddrive, read back into memory and parsed into R objects. #' -#' @author Tobias Kockmann, Christian Panse , 2021 +#' @author Tobias Kockmann, Christian Panse , 2021, 2024 #' -#' @seealso \link{installRawrrExe} and \link{installRawFileReaderDLLs} +#' @seealso \link{installRawrrExe} #' #' @references \itemize{ -#' \item{\url{https://www.mono-project.com/docs/advanced/assemblies-and-the-gac/}} -#' \item{\url{https://planetorbitrap.com/rawfilereader}} +#' \item{\url{https://www.mono-project.com/docs/advanced/assemblies-and-the-gac/}, 2020} +#' \item{\url{https://planetorbitrap.com/rawfilereader}, 2020} +#' \item{\url{https://github.com/thermofisherlsms/RawFileReader/}, 2024} #' \item{\url{https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/compiler-options/advanced}} #' \item{\doi{10.1021/acs.jproteome.0c00866}} #' } #' #' @return the return value of the system2 command. 
-#' @export buildRawrrExe +#' @export buildRawrrExe <- function(){ packagedir <- system.file(package = 'rawrr') + buildDir <- tempdir() - if (isFALSE(dir.exists(rawrrAssemblyPath()))){ - dir.create(rawrrAssemblyPath(), recursive = TRUE) - } - - if (isFALSE(.checkRawFileReaderDLLs())){ - return() + + if (isFALSE(dir.exists( dirname(rawrr:::.rawrrAssembly()) ))){ + dir.create(dirname(rawrr:::.rawrrAssembly()), recursive = TRUE) } - - - if (Sys.which("msbuild") == "" && Sys.which("xbuild") == "") + if (Sys.which("dotnet") == "") { - msg <- c("Could not find 'msbuild' or 'xbuild' in the path. Therefore, ", + msg <- c("Could not find 'dotnet' in the path. Therefore, ", "it is not possible to build the 'rawrr.exe' assembly from", " source code.\nTry to run rawrr::installRawrrExe().") stop(msg) } + + ## TODO: copy files to tempdir + c("rawrrassembly/rawrr.cs", "rawrrassembly/rawrr.csproj") |> + lapply(function(f){ + src <- file.path(packagedir, f) + dst <- buildDir # dirname(rawrr:::.rawrrAssembly()) + message("Coping ", basename(src), " to ", dst) + stopifnot(file.copy(src, dst, overwrite = TRUE)) + }) + + ## TODO: check if already exists + # .downloadNupkgs() + # .addNupkgSource() + + .addPackages(dir = buildDir) + .build(dir = buildDir) + + # TODO: check if rawrr is working + # copy rawrr to rawrr:::.rawrrAssembly() dir + + return() cwd <- getwd() setwd(file.path(packagedir, 'rawrrassembly')) - cmd <- ifelse(Sys.which("msbuild") != "", "msbuild", "xbuild") + #cmd <- ifelse(Sys.which("msbuild") != "", "msbuild", "xbuild") # https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/compiler-options/advanced#additionallibpaths - additionalLibPath <- .determineAdditionalLibPath() + #additionalLibPath <- .determineAdditionalLibPath() buildLog <- tempfile("rawrr_build.log.", tmpdir = rawrrAssemblyPath()) - cmdArgs <- sprintf("/p:OutputPath=%s/ /p:AdditionalLibPaths=%s /v:diagnostic /flp:LogFile=%s rawrr.csproj", - shQuote(rawrrAssemblyPath()), - 
shQuote(additionalLibPath), - shQuote(buildLog)) + #cmdArgs <- sprintf("/p:OutputPath=%s/ /p:AdditionalLibPaths=%s /v:diagnostic /flp:LogFile=%s rawrr.csproj", + # shQuote(rawrrAssemblyPath()), + # shQuote(additionalLibPath), + # shQuote(buildLog)) - message("Attempting to build 'rawrr.exe', one time setup ...") - rv <- system2 (cmd, cmdArgs, wait=TRUE, stderr=TRUE, stdout=TRUE) + #message("Attempting to build 'rawrr.exe', one time setup ...") + #rv <- system2 (cmd, cmdArgs, wait=TRUE, stderr=TRUE, stdout=TRUE) - if (rv <- any(grepl("Build succeeded.", rv)) - && file.exists(.rawrrAssembly())){ - msg <- sprintf("'rawrr.exe' successfully built in \n'%s'. -The build report should have been saved in\n'%s'.", .rawrrAssembly(), buildLog) - message(msg) - }else{ - err <- sprintf("Building 'rawrr.exe' failed. For details see the build report, supposed to be saved in: -'%s' -Call 'rawrr::installRawrrExe()' to download and install a precompiled version -from a remote location. Note this requires internet connection.", - buildLog) - setwd(cwd) - stop(err) - } - setwd(cwd) - rv + #if (rv <- any(grepl("Build succeeded.", rv)) + # && file.exists(.rawrrAssembly())){ + # msg <- sprintf("'rawrr.exe' successfully built in \n'%s'. + # The build report should have been saved in\n'%s'.", .rawrrAssembly(), buildLog) + # message(msg) + #}else{ + # err <- sprintf("Building 'rawrr.exe' failed. For details see the build report, supposed to be saved in: + # '%s' + # Call 'rawrr::installRawrrExe()' to download and install a precompiled version + # from a remote location. 
Note this requires internet connection.", + # buildLog) + # setwd(cwd) + # stop(err) + #} + #setwd(cwd) + #rv } .eulaPath <- function(){ From f7c107fc2a82aa4df9e8b79577c3b4eeebf21560 Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Sun, 3 Nov 2024 10:52:31 +0100 Subject: [PATCH 17/20] feat: build rawrr process working --- INSTALL | 5 + NAMESPACE | 2 +- R/dotNetAssembly.R | 199 +++++++++++++----------------- R/zzz.R | 24 ++-- inst/templates/autoQC01.Rmd | 239 ------------------------------------ man/buildRawrrExe.Rd | 26 ++-- man/dot-addNupkgSource.Rd | 6 +- man/dot-benchmark.Rd | 6 +- man/dot-downloadNupkgs.Rd | 15 --- man/installRawrrExe.Rd | 6 +- man/rawrrAssemblyPath.Rd | 2 +- 11 files changed, 121 insertions(+), 409 deletions(-) delete mode 100644 inst/templates/autoQC01.Rmd diff --git a/INSTALL b/INSTALL index e10ee0e..7edaf8a 100644 --- a/INSTALL +++ b/INSTALL @@ -52,6 +52,11 @@ jim.Shofstahl@thermofisher.com dotnet publish rawrr-dotnet.csproj --os osx -a x64 --output /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/osx-x64 dotnet publish rawrr-dotnet.csproj --os win -a x64 --output /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/win-x64 dotnet publish rawrr-dotnet.csproj --os linux -a x64 --output /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/linux-x64 + + +## generates a ~110MB BLOB +dotnet publish -c Release -r linux-x64 -p PublishReadyToRun=true +dotnet publish -c Release -r win-x64 -p PublishReadyToRun=true ``` diff --git a/NAMESPACE b/NAMESPACE index 20df44c..8792ce4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,7 +6,6 @@ S3method(plot,rawrrSpectrum) S3method(print,rawrrSpectrum) S3method(summary,rawrrChromatogram) S3method(summary,rawrrSpectrum) -export(.thermofisherlsmsUrl) export(basePeak) export(buildRawrrExe) export(dependentScan) @@ -37,6 +36,7 @@ importFrom(graphics,text) importFrom(stats,lm) importFrom(stats,na.omit) importFrom(stats,predict) +importFrom(tools,R_user_dir) 
importFrom(utils,download.file) importFrom(utils,head) importFrom(utils,packageVersion) diff --git a/R/dotNetAssembly.R b/R/dotNetAssembly.R index f54cb4c..1de0d34 100644 --- a/R/dotNetAssembly.R +++ b/R/dotNetAssembly.R @@ -3,27 +3,26 @@ # Test if \code{rawrr.exe} .NET assembly is working .isAssemblyWorking <- function(FUN = stop, exe = .rawrrAssembly()){ - + if (isFALSE(file.exists(exe))){ msg <- c("'rawrr.exe' not found.\n", "Run 'rawrr::installRawrrExe()'.", " For more information, type '?rawrr.exe'.") FUN(msg) } - - + # execute rawrr.exe assembly and keep output string rvs <- "?" if (file.exists(exe)){ rvs <- system2(exe, stdout = TRUE) } - + # expect that output string if (rvs != "No RAW file specified!"){ msg <- ("The 'rawrr.exe' dot Net assembly is not working!") FUN(msg) } - + if(interactive()){ stopifnot(.isRawFileReaderLicenseAccepted()) } TRUE } @@ -42,22 +41,13 @@ #' Derives the path where all .NET assemblies are stored. #' #' @return path -#' @seealso \code{installRawFileReaderDLLs} and \code{installRawrrExe} +#' @seealso \code{installRawrrExe} and \code{buildRawrrExe} #' #' @examples #' rawrrAssemblyPath() #' @export rawrrAssemblyPath <- function(){ - libdir <- tools::R_user_dir("rawrr", which='cache') - d <- file.path(libdir, 'rawrrassembly') - - if (interactive()){ - if (isFALSE(dir.exists(d))){ - #msg <- sprintf("rawrr .NET assemply path '%s' is not existing!", d) - #warning(msg) - } - } - return(d) + dirname(.rawrrAssembly()) } @@ -71,7 +61,7 @@ rawrrAssemblyPath <- function(){ } return(dllExists) }, FALSE) - + if (isFALSE(all(rv)) && TRUE){ FUN("'ThermoFisher.CommonCore.*.dll' files are not available on the system.\n", "Run 'rawrr::installRawFileReaderDLLs()' or setenv MONO_PATH to ", @@ -82,6 +72,7 @@ rawrrAssemblyPath <- function(){ } +#' @importFrom tools R_user_dir .rawrrAssembly <- function(){ libdir <- tools::R_user_dir("rawrr", which='cache') d <- file.path(libdir, 'rawrrassembly') @@ -107,39 +98,39 @@ rawrrAssemblyPath <- function(){ #' 
Download and install the Thermo Fisher Scientific .NET 8.0 nupkgs -#' +#' #' @param sourceUrl url of nupkgs. #' @param force if \code{TRUE} it will overwrite the pkgs. #' #' @aliases Thermo #' @aliases ThermoFisher #' @aliases ThermoFisherScientific -#' +#' #' @author Christian Panse , 2021, 2024 -#' +#' #' @return An (invisible) vector of integer code, 0 for success and non-zero for #' failure. For the "wget" and "curl" methods this is the status code returned #' by the external program. #' #' @importFrom utils download.file .downloadNupkgs <- function(sourceUrl = .thermofisherlsmsUrl(), force = TRUE){ - + rawfileReaderDLLsPath <- rawrrAssemblyPath() if (isFALSE(dir.exists(rawfileReaderDLLsPath))){ dir.create(rawfileReaderDLLsPath, recursive = TRUE) } - + if (isTRUE(dir.exists(rawfileReaderDLLsPath))){ msg <- sprintf("removing nupkgs files in directory '%s'", rawfileReaderDLLsPath) message(msg) - + file.remove(file.path(rawrrAssemblyPath(), list.files(rawrrAssemblyPath(), pattern="\\.nupkg$"))) } - + c('ThermoFisher.CommonCore.BackgroundSubtraction.8.0.6.nupkg', - 'ThermoFisher.CommonCore.RandomAccessReaderPlugin.8.0.6.nupkg', + 'ThermoFisher.CommonCore.RandomAccessReaderPlugin.8.0.6.nupkg', 'ThermoFisher.CommonCore.Data.8.0.6.nupkg', 'ThermoFisher.CommonCore.RawfileReader.8.0.6.nupkg', 'ThermoFisher.CommonCore.MassPrecisionEstimator.8.0.6.nupkg') |> vapply(FUN = function(nupkg){ @@ -153,24 +144,54 @@ rawrrAssemblyPath <- function(){ #' dotnet nuget add source /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/ #' dotnet nuget remove source "Package source 1" -#' dotnet nuget list source +#' dotnet nuget list source .addNupkgSource <- function(){ system2('dotnet', args = c('nuget', 'add', 'source', rawrrAssemblyPath())) } +.copySourceCode <- function(dir) { + dst <- dir + sourceCodeFiles <- c("rawrrassembly/rawrr.cs", "rawrrassembly/rawrr.csproj") + packagedir <- system.file(package = 'rawrr') + + + sourceCodeFiles |> + lapply(function(f){ + src <- 
file.path(packagedir, f) + message("Copying ", basename(src), " to ", dst) + stopifnot(file.copy(src, dst, overwrite = TRUE)) + }) +} + #' dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator -.addPackages <- function(dir){ +.addPackages <- function(dir, version = "8.0.6"){ + tempOut <- tempfile(pattern = "rawrr.add.packages.stdout.", tmpdir = dir, fileext = ".txt") + tempErr <- tempfile(pattern = "rawrr.add.packages.stderr.", tmpdir = dir, fileext = ".txt") setwd(dir) c('ThermoFisher.CommonCore.BackgroundSubtraction', - 'ThermoFisher.CommonCore.RandomAccessReaderPlugin', + 'ThermoFisher.CommonCore.RandomAccessReaderPlugin', 'ThermoFisher.CommonCore.Data', 'ThermoFisher.CommonCore.RawfileReader', 'ThermoFisher.CommonCore.MassPrecisionEstimator') |> vapply(FUN = function(nupkg){ - system2('dotnet', args = c('add', 'package', nupkg)) + system2('dotnet', args = c('add', 'package', nupkg, '-v', version), + stdout = tempOut, + stderr = tempErr) -> rv + if (interactive()){ + if (rv != 0){ + file.show(tempOut) + file.show(tempErr) + } + } + rv }, FUN.VALUE = 0) } +.clean <- function(){ + message("Removing ", rawrr:::.rawrrAssembly()) + file.remove(rawrr:::.rawrrAssembly()) +} + .build <- function(dir){ ################################################################################ setwd(dir) @@ -187,7 +208,7 @@ rawrrAssemblyPath <- function(){ stdout = tempOut, stderr = tempErr) -> rv - + if (rv == 0){ message("Build succesfully done.") }else{ @@ -199,14 +220,14 @@ rawrrAssemblyPath <- function(){ } } -#' Download and install the \code{rawrr.exe} console application -#' -#' @description downloads and installs the \code{rawrr.exe} .NET assembly in +#' Download \code{rawrr} assembly +#' +#' @description downloads and installs the \code{rawrr.exe} .NET assembly in #' the directory provided by \code{rawrrAssemblyPath()}. 
-#' -#' @details The console application \code{rawrr.exe} is used by the package's +#' +#' @details The console application \code{rawrr} is used by the package's #' reader functions through a \link{system2} call. -#' +#' #' @param sourceUrl url of \code{rawrr.exe} assembly. #' @param force if \code{TRUE} it will overwrite the assembly #' @param ... other parameter for \code{download.file}. @@ -228,7 +249,7 @@ installRawrrExe <- if (interactive()){ response <- readline(prompt = sprintf("Assembly exists. Do you want to overwrite it? [Y/n]: ")) if (tolower(response) == "y"){ - + }else{ return() } @@ -252,7 +273,7 @@ installRawrrExe <- dir.create(dirname(rawrrAssembly), recursive = TRUE, showWarnings = FALSE) - rv = download.file(sourceUrl, destfile = rawrrAssembly, mode = "wb", + rv = download.file(sourceUrl, destfile = rawrrAssembly, mode = "wb", ...) Sys.chmod(rawrrAssembly, mode = "0777", use_umask = TRUE) @@ -263,16 +284,16 @@ installRawrrExe <- .buildOnLoad <- function(){ # nothing to do - if (file.exists(rawrr:::.rawrrAssembly())){ + if (file.exists(.rawrrAssembly())){ return() } - + if (Sys.which("dotnet") == "") { msg <- c("Could not find 'dotnet' in the path. Therefore, ", "it is not possible to build the 'rawrr.exe' assembly from", " source code.\nTry to run rawrr::installRawrrExe().") - message(msg) + warning(msg) return() } @@ -281,12 +302,12 @@ installRawrrExe <- #' Build \code{rawrr.exe} console application. -#' -#' @description builds \code{rawrr.exe} file from C# source code requiring +#' +#' @description builds \code{rawrr.exe} file from C# source code requiring #' .NET SDK. The console application \code{rawrr.exe} #' is used by the package's reader functions through a \link{system2} call #' or a \link{textConnection}. -#' +#' #' @details The rawrr package implementation consists of two language layers, #' the top R layer and the hidden C# layer. 
Specifically, R functions requesting #' access to data stored in binary raw files invoke compiled C# wrapper methods @@ -299,30 +320,25 @@ installRawrrExe <- #' In order to return extracted data back to the R layer we use file I/O. #' More specifically, the extracted information is written to a temporary #' location on the harddrive, read back into memory and parsed into R objects. -#' +#' #' @author Tobias Kockmann, Christian Panse , 2021, 2024 -#' -#' @seealso \link{installRawrrExe} -#' +#' +#' @seealso \link{installRawrrExe} +#' #' @references \itemize{ #' \item{\url{https://www.mono-project.com/docs/advanced/assemblies-and-the-gac/}, 2020} -#' \item{\url{https://planetorbitrap.com/rawfilereader}, 2020} +#' \item{\url{https://planetorbitrap.com/rawfilereader}, 2020} #' \item{\url{https://github.com/thermofisherlsms/RawFileReader/}, 2024} #' \item{\url{https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/compiler-options/advanced}} #' \item{\doi{10.1021/acs.jproteome.0c00866}} #' } -#' +#' #' @return the return value of the system2 command. -#' @export +#' @export buildRawrrExe <- function(){ packagedir <- system.file(package = 'rawrr') buildDir <- tempdir() - - - if (isFALSE(dir.exists( dirname(rawrr:::.rawrrAssembly()) ))){ - dir.create(dirname(rawrr:::.rawrrAssembly()), recursive = TRUE) - } - + if (Sys.which("dotnet") == "") { msg <- c("Could not find 'dotnet' in the path. 
Therefore, ", @@ -331,62 +347,21 @@ buildRawrrExe <- function(){ stop(msg) } - ## TODO: copy files to tempdir - c("rawrrassembly/rawrr.cs", "rawrrassembly/rawrr.csproj") |> - lapply(function(f){ - src <- file.path(packagedir, f) - dst <- buildDir # dirname(rawrr:::.rawrrAssembly()) - message("Coping ", basename(src), " to ", dst) - stopifnot(file.copy(src, dst, overwrite = TRUE)) - }) + message("Building rawrr assembly using .NET 8.0 ...") + + if (isFALSE(dir.exists( rawrrAssemblyPath() ))){ + dir.create(rawrrAssemblyPath(), recursive = TRUE) + } + + .copySourceCode(dir = buildDir) ## TODO: check if already exists # .downloadNupkgs() # .addNupkgSource() - + .addPackages(dir = buildDir) .build(dir = buildDir) - - # TODO: check if rawrr is working - # copy rawrr to rawrr:::.rawrrAssembly() dir - - return() - - cwd <- getwd() - setwd(file.path(packagedir, 'rawrrassembly')) - - #cmd <- ifelse(Sys.which("msbuild") != "", "msbuild", "xbuild") - - # https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/compiler-options/advanced#additionallibpaths - #additionalLibPath <- .determineAdditionalLibPath() - - buildLog <- tempfile("rawrr_build.log.", - tmpdir = rawrrAssemblyPath()) - - #cmdArgs <- sprintf("/p:OutputPath=%s/ /p:AdditionalLibPaths=%s /v:diagnostic /flp:LogFile=%s rawrr.csproj", - # shQuote(rawrrAssemblyPath()), - # shQuote(additionalLibPath), - # shQuote(buildLog)) - - #message("Attempting to build 'rawrr.exe', one time setup ...") - #rv <- system2 (cmd, cmdArgs, wait=TRUE, stderr=TRUE, stdout=TRUE) - - #if (rv <- any(grepl("Build succeeded.", rv)) - # && file.exists(.rawrrAssembly())){ - # msg <- sprintf("'rawrr.exe' successfully built in \n'%s'. - # The build report should have been saved in\n'%s'.", .rawrrAssembly(), buildLog) - # message(msg) - #}else{ - # err <- sprintf("Building 'rawrr.exe' failed. 
For details see the build report, supposed to be saved in: - # '%s' - # Call 'rawrr::installRawrrExe()' to download and install a precompiled version - # from a remote location. Note this requires internet connection.", - # buildLog) - # setwd(cwd) - # stop(err) - #} - #setwd(cwd) - #rv + .isAssemblyWorking() } .eulaPath <- function(){ @@ -397,12 +372,12 @@ buildRawrrExe <- function(){ licenseFile <- file.path(system.file(package = 'rawrr'), 'rawrrassembly', 'RawFileReaderLicense.txt') stopifnot(file.exists(licenseFile)) - + eulaFile <- .eulaPath() - + msg <- c("# By changing the setting below to TRUE you are accepting ", "the Thermo License agreement.") - + if (!file.exists(eulaFile)){ file.show(licenseFile) fmt <- "Do you accept the Thermo License agreement '%s'? [Y/n]: " @@ -416,13 +391,13 @@ buildRawrrExe <- function(){ writeLines(paste(msg, paste0("# ", date()), "eula=true", sep="\n"), fileConn) close(fileConn) - + return(TRUE %in% grepl("eula=true", tolower(readLines(eulaFile)))) } }else{ return(TRUE %in% grepl("eula=true", tolower(readLines(eulaFile)))) } - + msg <- ("You have to accept the Thermo Fisher Scientific License agreement!") stop(msg) } diff --git a/R/zzz.R b/R/zzz.R index a10cea7..6952d46 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,28 +1,18 @@ #R + +.onLoad <- function(lib, pkg){ + if(interactive()){ + .buildOnLoad() + } +} #' @importFrom utils packageVersion .onAttach <- function(lib, pkg){ if(interactive()){ - packagedir <- system.file(package = 'rawrr') + ## packagedir <- system.file(package = 'rawrr') version <- packageVersion('rawrr') thermocopyright <- "RawFileReader reading tool. Copyright \u00A9 2016 by Thermo Fisher Scientific, Inc. All rights reserved." 
packageStartupMessage("Package 'rawrr' version ", version, " using\n", thermocopyright) invisible() } } - -.onLoad <- function(lib, pkg){ - if (Sys.info()['sysname'] %in% c("Darwin", "Linux")){ - mp <- Sys.which('mono') - if (!nzchar(mp)){ - msg <- c("The cross platform, open source .NET framework (mono) is not available.\n", - "Consider to install 'apt-get install mono-runtime libmono-system-data4.0-cil -y' on Linux\n", - "or download/install from https://www.mono-project.com/.") - stop(msg) - } - } - - if(interactive()){ - .buildOnLoad() - } -} diff --git a/inst/templates/autoQC01.Rmd b/inst/templates/autoQC01.Rmd deleted file mode 100644 index 0438048..0000000 --- a/inst/templates/autoQC01.Rmd +++ /dev/null @@ -1,239 +0,0 @@ ---- -title: "autoQC01 STATISTICS" -author: CP/TK -output: - html_document: - toc: true - toc_float: true - number_sections: true - theme: united -params: - input: "/scratch/cpanse/autoQC01.csv" - r.squared.cutoff.yellow: 0.98 - r.squared.cutoff.red: 0.9 - r.squared.cutoff.yellow.rgb: "lightgray" - r.squared.cutoff.red.rgb: "#D7261E" ---- - - - -# Configuiration parameters - -| | | -|-------------------------------|---------------------------------------| -| file report release date: | `r date()` | -| autoQC01 lm cache file: | `r params[['input']]` | -| nodename: | `r Sys.info()['nodename']` | -| R version: | `r R.version.string` | -| rawrr version: | `r packageVersion('rawrr')` | -| rmarkdown version: | `r packageVersion('rmarkdown')` | -| r.squared.cutoff.red: | `r params[['r.squared.cutoff.red']]` | - -```{r defineHelperFunctions, message=TRUE, echo=FALSE} -stopifnot( -require(knitr), -require(kableExtra), -require(lattice), -require(protViz), -require(colorspace) -) - -.getIRTs <- function(){ - iRTpeptide <- c("LGGNEQVTR", "YILAGVENSK", "GTFIIDPGGVIR", "GTFIIDPAAVIR", - "GAGSSEPVTGLDAK", "TPVISGGPYEYR", "VEATFGVDESNAK", "TPVITGAPYEYR", - "DGLDAASYYAPVR", "ADVTPADFSEWSK", "LFLQFGAQGSPFLK") - - df <- 
protViz::iRTpeptides[protViz::iRTpeptides$peptide %in% iRTpeptide,] - df$ssrc <- protViz::ssrc(as.character(df$peptide)) - df$mass <- protViz::parentIonMass(as.character(df$peptide)) - df$mass2Hplus <- (df$mass + 1.008) / 2 - df -} - -.flm_autoQC01 <- - function(filename, peptides = .getIRTs(), tol=10, r.squared.cutoff=0.98, - fileprefix='/srv/www/htdocs'){ - - rawfile <- file.path(fileprefix, filename) - if(!file.exists(rawfile)){ - warning(paste0("file ", rawfile, "does not exist.")) - return() - } - start <- as.numeric(Sys.time()) * 1000 - - result <- tryCatch({ - message(paste0("fetching XICs for rawfile", rawfile, " ...")) - XIC <- rawrr::readChromatogram(rawfile, peptides$mass2Hplus, tol = tol) - t <- sapply(XIC, function(x){if(length(x$times) > 0){ - x$times[x$intensities == max(x$intensities)][1]}else{NA}}) - n <- length(t) - intensity.max <- max(sapply(XIC, function(x){max(x$intensities)})) - - xx <- data.frame(rt = t, irtscore = peptides$rt) - fm <- lm(rt ~ irtscore, data = xx, na.action=na.exclude) - list(fm = fm, - xx = xx, - intensity.max = intensity.max, - n=n, - filename = filename, - runtime = (as.numeric(Sys.time()) * 1000 - start) / 1000) - }, - error = function(err) { NULL }) - } - -rv <- lapply("32b749642472_4590.raw", .flm_autoQC01, - fileprefix = "/Users/cp/Library/Caches/org.R-project.R/R/ExperimentHub/") |> - lapply(FUN=function(x){ - data.frame(r.squared = summary(x$fm)$r.squared, - slope = x$fm$coefficients[2], - intercept = x$fm$coefficients[1], - n = x$n, - intensity.max = x$intensity.max, - filename = as.character(x$filename), - runtime = x$runtime, - row.names = NULL) - }) |> - Reduce(f=rbind) - -.assignInstrument <- function(x){ - stopifnot(is.data.frame(x)) - - x$instrument <- NA - for (p in c("FUSION_1", "FUSION_2", "G2HD_1", "LC1100", - "LTQ_1", "LTQFT_1", "ORBI_1", "ORBI_2", "PROTEONXPR36", - "QEXACTIVE_1", "QEXACTIVE_2", "QEXACTIVE_3", "QEXACTIVEHF_1", - "QEXACTIVEHF_2", "QEXACTIVEHF_4", "QEXACTIVEHFX_1", "QTRAP_1", "T100_1", - 
"TOFTOF_2", "TRIPLETOF_1", "TSQ_1", "TSQ_2", "VELOS_1", - "VELOS_2", "LUMOS_1", "LUMOS_2", "EXPLORIS_1")){ - x$instrument[grep(p, x$filename)] <- p - } - x -} -``` - -```{r colormaps, echo=FALSE, message=FALSE} -cv <- 1-2:7/10 -t <- trellis.par.get("strip.background") -t$col <- (rgb(cv,cv,cv)) -trellis.par.set("strip.background",t) - -tp <- trellis.par.get("par.sub.text") -tp$cex <- 0.5 -trellis.par.set("par.sub.text", tp) -``` - - -```{r readCache, echo=FALSE} -if(file.exists(params[['input']])){ - autoQC01 <- read.table(params[['input']], header=TRUE, sep=';') - autoQC01 <- assignInstrument(autoQC01) - autoQC01$POSIXct <- as.POSIXct(autoQC01$time, origin="1970-01-01") -} -``` - - -```{r zscores.slope, echo=FALSE} -if(nrow(autoQC01) > 1){ - qc01.mean <- aggregate(slope ~ instrument, data=autoQC01, FUN=mean, subset=autoQC01$r.squared > params$r.squared.cutoff.red) - qc01.sd <- aggregate(slope ~ instrument, data=autoQC01, FUN=sd, subset=autoQC01$r.squared > params$r.squared.cutoff.red) - names(qc01.mean) <- c('instrument','instrument.slope.mean') - names(qc01.sd) <- c('instrument','instrument.slope.sd') - qc01.mean.sd <- merge( qc01.mean, qc01.sd) - autoQC01 <- merge(autoQC01, qc01.mean.sd, by='instrument') - autoQC01$slope.zscore <- (autoQC01$slope - autoQC01$instrument.slope.mean) / autoQC01$instrument.slope.sd -} -``` - -```{r zscores.intercept, echo=FALSE} -if(nrow(autoQC01) > 1){ - qc01.mean <- aggregate(intercept ~ instrument, data=autoQC01, FUN=mean, subset=autoQC01$r.squared > 0.9) - qc01.sd <- aggregate(intercept ~ instrument, data=autoQC01, FUN=sd, subset=autoQC01$r.squared > 0.9) - names(qc01.mean) <- c('instrument','instrument.intercept.mean') - names(qc01.sd) <- c('instrument','instrument.intercept.sd') - qc01.mean.sd <- merge( qc01.mean, qc01.sd) - - autoQC01 <- merge(autoQC01, qc01.mean.sd, by='instrument') - autoQC01$intercept.zscore <- (autoQC01$intercept - autoQC01$instrument.intercept.mean) / autoQC01$instrument.intercept.sd -} -``` - -# Last 24 
hours - -```{r echo=FALSE, message=FALSE} -autoQC01.24h <- autoQC01[rev(which(Sys.time() - autoQC01$time < 3600 * 24 * 2)), ] - -idx <- rev(order(autoQC01.24h$time)) - -autoQC01.24h <- autoQC01.24h[idx, c('POSIXct', 'filename', 'slope', - 'slope.zscore', 'intercept', 'intercept.zscore', 'r.squared')] - -QCred <- which(autoQC01.24h$r.squared < params$r.squared.cutoff.red) -QCyellow <- which(autoQC01.24h$r.squared < params$r.squared.cutoff.yellow) - -kable(autoQC01.24h, row.names=FALSE) |> - kable_styling("striped", full_width = FALSE) |> - row_spec(QCyellow, bold = TRUE, color = "black", background = params$r.squared.cutoff.yellow.rgb) |> - row_spec(QCred, bold = TRUE, color = "white", background = params$r.squared.cutoff.red.rgb) -``` - -# Fitted models - -```{r flm.autoQC01, fig.retina=3, message=FALSE, echo=FALSE, fig.width=10, fig.height=2.25} -rv <- parallel::mclapply(autoQC01.24h$filename, FUN=.flm_autoQC01, mc.cores=16) - -rv <- lapply(rv, function(x, r.squared.cutoff=0.98){ - message(paste("plotting", x$filename, "...")) - - if (summary(x$fm)$r.squared < params$r.squared.cutoff.red){ - op <- par(mfrow = c(1,5), mar=c(5,5,5,1),bg=params$r.squared.cutoff.red.rgb, col='white') - }else if (summary(x$fm)$r.squared < params$r.squared.cutoff.yellow){ - op <- par(mfrow = c(1,5), mar=c(5,5,5,1), bg=params$r.squared.cutoff.yellow.rgb, col='black') - }else{ - op <- par(mfrow = c(1,5), mar=c(5,5,5,1)) - } - - plot(x$xx$rt ~ x$xx$irtscore, asp = 1, main = "rt ~ irtscore", type='n') - legend("topleft", strsplit(as.character(x$filename), split = '/', perl = TRUE)[[1]][c(4,3,1)],box.col = 'transparent',cex = 0.75) - legend.text<- paste(c('slope','intercept','r.squared'), as.numeric(round(c(coef(x$fm)["irtscore"], coef(x$fm)[1], summary(x$fm)$r.squared),3)),sep=": ") - - legend("bottomright", legend.text, cex=0.75, box.col = 'transparent') - points(x$xx$irtscore, x$xx$rt, pch=16) - abline(x$fm) - - plot(x$fm) - par(op) - }) -``` - -
- -# Study slope and intercept - -```{r fig.retina=3, fig.height=7} -tp <- trellis.par.get("superpose.symbol") -tp$col <- colorspace::rainbow_hcl(8, alpha = 0.152) -trellis.par.set("superpose.symbol", tp) - -xyplot(intercept ~ slope | instrument, - group=instrument, - data=autoQC01, - subset=r.squared > params$r.squared.cutoff.yellow, - pch=16 - ) -``` - -# iRT peptides - -```{r echo=FALSE} -df <- .getIRTs() -names(df) <- c("peptide", "iRT", "ssrc", "mass", "mZ") -df <- df[,c(1,2,5)] -kable(df) |> - kable_styling("striped") -``` - -# Session Info - -```{r} -sessionInfo() -``` diff --git a/man/buildRawrrExe.Rd b/man/buildRawrrExe.Rd index bc13273..476c03d 100644 --- a/man/buildRawrrExe.Rd +++ b/man/buildRawrrExe.Rd @@ -10,9 +10,10 @@ buildRawrrExe() the return value of the system2 command. } \description{ -builds \code{rawrr.exe} file from C# source code requiring -xbuild or msbuild tools. The console application \code{rawrr.exe} -is used by the package's reader functions through a \link{system2} call. +builds \code{rawrr.exe} file from C# source code requiring +.NET SDK. The console application \code{rawrr.exe} +is used by the package's reader functions through a \link{system2} call +or a \link{textConnection}. } \details{ The rawrr package implementation consists of two language layers, @@ -22,27 +23,24 @@ using a \link{system2} call. Calling a wrapper method typically results in the execution of methods defined in the RawFileReader dynamic link library provided by Thermo Fisher Scientific. Our precompiled wrapper methods are bundled in the \code{rawrr.exe} executable file (.NET assembly) and shipped -with the released R package. Running \code{rawrr.exe} requires the -\url{https://www.mono-project.com/} environment on non-Microsoft -operating systems. Mono is a cross platform, open source .NET framework. -On Microsoft Windows the Microsoft .NET framework is typically already -installed and sufficient. Our package also contains the C# source code -\code{rawrr.cs}. 
+with the released R package. +Our package also contains the C# source code \code{rawrr.cs}. In order to return extracted data back to the R layer we use file I/O. More specifically, the extracted information is written to a temporary -location on the harddrive, read back into memory and parsed into R objects. +location on the harddrive, read back into memory and parsed into R objects. } \references{ \itemize{ - \item{\url{https://www.mono-project.com/docs/advanced/assemblies-and-the-gac/}} - \item{\url{https://planetorbitrap.com/rawfilereader}} + \item{\url{https://www.mono-project.com/docs/advanced/assemblies-and-the-gac/}, 2020} + \item{\url{https://planetorbitrap.com/rawfilereader}, 2020} + \item{\url{https://github.com/thermofisherlsms/RawFileReader/}, 2024} \item{\url{https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/compiler-options/advanced}} \item{\doi{10.1021/acs.jproteome.0c00866}} } } \seealso{ -\link{installRawrrExe} and \link{installRawFileReaderDLLs} +\link{installRawrrExe} } \author{ -Tobias Kockmann, Christian Panse , 2021 +Tobias Kockmann, Christian Panse , 2021, 2024 } diff --git a/man/dot-addNupkgSource.Rd b/man/dot-addNupkgSource.Rd index 59f078a..7789099 100644 --- a/man/dot-addNupkgSource.Rd +++ b/man/dot-addNupkgSource.Rd @@ -4,14 +4,12 @@ \alias{.addNupkgSource} \title{dotnet nuget add source /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/ dotnet nuget remove source "Package source 1" -dotnet nuget list source -dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator} +dotnet nuget list source} \usage{ .addNupkgSource() } \description{ dotnet nuget add source /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/ dotnet nuget remove source "Package source 1" -dotnet nuget list source -dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator +dotnet nuget list source } diff --git a/man/dot-benchmark.Rd b/man/dot-benchmark.Rd index 96c356c..6242a17 100644 --- a/man/dot-benchmark.Rd 
+++ b/man/dot-benchmark.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/benchmark.R \name{.benchmark} \alias{.benchmark} -\title{benchmark execution time} +\title{f <- "/Users/cp/Library/Caches/org.R-project.R/R/ExperimentHub/46314c3933e2_4590.raw"} \usage{ -.benchmark(rawfile) +.benchmark(f) } \description{ -benchmark execution time +f <- "/Users/cp/Library/Caches/org.R-project.R/R/ExperimentHub/46314c3933e2_4590.raw" } diff --git a/man/dot-downloadNupkgs.Rd b/man/dot-downloadNupkgs.Rd index 076487f..5de7ebe 100644 --- a/man/dot-downloadNupkgs.Rd +++ b/man/dot-downloadNupkgs.Rd @@ -22,21 +22,6 @@ by the external program. \description{ Download and install the Thermo Fisher Scientific .NET 8.0 nupkgs } -\details{ -The console application assembly \code{rawrr.exe} requires: -\itemize{ -\item {\code{ThermoFisher.CommonCore.Data.dll}, } -\item{\code{ThermoFisher.CommonCore.MassPrecisionEstimator.dll}, and} -\item{ThermoFisher.CommonCore.RawFileReader.dll} -}. -} -\references{ -\itemize{ - \item{\url{https://www.mono-project.com/docs/advanced/assemblies-and-the-gac/}} - \item{\url{https://planetorbitrap.com/rawfilereader}} - \item{\doi{10.1021/acs.jproteome.0c00866}} -} -} \author{ Christian Panse , 2021, 2024 } diff --git a/man/installRawrrExe.Rd b/man/installRawrrExe.Rd index f2e8cfb..92e1047 100644 --- a/man/installRawrrExe.Rd +++ b/man/installRawrrExe.Rd @@ -3,7 +3,7 @@ \name{installRawrrExe} \alias{installRawrrExe} \alias{rawrr.exe} -\title{Download and install the \code{rawrr.exe} console application} +\title{Download \code{rawrr} assembly} \usage{ installRawrrExe( sourceUrl = "https://fgcz-ms.uzh.ch/~cpanse/rawrr/dotnet/", @@ -24,11 +24,11 @@ failure. For the "wget" and "curl" methods this is the status code returned by the external program. } \description{ -downloads and installs the \code{rawrr.exe} .NET assembly in +downloads and installs the \code{rawrr.exe} .NET assembly in the directory provided by \code{rawrrAssemblyPath()}. 
} \details{ -The console application \code{rawrr.exe} is used by the package's +The console application \code{rawrr} is used by the package's reader functions through a \link{system2} call. } \references{ diff --git a/man/rawrrAssemblyPath.Rd b/man/rawrrAssemblyPath.Rd index e9723e3..8c85d23 100644 --- a/man/rawrrAssemblyPath.Rd +++ b/man/rawrrAssemblyPath.Rd @@ -16,5 +16,5 @@ Derives the path where all .NET assemblies are stored. rawrrAssemblyPath() } \seealso{ -\code{installRawFileReaderDLLs} and \code{installRawrrExe} +\code{installRawrrExe} and \code{buildRawrrExe} } From 450c451dbae7a6f2aa52d76966621428602fc8f8 Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Sun, 3 Nov 2024 15:52:48 +0100 Subject: [PATCH 18/20] doc: cosmetics --- R/dotNetAssembly.R | 13 ++++++++++--- R/rawrr.R | 16 +++------------- man/dot-addPackages.Rd | 11 +++++++++++ man/installRawrrExe.Rd | 3 --- 4 files changed, 24 insertions(+), 19 deletions(-) create mode 100644 man/dot-addPackages.Rd diff --git a/R/dotNetAssembly.R b/R/dotNetAssembly.R index 1de0d34..81ea242 100644 --- a/R/dotNetAssembly.R +++ b/R/dotNetAssembly.R @@ -236,15 +236,22 @@ rawrrAssemblyPath <- function(){ #' failure. For the "wget" and "curl" methods this is the status code returned #' by the external program. #' @seealso \link{buildRawrrExe} -#' @references \doi{10.1021/acs.jproteome.0c00866} #' @aliases rawrr.exe -#' @export installRawrrExe +#' @export installRawrrExe <- function (sourceUrl = "https://fgcz-ms.uzh.ch/~cpanse/rawrr/dotnet/", force = FALSE, ...) { rawrrAssembly <- .rawrrAssembly() - if (file.exists(rawrrAssembly) && isFALSE(force)){ + + if (isTRUE(file.exists(rawrrAssembly)) && isFALSE(force)){ + if (interactive()){ + message("The rawrr assembly exists. Have a lot of fun!") + } + return() + } + + if (isTRUE(file.exists(rawrrAssembly)) && isTRUE(force)){ ## TODO: if interactive ask to override if (interactive()){ response <- readline(prompt = sprintf("Assembly exists. Do you want to overwrite it? 
[Y/n]: ")) diff --git a/R/rawrr.R b/R/rawrr.R index 50c2ada..ff84e6f 100644 --- a/R/rawrr.R +++ b/R/rawrr.R @@ -196,7 +196,7 @@ is.rawrrSpectrumSet <- function(x){ #' Sample volume, Sample injection volume, Sample row number, #' Sample dilution factor, or Sample barcode. #' -#' @export readFileHeader +#' @export #' #' @examples #' rawrr::sampleFilePath() |> readFileHeader() @@ -218,7 +218,7 @@ readFileHeader <- function(rawfile){ #' scan, scanType, StartTime, precursorMass, MSOrder, charge, masterScan, and #' dependencyType of all spectra. #' -#' @export readIndex +#' @export #' @importFrom utils read.table #' @author Tobias Kockmann and Christian Panse , 2020, 2021 #' @@ -454,12 +454,6 @@ sampleFilePath <- function(){ #' #' @aliases readSpectrum rawrr #' -#' @export -## #' @export readSpectrum -## #' @exportClass rawrrSpectrum -## #' @exportS3Method plot rawrrSpectrum -## #' @exportS3Method print rawrrSpectrum -## #' @exportS3Method summary rawrrSpectrum #' #' @return a nested list of \code{rawrrSpectrum} objects containing more than 50 #' values of scan information, e.g., the charge state, two vectors containing @@ -542,6 +536,7 @@ sampleFilePath <- function(){ #' rawrr::readSpectrum(rawfile=rawfile, 11091) |> #' lapply(function(x).UniversalSpectrumExplorer(x, sequence = GAG)) #' } +#' @export readSpectrum <- function(rawfile, scan = NULL, tmpdir = tempdir(), validate = FALSE, mode = ''){ .isAssemblyWorking() @@ -684,11 +679,6 @@ validate_rawrrChromatogram <- function(x){ #' \href{https://massive.ucsd.edu/ProteoSAFe/dataset.jsp?accession=MSV000086542}{MSV000086542}. 
#' #' @export -## #' @export readChromatogram -## #' @exportClass rawrrChromatogram -## #' @exportClass rawrrChromatogramSet -## #' @exportS3Method plot rawrrChromatogram -## #' @exportS3Method plot rawrrChromatogramSet #' @importFrom utils read.csv2 #' @examples #' diff --git a/man/dot-addPackages.Rd b/man/dot-addPackages.Rd new file mode 100644 index 0000000..fae652c --- /dev/null +++ b/man/dot-addPackages.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dotNetAssembly.R +\name{.addPackages} +\alias{.addPackages} +\title{dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator} +\usage{ +.addPackages(dir, version = "8.0.6") +} +\description{ +dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator +} diff --git a/man/installRawrrExe.Rd b/man/installRawrrExe.Rd index 92e1047..d87c657 100644 --- a/man/installRawrrExe.Rd +++ b/man/installRawrrExe.Rd @@ -31,9 +31,6 @@ the directory provided by \code{rawrrAssemblyPath()}. The console application \code{rawrr} is used by the package's reader functions through a \link{system2} call. 
} -\references{ -\doi{10.1021/acs.jproteome.0c00866} -} \seealso{ \link{buildRawrrExe} } From ed853d8b060de9fea6cd6a6dfd9a8a8b4e118c8b Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Sun, 3 Nov 2024 17:53:04 +0100 Subject: [PATCH 19/20] doc: replace mono part by dotnet --- NAMESPACE | 1 + R/benchmark.R | 3 ++- R/dotNetAssembly.R | 52 +++++++++++++++++++++------------------ man/dot-addNupkgSource.Rd | 15 ----------- man/dot-addPackages.Rd | 11 --------- man/dot-benchmark.Rd | 11 --------- vignettes/rawrr.Rmd | 44 ++++++++++++++++++++++++++++++--- vignettes/rawrr.bib | 7 ++++++ 8 files changed, 79 insertions(+), 65 deletions(-) delete mode 100644 man/dot-addNupkgSource.Rd delete mode 100644 man/dot-addPackages.Rd delete mode 100644 man/dot-benchmark.Rd diff --git a/NAMESPACE b/NAMESPACE index 8792ce4..8a75b9f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -39,6 +39,7 @@ importFrom(stats,predict) importFrom(tools,R_user_dir) importFrom(utils,download.file) importFrom(utils,head) +importFrom(utils,object.size) importFrom(utils,packageVersion) importFrom(utils,read.csv2) importFrom(utils,read.table) diff --git a/R/benchmark.R b/R/benchmark.R index 5da179d..5b768a9 100644 --- a/R/benchmark.R +++ b/R/benchmark.R @@ -1,6 +1,7 @@ #R -#' f <- "/Users/cp/Library/Caches/org.R-project.R/R/ExperimentHub/46314c3933e2_4590.raw" +#' @importFrom utils object.size +# f <- "/Users/cp/Library/Caches/org.R-project.R/R/ExperimentHub/46314c3933e2_4590.raw" .benchmark <- function(f){ stopifnot(file.exists(f)) diff --git a/R/dotNetAssembly.R b/R/dotNetAssembly.R index 81ea242..2bf4c6c 100644 --- a/R/dotNetAssembly.R +++ b/R/dotNetAssembly.R @@ -142,9 +142,9 @@ rawrrAssemblyPath <- function(){ rv } -#' dotnet nuget add source /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/ -#' dotnet nuget remove source "Package source 1" -#' dotnet nuget list source +# dotnet nuget add source /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/ +# dotnet nuget remove source 
"Package source 1" +# dotnet nuget list source .addNupkgSource <- function(){ system2('dotnet', args = c('nuget', 'add', 'source', rawrrAssemblyPath())) } @@ -163,7 +163,7 @@ rawrrAssemblyPath <- function(){ }) } -#' dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator +# dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator .addPackages <- function(dir, version = "8.0.6"){ tempOut <- tempfile(pattern = "rawrr.add.packages.stdout.", tmpdir = dir, fileext = ".txt") tempErr <- tempfile(pattern = "rawrr.add.packages.stderr.", tmpdir = dir, fileext = ".txt") @@ -188,8 +188,8 @@ rawrrAssemblyPath <- function(){ } .clean <- function(){ - message("Removing ", rawrr:::.rawrrAssembly()) - file.remove(rawrr:::.rawrrAssembly()) + message("Removing ", .rawrrAssembly()) + file.remove(.rawrrAssembly()) } .build <- function(dir){ @@ -204,7 +204,7 @@ rawrrAssemblyPath <- function(){ message("Write stderr to", tempErr) system2('dotnet', args = c('publish', '-c', 'Release', '-a', 'x64', '-p', - 'PublishReadyToRun=true', '-o', dirname(rawrr:::.rawrrAssembly())), + 'PublishReadyToRun=true', '-o', dirname(.rawrrAssembly())), stdout = tempOut, stderr = tempErr) -> rv @@ -314,21 +314,14 @@ installRawrrExe <- #' .NET SDK. The console application \code{rawrr.exe} #' is used by the package's reader functions through a \link{system2} call #' or a \link{textConnection}. +#' +#' To use this function, ensure that the local RawFileReader NuGet packages +#' are added to the NuGet source list. You can accomplish this by +#' downloading the necessary packages with +#' \code{rawrr:::.downloadNupkgs()} and subsequently running +#' \code{rawrr:::.addNupkgSource()}. #' -#' @details The rawrr package implementation consists of two language layers, -#' the top R layer and the hidden C# layer. Specifically, R functions requesting -#' access to data stored in binary raw files invoke compiled C# wrapper methods -#' using a \link{system2} call. 
Calling a wrapper method typically results in the -#' execution of methods defined in the RawFileReader dynamic link library -#' provided by Thermo Fisher Scientific. Our precompiled wrapper methods are -#' bundled in the \code{rawrr.exe} executable file (.NET assembly) and shipped -#' with the released R package. -#' Our package also contains the C# source code \code{rawrr.cs}. -#' In order to return extracted data back to the R layer we use file I/O. -#' More specifically, the extracted information is written to a temporary -#' location on the harddrive, read back into memory and parsed into R objects. -#' -#' @author Tobias Kockmann, Christian Panse , 2021, 2024 +#' @author Christian Panse , 2021, 2024 #' #' @seealso \link{installRawrrExe} #' @@ -362,9 +355,20 @@ buildRawrrExe <- function(){ .copySourceCode(dir = buildDir) - ## TODO: check if already exists - # .downloadNupkgs() - # .addNupkgSource() + (system2("dotnet", args =c('nuget', 'list', 'source'), + stdout = TRUE) |> + grepl(pattern = "rawrrassembly") |> + sum() >= 1) -> nugetPkgsPresent + + if(nugetPkgsPresent){ + warning("Have you downloaded the 'thermofisherlsms/RawFileReader' NuGet packages? 
\n", + "If not, please consider executing the methods:\n", + " -> rawrr:::.downloadNupkgs()\n", + " -> rawrr:::.addNupkgSource()\n", + "These steps should be performed once to ensure proper setup.") + # .downloadNupkgs() + # .addNupkgSource() + } .addPackages(dir = buildDir) .build(dir = buildDir) diff --git a/man/dot-addNupkgSource.Rd b/man/dot-addNupkgSource.Rd deleted file mode 100644 index 7789099..0000000 --- a/man/dot-addNupkgSource.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dotNetAssembly.R -\name{.addNupkgSource} -\alias{.addNupkgSource} -\title{dotnet nuget add source /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/ -dotnet nuget remove source "Package source 1" -dotnet nuget list source} -\usage{ -.addNupkgSource() -} -\description{ -dotnet nuget add source /Users/cp/Library/Caches/org.R-project.R/R/rawrr/rawrrassembly/ -dotnet nuget remove source "Package source 1" -dotnet nuget list source -} diff --git a/man/dot-addPackages.Rd b/man/dot-addPackages.Rd deleted file mode 100644 index fae652c..0000000 --- a/man/dot-addPackages.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dotNetAssembly.R -\name{.addPackages} -\alias{.addPackages} -\title{dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator} -\usage{ -.addPackages(dir, version = "8.0.6") -} -\description{ -dotnet add package ThermoFisher.CommonCore.MassPrecisionEstimator -} diff --git a/man/dot-benchmark.Rd b/man/dot-benchmark.Rd deleted file mode 100644 index 6242a17..0000000 --- a/man/dot-benchmark.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/benchmark.R -\name{.benchmark} -\alias{.benchmark} -\title{f <- "/Users/cp/Library/Caches/org.R-project.R/R/ExperimentHub/46314c3933e2_4590.raw"} -\usage{ -.benchmark(f) -} -\description{ -f <- 
"/Users/cp/Library/Caches/org.R-project.R/R/ExperimentHub/46314c3933e2_4590.raw" -} diff --git a/vignettes/rawrr.Rmd b/vignettes/rawrr.Rmd index a123a6e..a8221db 100644 --- a/vignettes/rawrr.Rmd +++ b/vignettes/rawrr.Rmd @@ -46,7 +46,45 @@ We strongly believe that a library providing raw data reading would finally clos # Implementation -Our implementation consists of two language layers, the top `R` layer and the hidden `C#` layer. Specifically, `R` functions requesting access to data stored in binary raw files (reader family functions listed in Table 1) invoke compiled `C#` wrapper methods using a system call. Calling a wrapper method typically results in the execution of methods defined in the `RawFileReader` dynamic link library provided by Thermo Fisher Scientific. Our precompiled wrapper methods are bundled in the `r BiocStyle::Biocpkg('rawrr')` executable file and shipped with the released `R` package. Running `rawrr.exe` requires the [Mono](https://www.mono-project.com/) environment on non-Microsoft operating systems. Mono is a cross platform, open source .NET framework. On Microsoft Windows the Microsoft .NET framework is typically already installed and sufficient. Our package also contains the `C#` source code (rawrr.cs), hopefully allowing other developers to follow and improve our code (open source). In order to return extracted data back to the `R` layer we use file I/O. More specifically, the extracted information is written to a temporary location on the harddrive, read back into memory and parsed into `R` objects. +Our implementation consists of two language layers, the top `R` layer and the hidden `C#` layer. Specifically, `R` functions requesting access to data stored in binary raw files (reader family functions listed in Table 1) invoke compiled `C#` wrapper methods using a system call. Calling a wrapper method typically results in the execution of methods defined in the `RawFileReader` dynamic link library provided by Thermo Fisher Scientific. 
Our `.NET 8.0` [@dotnet] precompiled wrapper methods are bundled, including the runtime, in the `r BiocStyle::Biocpkg('rawrr')` executable file and shipped with the released `R` package. +Our package also contains the `C#` source code (rawrr.cs), hopefully allowing other developers to follow and improve our code (open source). In order to return extracted data back to the `R` layer we use file I/O. More specifically, the extracted information is written to a temporary location on the harddrive, read back into memory and parsed into `R` objects. +The graphic below depicts the described software stack. + + + + + + + + + + + + + + + + + + + + + +
+`R>` +
+`system2` or +`text connection` +
+.NET Runtime +
+Managed Assembly +(CIL/.NET code) +
+rawrr.exe +
+ThermoFisher.CommonCore.*.dll +
Since mass spectrometry typically uses two basic data items, the mass spectrum and the mass chromatogram, we decided to implement corresponding objects following `R`'s `S3` OOP system [@newS] named `rawrrSpectrum` and `rawrrChromatogram`. These objects function as simplistic interface to almost all data stored in raw-formatted files. The package provides functions to create and validate class instances. While class constructors primarily exist for (unit) testing purposes, instances are typically generated by the reader family of functions enumerated in Table 1 and returned as object sets (`rawrrSpectrumSet`, `rawrrChromatogramSet`). The names of objects encapsulated within `rawrrSpectrum` instances are keys returned by the `RawFileReader` API and the corresponding values become data parts of the objects, typically vectors of type `numeric`, `logical` or `character`. It needs to be mentioned that the `rawrrSpectrum` content partially depends on the instrument model and installed instrument control software version. For instance, the keys `FAIMS Voltage On:` and `FAIMS CV:` are only written by instruments that support FAIMS acquisition. We also implemented basic generics for printing and plotting of objects in base `R` to minimize dependencies. @@ -104,7 +142,7 @@ The Orbitrap detector has been a tremendous success story in MS, since it offers |`readTrailer()` |Reads trailer values for each scan event |`vector` | |`readChromatogram()`|Reads chromatographic data from a raw file |`rawrrChromatogram(Set)` | -Table: lists `r BiocStyle::Biocpkg('rawrr')` package functions connected to reading functionality. More details can be found in the package documentation (see supporting information, S-20 onwards [@Kockmann2021]). +Table: lists `r BiocStyle::Biocpkg('rawrr')` package functions connected to reading functionality. More details can be found in the package man pages. 
Individual scans or scan collections (sets) can be read by the function `readSpectrum()` which returns a `rawrrSpectrum` object or `rawrrSpectrumSet`. Our package also provides generics for printing and plotting these objects. The following code chunk depicts how a set of scans is read from the raw file (scan numbers were selected based on a database search). The corresponding Figure 1 shows the resulting plot for scan `9594` (USI: [mzspec:MSV000086542:20181113_010_autoQC01:scan:9594:LGGNEQVTR/2](http://massive.ucsd.edu/ProteoSAFe/usi.jsp#{%22usi%22:%22mzspec:MSV000086542:20181113_010_autoQC01:scan:9594:LGGNEQVTR/2%22})) assigned to the doubly-charged iRT peptide LGGNEQVTR by MS-GF+ (Score: 144, SpecProb: 1.9e-12, DB E-Value: 4.4e-4, see [MassIVE RMSV000000336.1](https://massive.ucsd.edu/ProteoSAFe/dataset.jsp?task=575538e190e84cbfbf6c17aa1219e403#reanalyses_header) for details of the search): @@ -369,7 +407,7 @@ See also [#fgcz/rawDiag/issues/33](https://github.com/fgcz/rawDiag/issues/33). sessionInfo() ``` -# Mono information {-} +# .NET information {-} ```{bash dotnet--info, echo=TRUE, error=TRUE} dotnet --info diff --git a/vignettes/rawrr.bib b/vignettes/rawrr.bib index ad41211..f83f68e 100644 --- a/vignettes/rawrr.bib +++ b/vignettes/rawrr.bib @@ -24,6 +24,13 @@ @ONLINE{mono year = {2021} } +@ONLINE{dotnet, + url = {https://dotnet.microsoft.com/en-us/}, + author = {Microsoft}, + title = {.NET | Build. Test. 
Deploy.}, + year = {2021} +} + @ONLINE{rawfilereader, url = {https://github.com/thermofisherlsms/RawFileReader}, author = {Jim Shofstahl}, From 84d12902c53d2dabddb512b47517245fc9bcc58c Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Sun, 3 Nov 2024 17:59:32 +0100 Subject: [PATCH 20/20] Update DESCRIPTION bump 1.15.3 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d1a5371..bdc737c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rawrr Type: Package Title: Direct Access to Orbitrap Data and Beyond -Version: 1.15.2.0 +Version: 1.15.3 Authors@R: c(person("Christian", "Panse", email = "cp@fgcz.ethz.ch", role = c("aut", "cre"),