diff --git a/.Rbuildignore b/.Rbuildignore
new file mode 100644
index 0000000..680036d
--- /dev/null
+++ b/.Rbuildignore
@@ -0,0 +1,8 @@
+^inst/\.quarto$
+^\.png$
+^\.quarto$
+^\.github$
+^LICENSE\.md$
+^NOTES\.md$
+^README\.Rmd$
+^Dockerfile$
diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..655e9e9
--- /dev/null
+++ b/.github/CODE_OF_CONDUCT.md
@@ -0,0 +1,17 @@
+The Bioconductor community values
+
+* an open approach to science that promotes the sharing of ideas, code, and expertise
+* collaboration
+* diversity and inclusivity
+* a kind and welcoming environment
+* community contributions
+
+In line with these values, Bioconductor is dedicated to providing a welcoming, supportive, collegial, experience free of harassment, intimidation, and bullying regardless of:
+
+* identity: gender, gender identity and expression, sexual orientation, disability, physical appearance, ethnicity, body size, race, age, religion, etc.
+* intellectual position: approaches to data analysis, software preferences, coding style, scientific perspective, etc.
+* stage of career
+
+In order to uphold these values, members of the Bioconductor community are required to follow the Code of Conduct. The latest version of the Bioconductor project Code of Conduct is available at http://bioconductor.org/about/code-of-conduct/. Please read the Code of Conduct before contributing to this project.
+
+Thank you!
diff --git a/.github/workflows/biocbook.yml b/.github/workflows/biocbook.yml
new file mode 100644
index 0000000..723be33
--- /dev/null
+++ b/.github/workflows/biocbook.yml
@@ -0,0 +1,111 @@
+name: biocbook
+
+on:
+  push:
+    branches:
+      - devel
+      - RELEASE_**
+
+jobs:
+  build-push:
+    runs-on: ubuntu-latest
+    name: build-book (${{ github.ref_name }})
+    permissions:
+      contents: write  # needed to push the rendered book to the gh-pages branch
+      packages: write  # needed to push the Docker image to ghcr.io
+
+    steps:
+      # The package (and its book) is built inside the Docker image; later steps publish the image and the book.
+      - name: 🧾 Checkout repository
+        uses: actions/checkout@v3
+
+      - name: ⏳ Collect Workflow Telemetry
+        uses: runforesight/workflow-telemetry-action@v1
+
+      - name: 🐳 Set up QEMU
+        uses: docker/setup-qemu-action@v2
+      - name: 🐳 Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      - name: 📝 Get book info
+        id: info
+        env:
+          OWNER: ${{ github.repository_owner }}  # lower-cased below: Docker image references must be lowercase
+        run: |
+          Pkgname=$(grep -m1 -E '^Package: +' DESCRIPTION | sed -E 's/.*: +//')
+          echo Pkgname=${Pkgname} >> "${GITHUB_ENV}"
+          pkgname=${Pkgname,,}
+          echo pkgname=${pkgname} >> "${GITHUB_ENV}"
+          owner=${OWNER,,}
+          echo owner=${owner} >> "${GITHUB_ENV}"
+          echo pkgversion=$(grep -m1 -E '^Version: +' DESCRIPTION | sed -E 's/.*: +//') >> "${GITHUB_ENV}"
+
+      - name: 🔐 Log in to the Github Container registry
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ env.owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: 🏷 Get metadata for Docker
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          images: ghcr.io/${{ env.owner }}/${{ env.pkgname }}
+          tags: |
+            ${{ github.ref_name }}
+            ${{ env.pkgversion }}
+            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'devel') }}
+
+      - name: 📦 Install, build and check package in local Docker image
+        id: docker
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          load: true  # load the image into the local Docker daemon so build artifacts can be copied out of it below
+          tags: ${{ steps.meta.outputs.tags }}
+          build-args: |
+            BIOC_VERSION=${{ github.ref_name }}
+
+      - name: 🚀 Push local Docker image to ghcr.io
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          push: true  # same context, tags and build args as the previous step, this time pushed to the registry
+          tags: ${{ steps.meta.outputs.tags }}
+          build-args: |
+            BIOC_VERSION=${{ github.ref_name }}
+
+      - name: 📚 Recover pkg artifacts generated during build in local Docker container (pkg bundle and book)
+        env:
+          IMG: ${{ steps.docker.outputs.ImageID }}
+        run: |
+          SHA=$(docker container create "${IMG}")
+          docker container cp "${SHA}:/${Pkgname}_${pkgversion}.tar.gz" .
+          tar --extract --gzip --file "${Pkgname}_${pkgversion}.tar.gz"
+          echo "bundle_path=${Pkgname}_${pkgversion}.tar.gz" >> "${GITHUB_ENV}"
+          echo "book_path=${Pkgname}/inst/doc/book/" >> "${GITHUB_ENV}"
+
+      - name: 🏷 Get gh-branch directory to deploy to
+        run: |
+          echo "target_folder=$(echo "${GITHUB_REF_NAME}" | sed 's,RELEASE_,,' | tr '_' '.')" >> "${GITHUB_ENV}"
+
+      - name: 🚀 Deploy book to Github Pages on versioned branch
+        uses: JamesIves/github-pages-deploy-action@v4.4.3
+        with:
+          folder: ${{ env.book_path }}  # book_path already ends with a slash
+          target-folder: docs/${{ env.target_folder }}/
+          branch: gh-pages
+          clean: false  # keep books already deployed for other versions
+
+      - name: 💾 Upload package bundle artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: bundle
+          path: ${{ env.bundle_path }}
+
+      - name: 💾 Upload book artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: book
+          path: ${{ env.book_path }}
diff --git a/.github/workflows/rworkflows.yml b/.github/workflows/rworkflows.yml
new file mode 100644
index 0000000..0df685c
--- /dev/null
+++ b/.github/workflows/rworkflows.yml
@@ -0,0 +1,53 @@
+## Adapted from neurogenomics/rworkflows: rworkflows::use_workflow()
+
+name: rworkflows
+'on':
+  push:
+    branches:
+    - devel
+    - RELEASE_**
+  pull_request:
+    branches:
+    - devel
+    - RELEASE_**
+
+jobs:
+  rworkflows:
+    runs-on: ${{ matrix.config.os }}
+    name: ${{ matrix.config.os }} (${{ matrix.config.r }})
+    container: ${{ matrix.config.cont }}
+    strategy:
+      fail-fast: ${{ false }}  # let the other matrix entries finish even if one of them fails
+      matrix:
+        config:
+        - os: ubuntu-latest
+          bioc: devel  # NOTE(review): bioc and rspm are not referenced anywhere else in this file — confirm they are still needed
+          r: auto  # only used in the job name above
+          cont: ghcr.io/bioconductor/bioconductor:devel  # the job runs inside this container image
+          rspm: https://packagemanager.rstudio.com/cran/__linux__/focal/release
+        # - os: macOS-latest
+        #   bioc: release
+        #   r: auto
+        #   cont: ~
+        #   rspm: ~
+        # - os: windows-latest
+        #   bioc: release
+        #   r: auto
+        #   cont: ~
+        #   rspm: ~
+    steps:
+    - uses: neurogenomics/rworkflows@master  # NOTE(review): tracks a moving branch; consider pinning to a tag or commit SHA
+      with:
+        run_bioccheck: ${{ false }}
+        run_rcmdcheck: ${{ true }}  # the only check enabled here, presumably R CMD check — confirm against the action's docs
+        as_cran: ${{ false }}
+        run_vignettes: ${{ false }}
+        has_testthat: ${{ true }}  # NOTE(review): no tests/ directory is visible in this change — confirm this flag is intended
+        run_covr: ${{ false }}
+        run_pkgdown: ${{ false }}
+        has_runit: ${{ false }}
+        has_latex: ${{ false }}
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run_docker: ${{ false }}
+        runner_os: ${{ runner.os }}
+        cache_version: cache-v1
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..2c39be1
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,61 @@
+Package: R4MS
+Title: R for mass spectrometry
+Description: |
+    This repository provides documentation and teaching material 
+    focusing on the analysis of mass spectrometry data for proteomics and metabolomics
+    using the [R for Mass Spectrometry](https://www.rformassspectrometry.org/) 
+    software infrastructure.
+Version: 0.98.0
+Date: `r date()`
+Authors@R: c(person(given = "Laurent", family = "Gatto",
+                    comment = c(ORCID = "0000-0002-1520-2268"),
+                    email = "laurent.gatto@uclouvain.be",
+                    role = c("aut","cre")),
+             person(given = "Johannes", family = "Rainer",
+                    email = "Johannes.Rainer@eurac.edu",
+                    role = "aut",
+                    comment = c(ORCID = "0000-0002-6977-7147")),
+             person(given = "Sebastian", family = "Gibb",
+                    email = "mail@sebastiangibb.de",
+                    role = "aut",
+                    comment = c(ORCID = "0000-0001-7406-4443")))
+URL: https://github.com/js2264/R4MS
+BugReports: https://github.com/js2264/R4MS/issues
+biocViews:
+    Book
+Depends:
+    R (>= 4.3)
+Imports:
+    tidyverse, 
+    factoextra, 
+    msdata, 
+    mzR, 
+    rhdf5, 
+    rpx, 
+    MsCoreUtils, 
+    QFeatures, 
+    Spectra, 
+    ProtGenerics, 
+    PSMatch, 
+    pheatmap, 
+    limma, 
+    gplots,
+    patchwork,  
+    MSnID
+Suggests:
+    BiocManager, 
+    BiocVersion, 
+    BiocStyle, 
+    BiocCheck, 
+    rcmdcheck, 
+    glue,
+    sessioninfo, 
+    knitr, 
+    quarto,
+    BiocBook
+Encoding: UTF-8
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.2.3
+BiocType: Book
+VignetteBuilder: knitr
+License: GPL (>= 3)
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..9e9a308
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,10 @@
+ARG BIOC_VERSION
+FROM bioconductor/bioconductor_docker:${BIOC_VERSION}
+COPY . /opt/pkg
+
+# Install the book package with all its dependencies (using the BiocManager repositories), then print session info
+RUN Rscript -e 'repos <- BiocManager::repositories() ; remotes::install_local(path = "/opt/pkg/", repos=repos, dependencies=TRUE, build_vignettes=FALSE, upgrade=TRUE) ; sessioninfo::session_info(installed.packages()[,"Package"], include_base = TRUE)'
+
+## Build/install using the same approach as the BBS (Bioconductor Build System)
+RUN R CMD INSTALL /opt/pkg
+RUN R CMD build --keep-empty-dirs --no-resave-data /opt/pkg
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..175443c
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,595 @@
+GNU General Public License
+==========================
+
+_Version 3, 29 June 2007_  
+_Copyright © 2007 Free Software Foundation, Inc. &lt;<http://fsf.org/>&gt;_
+
+Everyone is permitted to copy and distribute verbatim copies of this license
+document, but changing it is not allowed.
+
+## Preamble
+
+The GNU General Public License is a free, copyleft license for software and other
+kinds of works.
+
+The licenses for most software and other practical works are designed to take away
+your freedom to share and change the works. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change all versions of a
+program--to make sure it remains free software for all its users. We, the Free
+Software Foundation, use the GNU General Public License for most of our software; it
+applies also to any other work released this way by its authors. You can apply it to
+your programs, too.
+
+When we speak of free software, we are referring to freedom, not price. Our General
+Public Licenses are designed to make sure that you have the freedom to distribute
+copies of free software (and charge for them if you wish), that you receive source
+code or can get it if you want it, that you can change the software or use pieces of
+it in new free programs, and that you know you can do these things.
+
+To protect your rights, we need to prevent others from denying you these rights or
+asking you to surrender the rights. Therefore, you have certain responsibilities if
+you distribute copies of the software, or if you modify it: responsibilities to
+respect the freedom of others.
+
+For example, if you distribute copies of such a program, whether gratis or for a fee,
+you must pass on to the recipients the same freedoms that you received. You must make
+sure that they, too, receive or can get the source code. And you must show them these
+terms so they know their rights.
+
+Developers that use the GNU GPL protect your rights with two steps: **(1)** assert
+copyright on the software, and **(2)** offer you this License giving you legal permission
+to copy, distribute and/or modify it.
+
+For the developers' and authors' protection, the GPL clearly explains that there is
+no warranty for this free software. For both users' and authors' sake, the GPL
+requires that modified versions be marked as changed, so that their problems will not
+be attributed erroneously to authors of previous versions.
+
+Some devices are designed to deny users access to install or run modified versions of
+the software inside them, although the manufacturer can do so. This is fundamentally
+incompatible with the aim of protecting users' freedom to change the software. The
+systematic pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we have designed
+this version of the GPL to prohibit the practice for those products. If such problems
+arise substantially in other domains, we stand ready to extend this provision to
+those domains in future versions of the GPL, as needed to protect the freedom of
+users.
+
+Finally, every program is threatened constantly by software patents. States should
+not allow patents to restrict development and use of software on general-purpose
+computers, but in those that do, we wish to avoid the special danger that patents
+applied to a free program could make it effectively proprietary. To prevent this, the
+GPL assures that patents cannot be used to render the program non-free.
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+## TERMS AND CONDITIONS
+
+### 0. Definitions
+
+“This License” refers to version 3 of the GNU General Public License.
+
+“Copyright” also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+“The Program” refers to any copyrightable work licensed under this
+License. Each licensee is addressed as “you”. “Licensees” and
+“recipients” may be individuals or organizations.
+
+To “modify” a work means to copy from or adapt all or part of the work in
+a fashion requiring copyright permission, other than the making of an exact copy. The
+resulting work is called a “modified version” of the earlier work or a
+work “based on” the earlier work.
+
+A “covered work” means either the unmodified Program or a work based on
+the Program.
+
+To “propagate” a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for infringement under
+applicable copyright law, except executing it on a computer or modifying a private
+copy. Propagation includes copying, distribution (with or without modification),
+making available to the public, and in some countries other activities as well.
+
+To “convey” a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through a computer
+network, with no transfer of a copy, is not conveying.
+
+An interactive user interface displays “Appropriate Legal Notices” to the
+extent that it includes a convenient and prominently visible feature that **(1)**
+displays an appropriate copyright notice, and **(2)** tells the user that there is no
+warranty for the work (except to the extent that warranties are provided), that
+licensees may convey the work under this License, and how to view a copy of this
+License. If the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+### 1. Source Code
+
+The “source code” for a work means the preferred form of the work for
+making modifications to it. “Object code” means any non-source form of a
+work.
+
+A “Standard Interface” means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of interfaces
+specified for a particular programming language, one that is widely used among
+developers working in that language.
+
+The “System Libraries” of an executable work include anything, other than
+the work as a whole, that **(a)** is included in the normal form of packaging a Major
+Component, but which is not part of that Major Component, and **(b)** serves only to
+enable use of the work with that Major Component, or to implement a Standard
+Interface for which an implementation is available to the public in source code form.
+A “Major Component”, in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system (if any) on which
+the executable work runs, or a compiler used to produce the work, or an object code
+interpreter used to run it.
+
+The “Corresponding Source” for a work in object code form means all the
+source code needed to generate, install, and (for an executable work) run the object
+code and to modify the work, including scripts to control those activities. However,
+it does not include the work's System Libraries, or general-purpose tools or
+generally available free programs which are used unmodified in performing those
+activities but which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for the work, and
+the source code for shared libraries and dynamically linked subprograms that the work
+is specifically designed to require, such as by intimate data communication or
+control flow between those subprograms and other parts of the work.
+
+The Corresponding Source need not include anything that users can regenerate
+automatically from other parts of the Corresponding Source.
+
+The Corresponding Source for a work in source code form is that same work.
+
+### 2. Basic Permissions
+
+All rights granted under this License are granted for the term of copyright on the
+Program, and are irrevocable provided the stated conditions are met. This License
+explicitly affirms your unlimited permission to run the unmodified Program. The
+output from running a covered work is covered by this License only if the output,
+given its content, constitutes a covered work. This License acknowledges your rights
+of fair use or other equivalent, as provided by copyright law.
+
+You may make, run and propagate covered works that you do not convey, without
+conditions so long as your license otherwise remains in force. You may convey covered
+works to others for the sole purpose of having them make modifications exclusively
+for you, or provide you with facilities for running those works, provided that you
+comply with the terms of this License in conveying all material for which you do not
+control copyright. Those thus making or running the covered works for you must do so
+exclusively on your behalf, under your direction and control, on terms that prohibit
+them from making any copies of your copyrighted material outside their relationship
+with you.
+
+Conveying under any other circumstances is permitted solely under the conditions
+stated below. Sublicensing is not allowed; section 10 makes it unnecessary.
+
+### 3. Protecting Users' Legal Rights From Anti-Circumvention Law
+
+No covered work shall be deemed part of an effective technological measure under any
+applicable law fulfilling obligations under article 11 of the WIPO copyright treaty
+adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention
+of such measures.
+
+When you convey a covered work, you waive any legal power to forbid circumvention of
+technological measures to the extent such circumvention is effected by exercising
+rights under this License with respect to the covered work, and you disclaim any
+intention to limit operation or modification of the work as a means of enforcing,
+against the work's users, your or third parties' legal rights to forbid circumvention
+of technological measures.
+
+### 4. Conveying Verbatim Copies
+
+You may convey verbatim copies of the Program's source code as you receive it, in any
+medium, provided that you conspicuously and appropriately publish on each copy an
+appropriate copyright notice; keep intact all notices stating that this License and
+any non-permissive terms added in accord with section 7 apply to the code; keep
+intact all notices of the absence of any warranty; and give all recipients a copy of
+this License along with the Program.
+
+You may charge any price or no price for each copy that you convey, and you may offer
+support or warranty protection for a fee.
+
+### 5. Conveying Modified Source Versions
+
+You may convey a work based on the Program, or the modifications to produce it from
+the Program, in the form of source code under the terms of section 4, provided that
+you also meet all of these conditions:
+
+* **a)** The work must carry prominent notices stating that you modified it, and giving a
+relevant date.
+* **b)** The work must carry prominent notices stating that it is released under this
+License and any conditions added under section 7. This requirement modifies the
+requirement in section 4 to “keep intact all notices”.
+* **c)** You must license the entire work, as a whole, under this License to anyone who
+comes into possession of a copy. This License will therefore apply, along with any
+applicable section 7 additional terms, to the whole of the work, and all its parts,
+regardless of how they are packaged. This License gives no permission to license the
+work in any other way, but it does not invalidate such permission if you have
+separately received it.
+* **d)** If the work has interactive user interfaces, each must display Appropriate Legal
+Notices; however, if the Program has interactive interfaces that do not display
+Appropriate Legal Notices, your work need not make them do so.
+
+A compilation of a covered work with other separate and independent works, which are
+not by their nature extensions of the covered work, and which are not combined with
+it such as to form a larger program, in or on a volume of a storage or distribution
+medium, is called an “aggregate” if the compilation and its resulting
+copyright are not used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work in an aggregate
+does not cause this License to apply to the other parts of the aggregate.
+
+### 6. Conveying Non-Source Forms
+
+You may convey a covered work in object code form under the terms of sections 4 and
+5, provided that you also convey the machine-readable Corresponding Source under the
+terms of this License, in one of these ways:
+
+* **a)** Convey the object code in, or embodied in, a physical product (including a
+physical distribution medium), accompanied by the Corresponding Source fixed on a
+durable physical medium customarily used for software interchange.
+* **b)** Convey the object code in, or embodied in, a physical product (including a
+physical distribution medium), accompanied by a written offer, valid for at least
+three years and valid for as long as you offer spare parts or customer support for
+that product model, to give anyone who possesses the object code either **(1)** a copy of
+the Corresponding Source for all the software in the product that is covered by this
+License, on a durable physical medium customarily used for software interchange, for
+a price no more than your reasonable cost of physically performing this conveying of
+source, or **(2)** access to copy the Corresponding Source from a network server at no
+charge.
+* **c)** Convey individual copies of the object code with a copy of the written offer to
+provide the Corresponding Source. This alternative is allowed only occasionally and
+noncommercially, and only if you received the object code with such an offer, in
+accord with subsection 6b.
+* **d)** Convey the object code by offering access from a designated place (gratis or for
+a charge), and offer equivalent access to the Corresponding Source in the same way
+through the same place at no further charge. You need not require recipients to copy
+the Corresponding Source along with the object code. If the place to copy the object
+code is a network server, the Corresponding Source may be on a different server
+(operated by you or a third party) that supports equivalent copying facilities,
+provided you maintain clear directions next to the object code saying where to find
+the Corresponding Source. Regardless of what server hosts the Corresponding Source,
+you remain obligated to ensure that it is available for as long as needed to satisfy
+these requirements.
+* **e)** Convey the object code using peer-to-peer transmission, provided you inform
+other peers where the object code and Corresponding Source of the work are being
+offered to the general public at no charge under subsection 6d.
+
+A separable portion of the object code, whose source code is excluded from the
+Corresponding Source as a System Library, need not be included in conveying the
+object code work.
+
+A “User Product” is either **(1)** a “consumer product”, which
+means any tangible personal property which is normally used for personal, family, or
+household purposes, or **(2)** anything designed or sold for incorporation into a
+dwelling. In determining whether a product is a consumer product, doubtful cases
+shall be resolved in favor of coverage. For a particular product received by a
+particular user, “normally used” refers to a typical or common use of
+that class of product, regardless of the status of the particular user or of the way
+in which the particular user actually uses, or expects or is expected to use, the
+product. A product is a consumer product regardless of whether the product has
+substantial commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+“Installation Information” for a User Product means any methods,
+procedures, authorization keys, or other information required to install and execute
+modified versions of a covered work in that User Product from a modified version of
+its Corresponding Source. The information must suffice to ensure that the continued
+functioning of the modified object code is in no case prevented or interfered with
+solely because modification has been made.
+
+If you convey an object code work under this section in, or with, or specifically for
+use in, a User Product, and the conveying occurs as part of a transaction in which
+the right of possession and use of the User Product is transferred to the recipient
+in perpetuity or for a fixed term (regardless of how the transaction is
+characterized), the Corresponding Source conveyed under this section must be
+accompanied by the Installation Information. But this requirement does not apply if
+neither you nor any third party retains the ability to install modified object code
+on the User Product (for example, the work has been installed in ROM).
+
+The requirement to provide Installation Information does not include a requirement to
+continue to provide support service, warranty, or updates for a work that has been
+modified or installed by the recipient, or for the User Product in which it has been
+modified or installed. Access to a network may be denied when the modification itself
+materially and adversely affects the operation of the network or violates the rules
+and protocols for communication across the network.
+
+Corresponding Source conveyed, and Installation Information provided, in accord with
+this section must be in a format that is publicly documented (and with an
+implementation available to the public in source code form), and must require no
+special password or key for unpacking, reading or copying.
+
+### 7. Additional Terms
+
+“Additional permissions” are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions. Additional
+permissions that are applicable to the entire Program shall be treated as though they
+were included in this License, to the extent that they are valid under applicable
+law. If additional permissions apply only to part of the Program, that part may be
+used separately under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+When you convey a copy of a covered work, you may at your option remove any
+additional permissions from that copy, or from any part of it. (Additional
+permissions may be written to require their own removal in certain cases when you
+modify the work.) You may place additional permissions on material, added by you to a
+covered work, for which you have or can give appropriate copyright permission.
+
+Notwithstanding any other provision of this License, for material you add to a
+covered work, you may (if authorized by the copyright holders of that material)
+supplement the terms of this License with terms:
+
+* **a)** Disclaiming warranty or limiting liability differently from the terms of
+sections 15 and 16 of this License; or
+* **b)** Requiring preservation of specified reasonable legal notices or author
+attributions in that material or in the Appropriate Legal Notices displayed by works
+containing it; or
+* **c)** Prohibiting misrepresentation of the origin of that material, or requiring that
+modified versions of such material be marked in reasonable ways as different from the
+original version; or
+* **d)** Limiting the use for publicity purposes of names of licensors or authors of the
+material; or
+* **e)** Declining to grant rights under trademark law for use of some trade names,
+trademarks, or service marks; or
+* **f)** Requiring indemnification of licensors and authors of that material by anyone
+who conveys the material (or modified versions of it) with contractual assumptions of
+liability to the recipient, for any liability that these contractual assumptions
+directly impose on those licensors and authors.
+
+All other non-permissive additional terms are considered “further
+restrictions” within the meaning of section 10. If the Program as you received
+it, or any part of it, contains a notice stating that it is governed by this License
+along with a term that is a further restriction, you may remove that term. If a
+license document contains a further restriction but permits relicensing or conveying
+under this License, you may add to a covered work material governed by the terms of
+that license document, provided that the further restriction does not survive such
+relicensing or conveying.
+
+If you add terms to a covered work in accord with this section, you must place, in
+the relevant source files, a statement of the additional terms that apply to those
+files, or a notice indicating where to find the applicable terms.
+
+Additional terms, permissive or non-permissive, may be stated in the form of a
+separately written license, or stated as exceptions; the above requirements apply
+either way.
+
+### 8. Termination
+
+You may not propagate or modify a covered work except as expressly provided under
+this License. Any attempt otherwise to propagate or modify it is void, and will
+automatically terminate your rights under this License (including any patent licenses
+granted under the third paragraph of section 11).
+
+However, if you cease all violation of this License, then your license from a
+particular copyright holder is reinstated **(a)** provisionally, unless and until the
+copyright holder explicitly and finally terminates your license, and **(b)** permanently,
+if the copyright holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is reinstated permanently
+if the copyright holder notifies you of the violation by some reasonable means, this
+is the first time you have received notice of violation of this License (for any
+work) from that copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+Termination of your rights under this section does not terminate the licenses of
+parties who have received copies or rights from you under this License. If your
+rights have been terminated and not permanently reinstated, you do not qualify to
+receive new licenses for the same material under section 10.
+
+### 9. Acceptance Not Required for Having Copies
+
+You are not required to accept this License in order to receive or run a copy of the
+Program. Ancillary propagation of a covered work occurring solely as a consequence of
+using peer-to-peer transmission to receive a copy likewise does not require
+acceptance. However, nothing other than this License grants you permission to
+propagate or modify any covered work. These actions infringe copyright if you do not
+accept this License. Therefore, by modifying or propagating a covered work, you
+indicate your acceptance of this License to do so.
+
+### 10. Automatic Licensing of Downstream Recipients
+
+Each time you convey a covered work, the recipient automatically receives a license
+from the original licensors, to run, modify and propagate that work, subject to this
+License. You are not responsible for enforcing compliance by third parties with this
+License.
+
+An “entity transaction” is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an organization, or
+merging organizations. If propagation of a covered work results from an entity
+transaction, each party to that transaction who receives a copy of the work also
+receives whatever licenses to the work the party's predecessor in interest had or
+could give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if the predecessor
+has it or can get it with reasonable efforts.
+
+You may not impose any further restrictions on the exercise of the rights granted or
+affirmed under this License. For example, you may not impose a license fee, royalty,
+or other charge for exercise of rights granted under this License, and you may not
+initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging
+that any patent claim is infringed by making, using, selling, offering for sale, or
+importing the Program or any portion of it.
+
+### 11. Patents
+
+A “contributor” is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The work thus
+licensed is called the contributor's “contributor version”.
+
+A contributor's “essential patent claims” are all patent claims owned or
+controlled by the contributor, whether already acquired or hereafter acquired, that
+would be infringed by some manner, permitted by this License, of making, using, or
+selling its contributor version, but do not include claims that would be infringed
+only as a consequence of further modification of the contributor version. For
+purposes of this definition, “control” includes the right to grant patent
+sublicenses in a manner consistent with the requirements of this License.
+
+Each contributor grants you a non-exclusive, worldwide, royalty-free patent license
+under the contributor's essential patent claims, to make, use, sell, offer for sale,
+import and otherwise run, modify and propagate the contents of its contributor
+version.
+
+In the following three paragraphs, a “patent license” is any express
+agreement or commitment, however denominated, not to enforce a patent (such as an
+express permission to practice a patent or covenant not to sue for patent
+infringement). To “grant” such a patent license to a party means to make
+such an agreement or commitment not to enforce a patent against the party.
+
+If you convey a covered work, knowingly relying on a patent license, and the
+Corresponding Source of the work is not available for anyone to copy, free of charge
+and under the terms of this License, through a publicly available network server or
+other readily accessible means, then you must either **(1)** cause the Corresponding
+Source to be so available, or **(2)** arrange to deprive yourself of the benefit of the
+patent license for this particular work, or **(3)** arrange, in a manner consistent with
+the requirements of this License, to extend the patent license to downstream
+recipients. “Knowingly relying” means you have actual knowledge that, but
+for the patent license, your conveying the covered work in a country, or your
+recipient's use of the covered work in a country, would infringe one or more
+identifiable patents in that country that you have reason to believe are valid.
+
+If, pursuant to or in connection with a single transaction or arrangement, you
+convey, or propagate by procuring conveyance of, a covered work, and grant a patent
+license to some of the parties receiving the covered work authorizing them to use,
+propagate, modify or convey a specific copy of the covered work, then the patent
+license you grant is automatically extended to all recipients of the covered work and
+works based on it.
+
+A patent license is “discriminatory” if it does not include within the
+scope of its coverage, prohibits the exercise of, or is conditioned on the
+non-exercise of one or more of the rights that are specifically granted under this
+License. You may not convey a covered work if you are a party to an arrangement with
+a third party that is in the business of distributing software, under which you make
+payment to the third party based on the extent of your activity of conveying the
+work, and under which the third party grants, to any of the parties who would receive
+the covered work from you, a discriminatory patent license **(a)** in connection with
+copies of the covered work conveyed by you (or copies made from those copies), or **(b)**
+primarily for and in connection with specific products or compilations that contain
+the covered work, unless you entered into that arrangement, or that patent license
+was granted, prior to 28 March 2007.
+
+Nothing in this License shall be construed as excluding or limiting any implied
+license or other defenses to infringement that may otherwise be available to you
+under applicable patent law.
+
+### 12. No Surrender of Others' Freedom
+
+If conditions are imposed on you (whether by court order, agreement or otherwise)
+that contradict the conditions of this License, they do not excuse you from the
+conditions of this License. If you cannot convey a covered work so as to satisfy
+simultaneously your obligations under this License and any other pertinent
+obligations, then as a consequence you may not convey it at all. For example, if you
+agree to terms that obligate you to collect a royalty for further conveying from
+those to whom you convey the Program, the only way you could satisfy both those terms
+and this License would be to refrain entirely from conveying the Program.
+
+### 13. Use with the GNU Affero General Public License
+
+Notwithstanding any other provision of this License, you have permission to link or
+combine any covered work with a work licensed under version 3 of the GNU Affero
+General Public License into a single combined work, and to convey the resulting work.
+The terms of this License will continue to apply to the part which is the covered
+work, but the special requirements of the GNU Affero General Public License, section
+13, concerning interaction through a network will apply to the combination as such.
+
+### 14. Revised Versions of this License
+
+The Free Software Foundation may publish revised and/or new versions of the GNU
+General Public License from time to time. Such new versions will be similar in spirit
+to the present version, but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program specifies that
+a certain numbered version of the GNU General Public License “or any later
+version” applies to it, you have the option of following the terms and
+conditions either of that numbered version or of any later version published by the
+Free Software Foundation. If the Program does not specify a version number of the GNU
+General Public License, you may choose any version ever published by the Free
+Software Foundation.
+
+If the Program specifies that a proxy can decide which future versions of the GNU
+General Public License can be used, that proxy's public statement of acceptance of a
+version permanently authorizes you to choose that version for the Program.
+
+Later license versions may give you additional or different permissions. However, no
+additional obligations are imposed on any author or copyright holder as a result of
+your choosing to follow a later version.
+
+### 15. Disclaimer of Warranty
+
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER
+EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
+QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
+DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+### 16. Limitation of Liability
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY
+COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS
+PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
+INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE
+OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE
+WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+### 17. Interpretation of Sections 15 and 16
+
+If the disclaimer of warranty and limitation of liability provided above cannot be
+given local legal effect according to their terms, reviewing courts shall apply local
+law that most closely approximates an absolute waiver of all civil liability in
+connection with the Program, unless a warranty or assumption of liability accompanies
+a copy of the Program in return for a fee.
+
+_END OF TERMS AND CONDITIONS_
+
+## How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest possible use to
+the public, the best way to achieve this is to make it free software which everyone
+can redistribute and change under these terms.
+
+To do so, attach the following notices to the program. It is safest to attach them
+to the start of each source file to most effectively state the exclusion of warranty;
+and each file should have at least the “copyright” line and a pointer to
+where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program does terminal interaction, make it output a short notice like this
+when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type 'show c' for details.
+
+The hypothetical commands `show w` and `show c` should show the appropriate parts of
+the General Public License. Of course, your program's commands might be different;
+for a GUI interface, you would use an “about box”.
+
+You should also get your employer (if you work as a programmer) or school, if any, to
+sign a “copyright disclaimer” for the program, if necessary. For more
+information on this, and how to apply and follow the GNU GPL, see
+&lt;<http://www.gnu.org/licenses/>&gt;.
+
+The GNU General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may consider it
+more useful to permit linking proprietary applications with the library. If this is
+what you want to do, use the GNU Lesser General Public License instead of this
+License. But first, please read
+&lt;<http://www.gnu.org/philosophy/why-not-lgpl.html>&gt;.
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 02e3755..0000000
--- a/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-all:
-	make book
-
-book:
-	R -e 'bookdown::render_book(".")'
-
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..6ae9268
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,2 @@
+# Generated by roxygen2: do not edit by hand
+
diff --git a/README.md b/README.md
index ca88f65..f980b29 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,16 @@
-# R for Mass Spectrometry documentation
+<!-- badges: start -->
+📦 [Repo](https://github.com/js2264/R4MS) [![rworkflows](https://img.shields.io/github/actions/workflow/status/js2264/R4MS/rworkflows.yml?label=Package%20check)](https://github.com/js2264/R4MS/actions/workflows/rworkflows.yml)   
+📖 [Book](https://js2264.github.io/R4MS/) [![deployment](https://img.shields.io/github/actions/workflow/status/js2264/R4MS/pages/pages-build-deployment?label=Book%20deployment)](https://github.com/js2264/R4MS/actions/workflows/pages/pages-build-deployment)  
+🐳 [Docker](https://github.com/js2264/R4MS/pkgs/container/R4MS) [![biocbook](https://img.shields.io/github/actions/workflow/status/js2264/R4MS/biocbook.yml?label=Docker%20image)](https://github.com/js2264/R4MS/actions/workflows/biocbook.yml)  
+<!-- badges: end -->
 
-This repository provides documentation and teaching material focus on
-the analysis of mass spectrometry data for proteomics and metabolomics
-using the [R for Mass
-Spectrometry](https://www.rformassspectrometry.org/) software
-infrastructure.
+This is the [BiocBook](https://www.bioconductor.org/packages/release/bioc/html/BiocBook.html) version of the original [*R for Mass Spectrometry* book](https://rformassspectrometry.github.io/book).
 
+The original authors of the *R for Mass Spectrometry* book are:
+
+- Laurent Gatto
+- Sebastian Gibb
+- Johannes Rainer
 
 Go to http://rformassspectrometry.github.io/book to browse the
 material online.
diff --git a/_bookdown.yml b/_bookdown.yml
deleted file mode 100644
index 5d919b6..0000000
--- a/_bookdown.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-book_filename: "R4MS"
-delete_merged_file: true
-output_dir: "docs"
-language:
-  ui:
-    chapter_name: "Chapter "
diff --git a/docs/404.html b/docs/404.html
deleted file mode 100644
index 236f113..0000000
--- a/docs/404.html
+++ /dev/null
@@ -1,206 +0,0 @@
-<!DOCTYPE html>
-<html lang="" xml:lang="">
-<head>
-
-<meta charset="utf-8" />
-<meta name="generator" content="pandoc" />
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<meta property="og:title" content="Page not found | R for Mass Spectrometry" />
-<meta property="og:type" content="book" />
-
-
-
-
-<meta name="author" content="Laurent Gatto, Sebastian Gibb, Johannes Rainer" />
-
-<meta name="date" content="2023-09-06" />
-
-
-<meta name="description" content="Page not found | R for Mass Spectrometry">
-
-<title>Page not found | R for Mass Spectrometry</title>
-
-<link href="libs/tufte-css-2015.12.29/tufte.css" rel="stylesheet" />
-<link href="libs/tufte-css-2015.12.29/envisioned.css" rel="stylesheet" />
-<link href="libs/msmb-css-0/msmb.css" rel="stylesheet" />
-<script>
-function toggle_visibility(id1, id2) {
-var e = document.getElementById(id1);
-var f = document.getElementById(id2);
-
-e.style.display = ((e.style.display!='none') ? 'none' : 'block');
-
-if(f.classList.contains('fa-plus-square')) {
-    f.classList.add('fa-minus-square')
-    f.classList.remove('fa-plus-square')
-} else {
-    f.classList.add('fa-plus-square')
-    f.classList.remove('fa-minus-square')
-}
-
-}
-</script>
-<script>
-function copy_link(id) {
-  var dummy = document.createElement('input'),
-  text = window.location.href.split(/[?#]/)[0] + '#' + id;
-  document.body.appendChild(dummy);
-  dummy.value = text;
-  dummy.select();
-  document.execCommand('copy');
-  document.body.removeChild(dummy);
-  
-  var tooltip = document.getElementById(id + '-tooltip');
-  tooltip.innerHTML = 'Copied!';
-}
-
-function reset_tooltip(id) {
-  var tooltip = document.getElementById(id);
-  tooltip.innerHTML = 'Copy link';
-}
-</script>
-
-
-<style type="text/css">code{white-space: pre;}</style>
-<style type="text/css">
-pre > code.sourceCode { white-space: pre; position: relative; }
-pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
-pre > code.sourceCode > span:empty { height: 1.2em; }
-
-code.sourceCode > span { color: inherit; text-decoration: inherit; }
-div.sourceCode { margin: 1em 0; }
-pre.sourceCode { margin: 0; }
-@media screen {
-div.sourceCode { overflow: auto; }
-}
-@media print {
-pre > code.sourceCode { white-space: pre-wrap; }
-pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
-}
-pre.numberSource code
-  { counter-reset: source-line 0; }
-pre.numberSource code > span
-  { position: relative; left: -4em; counter-increment: source-line; }
-pre.numberSource code > span > a:first-child::before
-  { content: counter(source-line);
-    position: relative; left: -1em; text-align: right; vertical-align: baseline;
-    border: none; display: inline-block;
-    -webkit-touch-callout: none; -webkit-user-select: none;
-    -khtml-user-select: none; -moz-user-select: none;
-    -ms-user-select: none; user-select: none;
-    padding: 0 4px; width: 4em;
-    color: #aaaaaa;
-  }
-pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
-div.sourceCode
-  {  background-color: #f8f8f8; }
-@media screen {
-pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
-}
-code span.al { color: #ef2929; } /* Alert */
-code span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
-code span.at { color: #204a87; } /* Attribute */
-code span.bn { color: #0000cf; } /* BaseN */
-code span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
-code span.ch { color: #4e9a06; } /* Char */
-code span.cn { color: #8f5902; } /* Constant */
-code span.co { color: #8f5902; font-style: italic; } /* Comment */
-code span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
-code span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
-code span.dt { color: #204a87; } /* DataType */
-code span.dv { color: #0000cf; } /* DecVal */
-code span.er { color: #a40000; font-weight: bold; } /* Error */
-code span.ex { } /* Extension */
-code span.fl { color: #0000cf; } /* Float */
-code span.fu { color: #204a87; font-weight: bold; } /* Function */
-code span.im { } /* Import */
-code span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
-code span.kw { color: #204a87; font-weight: bold; } /* Keyword */
-code span.op { color: #ce5c00; font-weight: bold; } /* Operator */
-code span.ot { color: #8f5902; } /* Other */
-code span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
-code span.sc { color: #ce5c00; font-weight: bold; } /* SpecialChar */
-code span.ss { color: #4e9a06; } /* SpecialString */
-code span.st { color: #4e9a06; } /* String */
-code span.va { color: #000000; } /* Variable */
-code span.vs { color: #4e9a06; } /* VerbatimString */
-code span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
-</style>
-
-
-<style type="text/css">
-/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
-div.csl-bib-body { }
-div.csl-entry {
-  clear: both;
-}
-.hanging div.csl-entry {
-  margin-left:2em;
-  text-indent:-2em;
-}
-div.csl-left-margin {
-  min-width:2em;
-  float:left;
-}
-div.csl-right-inline {
-  margin-left:2em;
-  padding-left:1em;
-}
-div.csl-indent {
-  margin-left: 2em;
-}
-</style>
-
-<link rel="stylesheet" href="style.css" type="text/css" />
-
-</head>
-
-<body>
-
-
-
-<div class="row">
-<div class="col-sm-12">
-<div id="TOC">
-<ul class="navbar">
-<li class="msmb"><p class="title">R for Mass Spectrometry<p><p class="author">Laurent Gatto, Sebastian Gibb, Johannes Rainer</p>
-<li class="dropdown" style="float:right">
-<a href="javascript:void(0)" class="dropbtn">&#x25BE; Chapters</a>
-<div class="dropdown-content">
-<a href="index.html" id="toc-preamble"><span class="toc-section-number">1</span> Preamble</a>
-<a href="sec-msintro.html" id="toc-sec-msintro"><span class="toc-section-number">2</span> Introduction</a>
-<a href="sec-raw.html" id="toc-sec-raw"><span class="toc-section-number">3</span> Raw MS data</a>
-<a href="sec-id.html" id="toc-sec-id"><span class="toc-section-number">4</span> Identification data</a>
-<a href="sec-quant.html" id="toc-sec-quant"><span class="toc-section-number">5</span> Quantitative data</a>
-<a href="sec-anx.html" id="toc-sec-anx"><span class="toc-section-number">6</span> Annex</a>
-<a href="sec-si.html" id="toc-sec-si"><span class="toc-section-number">7</span> Additional materials and session information</a>
-</div>
-</li>
-</ul>
-</div>
-</div>
-</div>
-<div class="row">
-<div class="col-sm-12">
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body><div id="page-not-found" class="section level1">
-<h1>Page not found</h1>
-<p>The page you requested cannot be found (perhaps it was moved or renamed).</p>
-<p>You may want to try searching to find the page's new location, or use
-the table of contents to find the page you are looking for.</p>
-</div></body></html>
-
-<p style="text-align: center;">
-</p>
-<p class="build-date">Page built: 
-2023-09-06
- using 
-R version 4.3.1 Patched (2023-07-10 r84676)
-</p>
-</div>
-</div>
-
-
-
-</body>
-</html>
diff --git a/docs/R4MS_files/figure-html/answid1-1.png b/docs/R4MS_files/figure-html/answid1-1.png
deleted file mode 100644
index 90aea6a..0000000
Binary files a/docs/R4MS_files/figure-html/answid1-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/ex_raw-1.png b/docs/R4MS_files/figure-html/ex_raw-1.png
deleted file mode 100644
index c5c6b8e..0000000
Binary files a/docs/R4MS_files/figure-html/ex_raw-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/ex_raw-2.png b/docs/R4MS_files/figure-html/ex_raw-2.png
deleted file mode 100644
index 3aaf3a1..0000000
Binary files a/docs/R4MS_files/figure-html/ex_raw-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/ex_raw2-1.png b/docs/R4MS_files/figure-html/ex_raw2-1.png
deleted file mode 100644
index 036c7da..0000000
Binary files a/docs/R4MS_files/figure-html/ex_raw2-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/ex_raw2-2.png b/docs/R4MS_files/figure-html/ex_raw2-2.png
deleted file mode 100644
index 2ed9d49..0000000
Binary files a/docs/R4MS_files/figure-html/ex_raw2-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/featuresplot-1.png b/docs/R4MS_files/figure-html/featuresplot-1.png
deleted file mode 100644
index 8b5a86e..0000000
Binary files a/docs/R4MS_files/figure-html/featuresplot-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/idqc1-1.png b/docs/R4MS_files/figure-html/idqc1-1.png
deleted file mode 100644
index 3e58c7e..0000000
Binary files a/docs/R4MS_files/figure-html/idqc1-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/idqc2-1.png b/docs/R4MS_files/figure-html/idqc2-1.png
deleted file mode 100644
index 60350c6..0000000
Binary files a/docs/R4MS_files/figure-html/idqc2-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/imagena-1.png b/docs/R4MS_files/figure-html/imagena-1.png
deleted file mode 100644
index 2346a20..0000000
Binary files a/docs/R4MS_files/figure-html/imagena-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/miximp-1.png b/docs/R4MS_files/figure-html/miximp-1.png
deleted file mode 100644
index 8872ca9..0000000
Binary files a/docs/R4MS_files/figure-html/miximp-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/nSequencePlot-1.png b/docs/R4MS_files/figure-html/nSequencePlot-1.png
deleted file mode 100644
index 0f95474..0000000
Binary files a/docs/R4MS_files/figure-html/nSequencePlot-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/na2-1.png b/docs/R4MS_files/figure-html/na2-1.png
deleted file mode 100644
index 7b50f5c..0000000
Binary files a/docs/R4MS_files/figure-html/na2-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/naex3-1.png b/docs/R4MS_files/figure-html/naex3-1.png
deleted file mode 100644
index 90a7766..0000000
Binary files a/docs/R4MS_files/figure-html/naex3-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/nasetdist-1.png b/docs/R4MS_files/figure-html/nasetdist-1.png
deleted file mode 100644
index 604faf3..0000000
Binary files a/docs/R4MS_files/figure-html/nasetdist-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/plotdens-1.png b/docs/R4MS_files/figure-html/plotdens-1.png
deleted file mode 100644
index 10c9ae3..0000000
Binary files a/docs/R4MS_files/figure-html/plotdens-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/plotpca-1.png b/docs/R4MS_files/figure-html/plotpca-1.png
deleted file mode 100644
index eec5818..0000000
Binary files a/docs/R4MS_files/figure-html/plotpca-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/plotqf-1.png b/docs/R4MS_files/figure-html/plotqf-1.png
deleted file mode 100644
index c8deef0..0000000
Binary files a/docs/R4MS_files/figure-html/plotqf-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/plotqf2-1.png b/docs/R4MS_files/figure-html/plotqf2-1.png
deleted file mode 100644
index fb5ad34..0000000
Binary files a/docs/R4MS_files/figure-html/plotqf2-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-11-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-11-1.png
deleted file mode 100644
index efe0dc0..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-11-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-12-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-12-1.png
deleted file mode 100644
index efe0dc0..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-12-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-13-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-13-1.png
deleted file mode 100644
index 5fae6c1..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-13-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-14-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-14-1.png
deleted file mode 100644
index 5fae6c1..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-14-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-15-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-15-1.png
deleted file mode 100644
index dafc790..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-15-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-16-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-16-1.png
deleted file mode 100644
index 78c33a0..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-16-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-17-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-17-1.png
deleted file mode 100644
index 43f406a..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-17-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-18-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-18-1.png
deleted file mode 100644
index e1acd0b..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-18-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-19-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-19-1.png
deleted file mode 100644
index e1acd0b..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-19-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-20-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-20-1.png
deleted file mode 100644
index 980e04a..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-20-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-21-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-21-1.png
deleted file mode 100644
index a1580a5..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-21-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-22-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-22-1.png
deleted file mode 100644
index 08da677..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-22-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-23-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-23-1.png
deleted file mode 100644
index 42d1ce7..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-23-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-24-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-24-1.png
deleted file mode 100644
index fa90b83..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-24-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-25-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-25-1.png
deleted file mode 100644
index 2265bdc..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-25-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-41-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-41-1.png
deleted file mode 100644
index bc5231b..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-41-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-42-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-42-1.png
deleted file mode 100644
index bc5231b..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-42-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-43-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-43-1.png
deleted file mode 100644
index f8af815..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-43-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-45-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-45-1.png
deleted file mode 100644
index f8af815..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-45-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-49-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-49-1.png
deleted file mode 100644
index 0044c98..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-49-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-51-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-51-1.png
deleted file mode 100644
index 158545f..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-51-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-52-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-52-1.png
deleted file mode 100644
index 158545f..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-52-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-53-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-53-1.png
deleted file mode 100644
index 102964c..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-53-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-54-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-54-1.png
deleted file mode 100644
index 102964c..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-54-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-55-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-55-1.png
deleted file mode 100644
index bd92623..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-55-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-56-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-56-1.png
deleted file mode 100644
index bd92623..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-56-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-57-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-57-1.png
deleted file mode 100644
index 72026b9..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-57-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-57-2.png b/docs/R4MS_files/figure-html/unnamed-chunk-57-2.png
deleted file mode 100644
index 7902420..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-57-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-58-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-58-1.png
deleted file mode 100644
index e0707a0..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-58-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-59-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-59-1.png
deleted file mode 100644
index 06c9ac6..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-59-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-59-2.png b/docs/R4MS_files/figure-html/unnamed-chunk-59-2.png
deleted file mode 100644
index 362f25f..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-59-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-59-3.png b/docs/R4MS_files/figure-html/unnamed-chunk-59-3.png
deleted file mode 100644
index b2b861a..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-59-3.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-60-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-60-1.png
deleted file mode 100644
index e913f2e..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-60-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-60-2.png b/docs/R4MS_files/figure-html/unnamed-chunk-60-2.png
deleted file mode 100644
index c7fc839..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-60-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-60-3.png b/docs/R4MS_files/figure-html/unnamed-chunk-60-3.png
deleted file mode 100644
index b2c2e11..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-60-3.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-61-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-61-1.png
deleted file mode 100644
index e913f2e..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-61-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-61-2.png b/docs/R4MS_files/figure-html/unnamed-chunk-61-2.png
deleted file mode 100644
index 362f25f..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-61-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-61-3.png b/docs/R4MS_files/figure-html/unnamed-chunk-61-3.png
deleted file mode 100644
index b2b861a..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-61-3.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-62-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-62-1.png
deleted file mode 100644
index b7835ed..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-62-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-62-2.png b/docs/R4MS_files/figure-html/unnamed-chunk-62-2.png
deleted file mode 100644
index b7e8684..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-62-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-62-3.png b/docs/R4MS_files/figure-html/unnamed-chunk-62-3.png
deleted file mode 100644
index b2b861a..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-62-3.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-63-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-63-1.png
deleted file mode 100644
index 1ff202c..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-63-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-63-2.png b/docs/R4MS_files/figure-html/unnamed-chunk-63-2.png
deleted file mode 100644
index c55452a..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-63-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-64-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-64-1.png
deleted file mode 100644
index e4c4f03..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-64-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-64-2.png b/docs/R4MS_files/figure-html/unnamed-chunk-64-2.png
deleted file mode 100644
index 981908f..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-64-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-65-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-65-1.png
deleted file mode 100644
index e4c4f03..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-65-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-65-2.png b/docs/R4MS_files/figure-html/unnamed-chunk-65-2.png
deleted file mode 100644
index 981908f..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-65-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-66-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-66-1.png
deleted file mode 100644
index 3417715..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-66-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-66-2.png b/docs/R4MS_files/figure-html/unnamed-chunk-66-2.png
deleted file mode 100644
index 75099b0..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-66-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-67-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-67-1.png
deleted file mode 100644
index 8df003a..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-67-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-67-2.png b/docs/R4MS_files/figure-html/unnamed-chunk-67-2.png
deleted file mode 100644
index bb8705f..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-67-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-68-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-68-1.png
deleted file mode 100644
index 8df003a..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-68-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-68-2.png b/docs/R4MS_files/figure-html/unnamed-chunk-68-2.png
deleted file mode 100644
index a7a6f34..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-68-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-69-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-69-1.png
deleted file mode 100644
index f482722..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-69-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-69-2.png b/docs/R4MS_files/figure-html/unnamed-chunk-69-2.png
deleted file mode 100644
index bb8705f..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-69-2.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/unnamed-chunk-71-1.png b/docs/R4MS_files/figure-html/unnamed-chunk-71-1.png
deleted file mode 100644
index a42ffae..0000000
Binary files a/docs/R4MS_files/figure-html/unnamed-chunk-71-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/vis-1.png b/docs/R4MS_files/figure-html/vis-1.png
deleted file mode 100644
index 8b50cd4..0000000
Binary files a/docs/R4MS_files/figure-html/vis-1.png and /dev/null differ
diff --git a/docs/R4MS_files/figure-html/vp-1.png b/docs/R4MS_files/figure-html/vp-1.png
deleted file mode 100644
index 716e430..0000000
Binary files a/docs/R4MS_files/figure-html/vp-1.png and /dev/null differ
diff --git a/docs/identification-data.html b/docs/identification-data.html
deleted file mode 100644
index f6ab029..0000000
--- a/docs/identification-data.html
+++ /dev/null
@@ -1,1758 +0,0 @@
-<!DOCTYPE html>
-<html lang="" xml:lang="">
-<head>
-
-<meta charset="utf-8" />
-<meta name="generator" content="pandoc" />
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<meta property="og:title" content="Chapter 4 Identification data | R for Mass Spectrometry" />
-<meta property="og:type" content="book" />
-
-
-
-
-
-<meta name="author" content="Laurent Gatto, Sebastian Gibb, Johannes Rainer" />
-
-<meta name="date" content="2021-08-31" />
-
-
-<meta name="description" content="Chapter 4 Identification data | R for Mass Spectrometry">
-
-<title>Chapter 4 Identification data | R for Mass Spectrometry</title>
-
-<script src="libs/header-attrs-2.10/header-attrs.js"></script>
-<link href="libs/tufte-css-2015.12.29/tufte.css" rel="stylesheet" />
-<link href="libs/tufte-css-2015.12.29/envisioned.css" rel="stylesheet" />
-<link href="libs/msmb-css-0/msmb.css" rel="stylesheet" />
-<script>
-function toggle_visibility(id1, id2) {
-var e = document.getElementById(id1);
-var f = document.getElementById(id2);
-
-e.style.display = ((e.style.display!='none') ? 'none' : 'block');
-
-if(f.classList.contains('fa-plus-square')) {
-    f.classList.add('fa-minus-square')
-    f.classList.remove('fa-plus-square')
-} else {
-    f.classList.add('fa-plus-square')
-    f.classList.remove('fa-minus-square')
-}
-
-}
-</script>
-<script>
-function copy_link(id) {
-  var dummy = document.createElement('input'),
-  text = window.location.href.split(/[?#]/)[0] + '#' + id;
-  document.body.appendChild(dummy);
-  dummy.value = text;
-  dummy.select();
-  document.execCommand('copy');
-  document.body.removeChild(dummy);
-  
-  var tooltip = document.getElementById(id + '-tooltip');
-  tooltip.innerHTML = 'Copied!';
-}
-
-function reset_tooltip(id) {
-  var tooltip = document.getElementById(id);
-  tooltip.innerHTML = 'Copy link';
-}
-</script>
-
-
-<style type="text/css">code{white-space: pre;}</style>
-<style type="text/css">
-pre > code.sourceCode { white-space: pre; position: relative; }
-pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
-pre > code.sourceCode > span:empty { height: 1.2em; }
-
-code.sourceCode > span { color: inherit; text-decoration: inherit; }
-div.sourceCode { margin: 1em 0; }
-pre.sourceCode { margin: 0; }
-@media screen {
-div.sourceCode { overflow: auto; }
-}
-@media print {
-pre > code.sourceCode { white-space: pre-wrap; }
-pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
-}
-pre.numberSource code
-  { counter-reset: source-line 0; }
-pre.numberSource code > span
-  { position: relative; left: -4em; counter-increment: source-line; }
-pre.numberSource code > span > a:first-child::before
-  { content: counter(source-line);
-    position: relative; left: -1em; text-align: right; vertical-align: baseline;
-    border: none; display: inline-block;
-    -webkit-touch-callout: none; -webkit-user-select: none;
-    -khtml-user-select: none; -moz-user-select: none;
-    -ms-user-select: none; user-select: none;
-    padding: 0 4px; width: 4em;
-    color: #aaaaaa;
-  }
-pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
-div.sourceCode
-  {  background-color: #f8f8f8; }
-@media screen {
-pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
-}
-code span.al { color: #ef2929; } /* Alert */
-code span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
-code span.at { color: #c4a000; } /* Attribute */
-code span.bn { color: #0000cf; } /* BaseN */
-code span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
-code span.ch { color: #4e9a06; } /* Char */
-code span.cn { color: #000000; } /* Constant */
-code span.co { color: #8f5902; font-style: italic; } /* Comment */
-code span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
-code span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
-code span.dt { color: #204a87; } /* DataType */
-code span.dv { color: #0000cf; } /* DecVal */
-code span.er { color: #a40000; font-weight: bold; } /* Error */
-code span.ex { } /* Extension */
-code span.fl { color: #0000cf; } /* Float */
-code span.fu { color: #000000; } /* Function */
-code span.im { } /* Import */
-code span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
-code span.kw { color: #204a87; font-weight: bold; } /* Keyword */
-code span.op { color: #ce5c00; font-weight: bold; } /* Operator */
-code span.ot { color: #8f5902; } /* Other */
-code span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
-code span.sc { color: #000000; } /* SpecialChar */
-code span.ss { color: #4e9a06; } /* SpecialString */
-code span.st { color: #4e9a06; } /* String */
-code span.va { color: #000000; } /* Variable */
-code span.vs { color: #4e9a06; } /* VerbatimString */
-code span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
-</style>
-
-
-<style type="text/css">
-/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
-div.csl-bib-body { }
-div.csl-entry {
-  clear: both;
-}
-.hanging div.csl-entry {
-  margin-left:2em;
-  text-indent:-2em;
-}
-div.csl-left-margin {
-  min-width:2em;
-  float:left;
-}
-div.csl-right-inline {
-  margin-left:2em;
-  padding-left:1em;
-}
-div.csl-indent {
-  margin-left: 2em;
-}
-</style>
-
-<link rel="stylesheet" href="style.css" type="text/css" />
-
-</head>
-
-<body>
-
-
-
-<div class="row">
-<div class="col-sm-12">
-<div id="TOC">
-<ul class="navbar">
-<li class="msmb"><p class="title">R for Mass Spectrometry<p><p class="author">Laurent Gatto, Sebastian Gibb, Johannes Rainer</p>
-<li class="dropdown" style="float:right">
-<a href="javascript:void(0)" class="dropbtn">&#x25BE; Chapters</a>
-<div class="dropdown-content">
-<a href="index.html"><span class="toc-section-number">1</span> Preamble</a>
-<a href="sec:msintro.html"><span class="toc-section-number">2</span> Introduction</a>
-<a href="raw-ms-data.html"><span class="toc-section-number">3</span> Raw MS data</a>
-<a id="active-page" href="identification-data.html"><span class="toc-section-number">4</span> Identification data</a><ul class="toc-sections">
-<li class="toc"><a href="#NA"> Identification data.frame</a></li>
-<li class="toc"><a href="#keeping-all-matches"> Keeping all matches</a></li>
-<li class="toc"><a href="#filtering-data"> Filtering data</a></li>
-<li class="toc"><a href="#low-level-access-to-id-data-optional"> Low level access to id data (optional)</a></li>
-<li class="toc"><a href="#msms-database-search"> MS/MS database search</a></li>
-<li class="toc"><a href="#adding-identification-data-to-raw-data"> Adding identification data to raw data</a></li>
-<li class="toc"><a href="#visualising-peptide-spectrum-matches"> Visualising peptide-spectrum matches</a></li>
-<li class="toc"><a href="#comparing-spectra"> Comparing spectra</a></li>
-<li class="toc"><a href="#summary-exercice"> Summary exercice</a></li>
-<li class="toc"><a href="#exploration-and-assessment-of-identifications-using-msnid"> Exploration and Assessment of Identifications using <code>MSnID</code></a></li>
-</ul>
-<a href="sec:quant.html"><span class="toc-section-number">5</span> Quantitative data</a>
-<a href="sec:si.html"><span class="toc-section-number">6</span> References and session information</a>
-</div>
-</li>
-</ul>
-</div>
-</div>
-</div>
-<div class="row">
-<div class="col-sm-12">
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body>
-<div id="identification-data" class="section level1" number="4">
-<h1>
-<span class="header-section-number">Chapter 4</span> Identification data</h1>
-<div id="identification-data.frame" class="section level2" number="4.1">
-<h2>
-<span class="header-section-number">4.1</span> Identification data.frame<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('identification-data.frame')" onmouseout="reset_tooltip('identification-data.frame-tooltip')"><span class="tooltiptext" id="identification-data.frame-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Let’s use the identification from from <code>msdata</code>:</p>
-<div class="sourceCode" id="cb106"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb106-1"><a href="identification-data.html#cb106-1" aria-hidden="true" tabindex="-1"></a>idf <span class="ot">&lt;-</span> msdata<span class="sc">::</span><span class="fu">ident</span>(<span class="at">full.names =</span> <span class="cn">TRUE</span>)</span>
-<span id="cb106-2"><a href="identification-data.html#cb106-2" aria-hidden="true" tabindex="-1"></a><span class="fu">basename</span>(idf)</span></code></pre></div>
-<pre><code>## [1] "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid"</code></pre>
-<p>The easiest way to read identification data in <code>mzIdentML</code> (often
-abbreviated with <code>mzid</code>) into R is to read it with <code>readPSMs()</code>
-function from the <a href="https://rformassspectrometry.github.io/PSM/"><code>PSM</code></a>
-package. The function will parse the file and return a <code>DataFrame</code>.</p>
-<div class="sourceCode" id="cb108"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb108-1"><a href="identification-data.html#cb108-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(PSM)</span>
-<span id="cb108-2"><a href="identification-data.html#cb108-2" aria-hidden="true" tabindex="-1"></a>id <span class="ot">&lt;-</span> <span class="fu">readPSMs</span>(idf)</span>
-<span id="cb108-3"><a href="identification-data.html#cb108-3" aria-hidden="true" tabindex="-1"></a><span class="fu">dim</span>(id)</span></code></pre></div>
-<pre><code>## [1] 5802   35</code></pre>
-<div class="sourceCode" id="cb110"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb110-1"><a href="identification-data.html#cb110-1" aria-hidden="true" tabindex="-1"></a><span class="fu">names</span>(id)</span></code></pre></div>
-<pre><code>##  [1] "sequence"                 "spectrumID"              
-##  [3] "chargeState"              "rank"                    
-##  [5] "passThreshold"            "experimentalMassToCharge"
-##  [7] "calculatedMassToCharge"   "peptideRef"              
-##  [9] "modNum"                   "isDecoy"                 
-## [11] "post"                     "pre"                     
-## [13] "start"                    "end"                     
-## [15] "DatabaseAccess"           "DBseqLength"             
-## [17] "DatabaseSeq"              "DatabaseDescription"     
-## [19] "scan.number.s."           "acquisitionNum"          
-## [21] "spectrumFile"             "idFile"                  
-## [23] "MS.GF.RawScore"           "MS.GF.DeNovoScore"       
-## [25] "MS.GF.SpecEValue"         "MS.GF.EValue"            
-## [27] "MS.GF.QValue"             "MS.GF.PepQValue"         
-## [29] "modPeptideRef"            "modName"                 
-## [31] "modMass"                  "modLocation"             
-## [33] "subOriginalResidue"       "subReplacementResidue"   
-## [35] "subLocation"</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Verify that this table contains 5802 matches for 5343
-scans and 4938 peptides sequences.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-11" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-11', 'sol-start-11')"></span>
-</p>
-<div id="sol-body-11" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb112"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb112-1"><a href="identification-data.html#cb112-1" aria-hidden="true" tabindex="-1"></a><span class="fu">nrow</span>(id) <span class="do">## number of matches</span></span></code></pre></div>
-<pre><code>## [1] 5802</code></pre>
-<div class="sourceCode" id="cb114"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb114-1"><a href="identification-data.html#cb114-1" aria-hidden="true" tabindex="-1"></a><span class="fu">length</span>(<span class="fu">unique</span>(id<span class="sc">$</span>spectrumID)) <span class="do">## number of scans</span></span></code></pre></div>
-<pre><code>## [1] 5343</code></pre>
-<div class="sourceCode" id="cb116"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb116-1"><a href="identification-data.html#cb116-1" aria-hidden="true" tabindex="-1"></a><span class="fu">length</span>(<span class="fu">unique</span>(id<span class="sc">$</span>sequence))   <span class="do">## number of peptide sequences</span></span></code></pre></div>
-<pre><code>## [1] 4938</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>The PSM data are read as is, without and filtering. As we can see
-below, we still have all the hits from the forward and reverse (decoy)
-databases.</p>
-<div class="sourceCode" id="cb118"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb118-1"><a href="identification-data.html#cb118-1" aria-hidden="true" tabindex="-1"></a><span class="fu">table</span>(id<span class="sc">$</span>isDecoy)</span></code></pre></div>
-<pre><code>## 
-## FALSE  TRUE 
-##  2906  2896</code></pre>
-</div>
-<div id="keeping-all-matches" class="section level2" number="4.2">
-<h2>
-<span class="header-section-number">4.2</span> Keeping all matches<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('keeping-all-matches')" onmouseout="reset_tooltip('keeping-all-matches-tooltip')"><span class="tooltiptext" id="keeping-all-matches-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>The data contains also contains multiple matches for several
-spectra. The table below shows the number of number of spectra that
-have 1, 2, … up to 5 matches.</p>
-<div class="sourceCode" id="cb120"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb120-1"><a href="identification-data.html#cb120-1" aria-hidden="true" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">table</span>(id<span class="sc">$</span>spectrumID))</span></code></pre></div>
-<pre><code>## 
-##    1    2    3    4    5 
-## 4936  369   26   10    2</code></pre>
-<p>Below, we can see how scan 1774 has 4 matches, all to sequence
-<code>RTRYQAEVR</code>, which itself matches to 4 different proteins:</p>
-<div class="sourceCode" id="cb122"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb122-1"><a href="identification-data.html#cb122-1" aria-hidden="true" tabindex="-1"></a>i <span class="ot">&lt;-</span> <span class="fu">which</span>(id<span class="sc">$</span>spectrumID <span class="sc">==</span> <span class="st">"controllerType=0 controllerNumber=1 scan=1774"</span>)</span>
-<span id="cb122-2"><a href="identification-data.html#cb122-2" aria-hidden="true" tabindex="-1"></a>id[i, <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>]</span></code></pre></div>
-<pre><code>## PSM with 4 rows and 5 columns.
-## names(5): sequence spectrumID ... rank passThreshold</code></pre>
-<p>If the goal is to keep all the matches, but arranged by scan/spectrum,
-one can <em>reduce</em> the <code>DataFrame</code> object by the <code>spectrumID</code> variable,
-so that each scan correponds to a single row that still stores all
-values<label for="tufte-sn-5" class="margin-toggle sidenote-number">5</label><input type="checkbox" id="tufte-sn-5" class="margin-toggle"><span class="sidenote"><span class="sidenote-number">5</span> The rownames aren’t needed here are are removed to reduce
-to output in the the next code chunk display parts of <code>id2</code>.</span>:</p>
-<div class="sourceCode" id="cb124"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb124-1"><a href="identification-data.html#cb124-1" aria-hidden="true" tabindex="-1"></a>id2 <span class="ot">&lt;-</span> QFeatures<span class="sc">::</span><span class="fu">reduceDataFrame</span>(id, id<span class="sc">$</span>spectrumID)</span></code></pre></div>
-<pre><code>## Warning: The dim() method for DataFrameList objects is deprecated. Please use
-##   dims() on these objects instead.</code></pre>
-<pre><code>## Warning: The nrow() method for DataFrameList objects is deprecated. Please use
-##   nrows() on these objects instead.</code></pre>
-<pre><code>## Warning: The ncol() method for CompressedSplitDataFrameList objects is
-##   deprecated. Please use ncols() on these objects instead.</code></pre>
-<div class="sourceCode" id="cb128"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb128-1"><a href="identification-data.html#cb128-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rownames</span>(id2) <span class="ot">&lt;-</span> <span class="cn">NULL</span> <span class="do">## rownames not needed here</span></span>
-<span id="cb128-2"><a href="identification-data.html#cb128-2" aria-hidden="true" tabindex="-1"></a><span class="fu">dim</span>(id2)</span></code></pre></div>
-<pre><code>## [1] 5343   35</code></pre>
-<p>The resulting object contains a single entrie for scan 1774 with
-information for the multiple matches stored as lists within the cells.</p>
-<div class="sourceCode" id="cb130"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb130-1"><a href="identification-data.html#cb130-1" aria-hidden="true" tabindex="-1"></a>j <span class="ot">&lt;-</span> <span class="fu">which</span>(id2<span class="sc">$</span>spectrumID <span class="sc">==</span> <span class="st">"controllerType=0 controllerNumber=1 scan=1774"</span>)</span>
-<span id="cb130-2"><a href="identification-data.html#cb130-2" aria-hidden="true" tabindex="-1"></a>id2[j, ]</span></code></pre></div>
-<pre><code>## DataFrame with 1 row and 35 columns
-##                            sequence    spectrumID chargeState          rank
-##                     &lt;CharacterList&gt;   &lt;character&gt;   &lt;integer&gt; &lt;IntegerList&gt;
-## 1 RTRYQAEVR,RTRYQAEVR,RTRYQAEVR,... controller...           2     1,1,1,...
-##   passThreshold experimentalMassToCharge      calculatedMassToCharge
-##       &lt;logical&gt;                &lt;numeric&gt;               &lt;NumericList&gt;
-## 1          TRUE                  589.821 589.823,589.823,589.823,...
-##                    peptideRef        modNum               isDecoy
-##               &lt;CharacterList&gt; &lt;IntegerList&gt;         &lt;LogicalList&gt;
-## 1 Pep1890,Pep1890,Pep1890,...     0,0,0,... FALSE,FALSE,FALSE,...
-##              post             pre         start           end
-##   &lt;CharacterList&gt; &lt;CharacterList&gt; &lt;IntegerList&gt; &lt;IntegerList&gt;
-## 1       P,P,P,...       R,R,R,...  89,99,89,... 97,107,97,...
-##                DatabaseAccess     DBseqLength DatabaseSeq
-##               &lt;CharacterList&gt;   &lt;IntegerList&gt; &lt;character&gt;
-## 1 ECA2104,ECA2867,ECA3427,... 675,619,678,...            
-##                             DatabaseDescription scan.number.s. acquisitionNum
-##                                 &lt;CharacterList&gt;      &lt;numeric&gt;      &lt;numeric&gt;
-## 1 ECA2104 Vg...,ECA2867 pu...,ECA3427 co...,...           1774           1774
-##    spectrumFile        idFile MS.GF.RawScore MS.GF.DeNovoScore MS.GF.SpecEValue
-##     &lt;character&gt;   &lt;character&gt;      &lt;numeric&gt;         &lt;numeric&gt;        &lt;numeric&gt;
-## 1 TMT_Erwini... TMT_Erwini...              0                96      3.69254e-06
-##                  MS.GF.EValue MS.GF.QValue                MS.GF.PepQValue
-##                 &lt;NumericList&gt;    &lt;numeric&gt;                  &lt;NumericList&gt;
-## 1 10.5388,10.5388,10.5388,...            1 0.990816,0.990816,0.990816,...
-##     modPeptideRef         modName       modMass   modLocation
-##   &lt;CharacterList&gt; &lt;CharacterList&gt; &lt;NumericList&gt; &lt;IntegerList&gt;
-## 1    NA,NA,NA,...    NA,NA,NA,...  NA,NA,NA,...  NA,NA,NA,...
-##   subOriginalResidue subReplacementResidue subLocation
-##          &lt;character&gt;           &lt;character&gt;   &lt;integer&gt;
-## 1                 NA                    NA          NA</code></pre>
-<div class="sourceCode" id="cb132"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb132-1"><a href="identification-data.html#cb132-1" aria-hidden="true" tabindex="-1"></a>id2[j, <span class="st">"DatabaseAccess"</span>]</span></code></pre></div>
-<pre><code>## CharacterList of length 1
-## [["controllerType=0 controllerNumber=1 scan=1774"]] ECA2104 ECA2867 ECA3427 ECA4142</code></pre>
-<p>The is the type of complete identification table that could be used to
-annotate an raw mass spectrometry <code>Spectra</code> object, as shown below.</p>
-</div>
-<div id="filtering-data" class="section level2" number="4.3">
-<h2>
-<span class="header-section-number">4.3</span> Filtering data<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('filtering-data')" onmouseout="reset_tooltip('filtering-data-tooltip')"><span class="tooltiptext" id="filtering-data-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Often, the PSM data is filtered to only retain reliable matches. The
-<code>MSnID</code> package can be used to set thresholds to attain user-defined
-PSM, peptide or protein-level FDRs. Here, we will simply filter out
-wrong identification manually.</p>
-<p>Here, the <code>filter()</code> from the <code>dplyr</code> package comes very handy. We
-will thus start by convering the <code>DataFrame</code> to a <code>tibble</code>.</p>
-<div class="sourceCode" id="cb134"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb134-1"><a href="identification-data.html#cb134-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"dplyr"</span>)</span>
-<span id="cb134-2"><a href="identification-data.html#cb134-2" aria-hidden="true" tabindex="-1"></a>id_tbl <span class="ot">&lt;-</span> tidyr<span class="sc">::</span><span class="fu">as_tibble</span>(id)</span>
-<span id="cb134-3"><a href="identification-data.html#cb134-3" aria-hidden="true" tabindex="-1"></a>id_tbl</span></code></pre></div>
-<pre><code>## # A tibble: 5,802 × 35
-##    sequence     spectrumID     chargeState  rank passThreshold experimentalMass…
-##    &lt;chr&gt;        &lt;chr&gt;                &lt;int&gt; &lt;int&gt; &lt;lgl&gt;                     &lt;dbl&gt;
-##  1 RQCRTDFLNYLR controllerTyp…           3     1 TRUE                       548.
-##  2 ESVALADQVTC… controllerTyp…           2     1 TRUE                      1288.
-##  3 KELLCLAMQIIR controllerTyp…           2     1 TRUE                       744.
-##  4 QRMARTSDKQQ… controllerTyp…           3     1 TRUE                       913.
-##  5 KDEGSTEPLKV… controllerTyp…           3     1 TRUE                       927.
-##  6 DGGPAIYGHER… controllerTyp…           3     1 TRUE                       969.
-##  7 QRMARTSDKQQ… controllerTyp…           2     1 TRUE                      1369.
-##  8 CIDRARHVEVQ… controllerTyp…           3     1 TRUE                      1285.
-##  9 CIDRARHVEVQ… controllerTyp…           3     1 TRUE                      1285.
-## 10 VGRCRPIINYL… controllerTyp…           2     1 TRUE                      1102.
-## # … with 5,792 more rows, and 29 more variables: calculatedMassToCharge &lt;dbl&gt;,
-## #   peptideRef &lt;chr&gt;, modNum &lt;int&gt;, isDecoy &lt;lgl&gt;, post &lt;chr&gt;, pre &lt;chr&gt;,
-## #   start &lt;int&gt;, end &lt;int&gt;, DatabaseAccess &lt;chr&gt;, DBseqLength &lt;int&gt;,
-## #   DatabaseSeq &lt;chr&gt;, DatabaseDescription &lt;chr&gt;, scan.number.s. &lt;dbl&gt;,
-## #   acquisitionNum &lt;dbl&gt;, spectrumFile &lt;chr&gt;, idFile &lt;chr&gt;,
-## #   MS.GF.RawScore &lt;dbl&gt;, MS.GF.DeNovoScore &lt;dbl&gt;, MS.GF.SpecEValue &lt;dbl&gt;,
-## #   MS.GF.EValue &lt;dbl&gt;, MS.GF.QValue &lt;dbl&gt;, MS.GF.PepQValue &lt;dbl&gt;, …</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ul>
-<li>Remove decoy hits</li>
-</ul>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-12" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-12', 'sol-start-12')"></span>
-</p>
-<div id="sol-body-12" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb136"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb136-1"><a href="identification-data.html#cb136-1" aria-hidden="true" tabindex="-1"></a>id_tbl <span class="ot">&lt;-</span> id_tbl <span class="sc">%&gt;%</span></span>
-<span id="cb136-2"><a href="identification-data.html#cb136-2" aria-hidden="true" tabindex="-1"></a>    <span class="fu">filter</span>(<span class="sc">!</span>isDecoy)</span>
-<span id="cb136-3"><a href="identification-data.html#cb136-3" aria-hidden="true" tabindex="-1"></a>id_tbl</span></code></pre></div>
-<pre><code>## # A tibble: 2,906 × 35
-##    sequence     spectrumID     chargeState  rank passThreshold experimentalMass…
-##    &lt;chr&gt;        &lt;chr&gt;                &lt;int&gt; &lt;int&gt; &lt;lgl&gt;                     &lt;dbl&gt;
-##  1 RQCRTDFLNYLR controllerTyp…           3     1 TRUE                       548.
-##  2 ESVALADQVTC… controllerTyp…           2     1 TRUE                      1288.
-##  3 QRMARTSDKQQ… controllerTyp…           3     1 TRUE                       913.
-##  4 DGGPAIYGHER… controllerTyp…           3     1 TRUE                       969.
-##  5 QRMARTSDKQQ… controllerTyp…           2     1 TRUE                      1369.
-##  6 CIDRARHVEVQ… controllerTyp…           3     1 TRUE                      1285.
-##  7 CIDRARHVEVQ… controllerTyp…           3     1 TRUE                      1285.
-##  8 VGRCRPIINYL… controllerTyp…           2     1 TRUE                      1102.
-##  9 QRLDEHCVGVG… controllerTyp…           3     1 TRUE                       713.
-## 10 VDYQGKKVVII… controllerTyp…           4     1 TRUE                       870.
-## # … with 2,896 more rows, and 29 more variables: calculatedMassToCharge &lt;dbl&gt;,
-## #   peptideRef &lt;chr&gt;, modNum &lt;int&gt;, isDecoy &lt;lgl&gt;, post &lt;chr&gt;, pre &lt;chr&gt;,
-## #   start &lt;int&gt;, end &lt;int&gt;, DatabaseAccess &lt;chr&gt;, DBseqLength &lt;int&gt;,
-## #   DatabaseSeq &lt;chr&gt;, DatabaseDescription &lt;chr&gt;, scan.number.s. &lt;dbl&gt;,
-## #   acquisitionNum &lt;dbl&gt;, spectrumFile &lt;chr&gt;, idFile &lt;chr&gt;,
-## #   MS.GF.RawScore &lt;dbl&gt;, MS.GF.DeNovoScore &lt;dbl&gt;, MS.GF.SpecEValue &lt;dbl&gt;,
-## #   MS.GF.EValue &lt;dbl&gt;, MS.GF.QValue &lt;dbl&gt;, MS.GF.PepQValue &lt;dbl&gt;, …</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ul>
-<li>Keep first rank matches</li>
-</ul>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-13" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-13', 'sol-start-13')"></span>
-</p>
-<div id="sol-body-13" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb138"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb138-1"><a href="identification-data.html#cb138-1" aria-hidden="true" tabindex="-1"></a>id_tbl <span class="ot">&lt;-</span> id_tbl <span class="sc">%&gt;%</span></span>
-<span id="cb138-2"><a href="identification-data.html#cb138-2" aria-hidden="true" tabindex="-1"></a>    <span class="fu">filter</span>(rank <span class="sc">==</span> <span class="dv">1</span>)</span>
-<span id="cb138-3"><a href="identification-data.html#cb138-3" aria-hidden="true" tabindex="-1"></a>id_tbl</span></code></pre></div>
-<pre><code>## # A tibble: 2,751 × 35
-##    sequence     spectrumID     chargeState  rank passThreshold experimentalMass…
-##    &lt;chr&gt;        &lt;chr&gt;                &lt;int&gt; &lt;int&gt; &lt;lgl&gt;                     &lt;dbl&gt;
-##  1 RQCRTDFLNYLR controllerTyp…           3     1 TRUE                       548.
-##  2 ESVALADQVTC… controllerTyp…           2     1 TRUE                      1288.
-##  3 QRMARTSDKQQ… controllerTyp…           3     1 TRUE                       913.
-##  4 DGGPAIYGHER… controllerTyp…           3     1 TRUE                       969.
-##  5 QRMARTSDKQQ… controllerTyp…           2     1 TRUE                      1369.
-##  6 CIDRARHVEVQ… controllerTyp…           3     1 TRUE                      1285.
-##  7 CIDRARHVEVQ… controllerTyp…           3     1 TRUE                      1285.
-##  8 VGRCRPIINYL… controllerTyp…           2     1 TRUE                      1102.
-##  9 QRLDEHCVGVG… controllerTyp…           3     1 TRUE                       713.
-## 10 VDYQGKKVVII… controllerTyp…           4     1 TRUE                       870.
-## # … with 2,741 more rows, and 29 more variables: calculatedMassToCharge &lt;dbl&gt;,
-## #   peptideRef &lt;chr&gt;, modNum &lt;int&gt;, isDecoy &lt;lgl&gt;, post &lt;chr&gt;, pre &lt;chr&gt;,
-## #   start &lt;int&gt;, end &lt;int&gt;, DatabaseAccess &lt;chr&gt;, DBseqLength &lt;int&gt;,
-## #   DatabaseSeq &lt;chr&gt;, DatabaseDescription &lt;chr&gt;, scan.number.s. &lt;dbl&gt;,
-## #   acquisitionNum &lt;dbl&gt;, spectrumFile &lt;chr&gt;, idFile &lt;chr&gt;,
-## #   MS.GF.RawScore &lt;dbl&gt;, MS.GF.DeNovoScore &lt;dbl&gt;, MS.GF.SpecEValue &lt;dbl&gt;,
-## #   MS.GF.EValue &lt;dbl&gt;, MS.GF.QValue &lt;dbl&gt;, MS.GF.PepQValue &lt;dbl&gt;, …</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ul>
-<li>Remove non-proteotypic peptides. Start by identifying scans that
-match different proteins. For example scan 4884 matches proteins
-<code>XXX_ECA3406</code> and <code>ECA3415</code>. Scan 4099 matches <code>XXX_ECA4416_1</code>,
-<code>XXX_ECA4416_2</code> and <code>XXX_ECA4416_3</code>. Then remove the scans that
-match any of these proteins.</li>
-</ul>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-14" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-14', 'sol-start-14')"></span>
-</p>
-<div id="sol-body-14" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb140"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb140-1"><a href="identification-data.html#cb140-1" aria-hidden="true" tabindex="-1"></a>mltm <span class="ot">&lt;-</span></span>
-<span id="cb140-2"><a href="identification-data.html#cb140-2" aria-hidden="true" tabindex="-1"></a>    id_tbl <span class="sc">%&gt;%</span></span>
-<span id="cb140-3"><a href="identification-data.html#cb140-3" aria-hidden="true" tabindex="-1"></a>    <span class="fu">group_by</span>(spectrumID) <span class="sc">%&gt;%</span></span>
-<span id="cb140-4"><a href="identification-data.html#cb140-4" aria-hidden="true" tabindex="-1"></a>    <span class="fu">mutate</span>(<span class="at">nProts =</span> <span class="fu">length</span>(<span class="fu">unique</span>(DatabaseAccess))) <span class="sc">%&gt;%</span></span>
-<span id="cb140-5"><a href="identification-data.html#cb140-5" aria-hidden="true" tabindex="-1"></a>    <span class="fu">filter</span>(nProts <span class="sc">&gt;</span> <span class="dv">1</span>) <span class="sc">%&gt;%</span></span>
-<span id="cb140-6"><a href="identification-data.html#cb140-6" aria-hidden="true" tabindex="-1"></a>    <span class="fu">select</span>(DatabaseAccess, nProts)</span></code></pre></div>
-<pre><code>## Adding missing grouping variables: `spectrumID`</code></pre>
-<div class="sourceCode" id="cb142"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb142-1"><a href="identification-data.html#cb142-1" aria-hidden="true" tabindex="-1"></a>mltm</span></code></pre></div>
-<pre><code>## # A tibble: 85 × 3
-## # Groups:   spectrumID [39]
-##    spectrumID                                    DatabaseAccess nProts
-##    &lt;chr&gt;                                         &lt;chr&gt;           &lt;int&gt;
-##  1 controllerType=0 controllerNumber=1 scan=1073 ECA2869             2
-##  2 controllerType=0 controllerNumber=1 scan=1073 ECA4278             2
-##  3 controllerType=0 controllerNumber=1 scan=6578 ECA3480             2
-##  4 controllerType=0 controllerNumber=1 scan=6578 ECA3481             2
-##  5 controllerType=0 controllerNumber=1 scan=5617 ECA4283             2
-##  6 controllerType=0 controllerNumber=1 scan=5617 ECA4292             2
-##  7 controllerType=0 controllerNumber=1 scan=3926 ECA0216             2
-##  8 controllerType=0 controllerNumber=1 scan=3926 ECA4035             2
-##  9 controllerType=0 controllerNumber=1 scan=4784 ECA0216             2
-## 10 controllerType=0 controllerNumber=1 scan=4784 ECA4035             2
-## # … with 75 more rows</code></pre>
-<div class="sourceCode" id="cb144"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb144-1"><a href="identification-data.html#cb144-1" aria-hidden="true" tabindex="-1"></a>id_tbl <span class="ot">&lt;-</span></span>
-<span id="cb144-2"><a href="identification-data.html#cb144-2" aria-hidden="true" tabindex="-1"></a>    id_tbl <span class="sc">%&gt;%</span></span>
-<span id="cb144-3"><a href="identification-data.html#cb144-3" aria-hidden="true" tabindex="-1"></a>    <span class="fu">filter</span>(<span class="sc">!</span>spectrumID <span class="sc">%in%</span> mltm<span class="sc">$</span>spectrumID)</span>
-<span id="cb144-4"><a href="identification-data.html#cb144-4" aria-hidden="true" tabindex="-1"></a>id_tbl</span></code></pre></div>
-<pre><code>## # A tibble: 2,666 × 35
-##    sequence     spectrumID     chargeState  rank passThreshold experimentalMass…
-##    &lt;chr&gt;        &lt;chr&gt;                &lt;int&gt; &lt;int&gt; &lt;lgl&gt;                     &lt;dbl&gt;
-##  1 RQCRTDFLNYLR controllerTyp…           3     1 TRUE                       548.
-##  2 ESVALADQVTC… controllerTyp…           2     1 TRUE                      1288.
-##  3 QRMARTSDKQQ… controllerTyp…           3     1 TRUE                       913.
-##  4 DGGPAIYGHER… controllerTyp…           3     1 TRUE                       969.
-##  5 QRMARTSDKQQ… controllerTyp…           2     1 TRUE                      1369.
-##  6 CIDRARHVEVQ… controllerTyp…           3     1 TRUE                      1285.
-##  7 CIDRARHVEVQ… controllerTyp…           3     1 TRUE                      1285.
-##  8 VGRCRPIINYL… controllerTyp…           2     1 TRUE                      1102.
-##  9 QRLDEHCVGVG… controllerTyp…           3     1 TRUE                       713.
-## 10 VDYQGKKVVII… controllerTyp…           4     1 TRUE                       870.
-## # … with 2,656 more rows, and 29 more variables: calculatedMassToCharge &lt;dbl&gt;,
-## #   peptideRef &lt;chr&gt;, modNum &lt;int&gt;, isDecoy &lt;lgl&gt;, post &lt;chr&gt;, pre &lt;chr&gt;,
-## #   start &lt;int&gt;, end &lt;int&gt;, DatabaseAccess &lt;chr&gt;, DBseqLength &lt;int&gt;,
-## #   DatabaseSeq &lt;chr&gt;, DatabaseDescription &lt;chr&gt;, scan.number.s. &lt;dbl&gt;,
-## #   acquisitionNum &lt;dbl&gt;, spectrumFile &lt;chr&gt;, idFile &lt;chr&gt;,
-## #   MS.GF.RawScore &lt;dbl&gt;, MS.GF.DeNovoScore &lt;dbl&gt;, MS.GF.SpecEValue &lt;dbl&gt;,
-## #   MS.GF.EValue &lt;dbl&gt;, MS.GF.QValue &lt;dbl&gt;, MS.GF.PepQValue &lt;dbl&gt;, …</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>Which leaves us with 2666 PSMs.</p>
-<p>This can also be achieved with the <code>filterPSMs()</code> function:</p>
-<div class="sourceCode" id="cb146"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb146-1"><a href="identification-data.html#cb146-1" aria-hidden="true" tabindex="-1"></a>id_filtered <span class="ot">&lt;-</span> <span class="fu">filterPSMs</span>(id)</span></code></pre></div>
-<pre><code>## Starting with 5802 PSMs:</code></pre>
-<pre><code>##  removed 2896 decoy hits</code></pre>
-<pre><code>##  removed 155 PSMs with rank &gt; 1</code></pre>
-<pre><code>##  removed 85 non-proteotypic peptides</code></pre>
-<pre><code>## 2666 PSMs left.</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Compare the distribution of raw identification scores of the decoy and
-non-decoy hits. Interpret the figure.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-15" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-15', 'sol-start-15')"></span>
-</p>
-<div id="sol-body-15" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb152"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb152-1"><a href="identification-data.html#cb152-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(ggplot2)</span>
-<span id="cb152-2"><a href="identification-data.html#cb152-2" aria-hidden="true" tabindex="-1"></a><span class="fu">as_tibble</span>(id) <span class="sc">%&gt;%</span></span>
-<span id="cb152-3"><a href="identification-data.html#cb152-3" aria-hidden="true" tabindex="-1"></a>    <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> MS.GF.RawScore,</span>
-<span id="cb152-4"><a href="identification-data.html#cb152-4" aria-hidden="true" tabindex="-1"></a>               <span class="at">colour =</span> isDecoy)) <span class="sc">+</span></span>
-<span id="cb152-5"><a href="identification-data.html#cb152-5" aria-hidden="true" tabindex="-1"></a>    <span class="fu">geom_density</span>()</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-43-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>The <em><a href="https://CRAN.R-project.org/package=tidyverse">tidyverse</a></em>
-tools are fit for data wrangling with identification data. Using the
-above identification dataframe, calculate the length of each peptide
-(you can use <code>nchar</code> with the peptide sequence <code>sequence</code>) and the
-number of peptides for each protein (defined as
-<code>DatabaseDescription</code>). Plot the length of the proteins against their
-respective number of peptides.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-16" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-16', 'sol-start-16')"></span>
-</p>
-<div id="sol-body-16" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb153"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb153-1"><a href="identification-data.html#cb153-1" aria-hidden="true" tabindex="-1"></a><span class="fu">suppressPackageStartupMessages</span>(<span class="fu">library</span>(<span class="st">"dplyr"</span>))</span>
-<span id="cb153-2"><a href="identification-data.html#cb153-2" aria-hidden="true" tabindex="-1"></a>iddf <span class="ot">&lt;-</span> <span class="fu">as_tibble</span>(id_filtered) <span class="sc">%&gt;%</span></span>
-<span id="cb153-3"><a href="identification-data.html#cb153-3" aria-hidden="true" tabindex="-1"></a>    <span class="fu">mutate</span>(<span class="at">peplen =</span> <span class="fu">nchar</span>(sequence))</span>
-<span id="cb153-4"><a href="identification-data.html#cb153-4" aria-hidden="true" tabindex="-1"></a>npeps <span class="ot">&lt;-</span> iddf <span class="sc">%&gt;%</span></span>
-<span id="cb153-5"><a href="identification-data.html#cb153-5" aria-hidden="true" tabindex="-1"></a>    <span class="fu">group_by</span>(DatabaseAccess) <span class="sc">%&gt;%</span></span>
-<span id="cb153-6"><a href="identification-data.html#cb153-6" aria-hidden="true" tabindex="-1"></a>    tally</span>
-<span id="cb153-7"><a href="identification-data.html#cb153-7" aria-hidden="true" tabindex="-1"></a>iddf <span class="ot">&lt;-</span> <span class="fu">full_join</span>(iddf, npeps)</span></code></pre></div>
-<pre><code>## Joining, by = "DatabaseAccess"</code></pre>
-<div class="sourceCode" id="cb155"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb155-1"><a href="identification-data.html#cb155-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"ggplot2"</span>)</span>
-<span id="cb155-2"><a href="identification-data.html#cb155-2" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(iddf, <span class="fu">aes</span>(<span class="at">x =</span> n, <span class="at">y =</span> DBseqLength)) <span class="sc">+</span> <span class="fu">geom_point</span>()</span></code></pre></div>
-<div class="figure">
-<span style="display:block;" id="fig:answid1"></span>
-<p class="caption marginnote shownote">
-Figure 4.1: Identification data wrangling.
-</p>
-<img src="R4MS_files/figure-html/answid1-1.png" alt="Identification data wrangling." width="672">
-</div>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="low-level-access-to-id-data-optional" class="section level2" number="4.4">
-<h2>
-<span class="header-section-number">4.4</span> Low level access to id data (optional)<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('low-level-access-to-id-data-optional')" onmouseout="reset_tooltip('low-level-access-to-id-data-optional-tooltip')"><span class="tooltiptext" id="low-level-access-to-id-data-optional-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>There are two packages that can be used to parse <code>mzIdentML</code> files,
-namely <code>mzR</code> (that we have already used for raw data) and <code>mzID</code>. The
-major difference is that the former leverages C++ code from
-<code>proteowizard</code> and is hence faster than the latter (which uses the
-<code>XML</code> R package). They both work in similar ways.</p>
-<pre><code>|Data type      |File format |Data structure |Package |
-|:--------------|:-----------|:--------------|:-------|
-|Identification |mzIdentML   |mzRident       |mzR     |
-|Identification |mzIdentML   |mzID           |mzID    |</code></pre>
-<p>Which of these packages is used by <code>readPSMs()</code> can be defined by the
-<code>parser</code> argument.</p>
-<div id="mzid" class="section level3 unnumbered">
-<h3>
-<code>mzID</code><div class="tooltip"><button class="internal-link-btn" onclick="copy_link('mzid')" onmouseout="reset_tooltip('mzid-tooltip')"><span class="tooltiptext" id="mzid-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>The main functions are <code>mzID</code> to read the data into a dedicated data
-class and <code>flatten</code> to transform it into a <code>data.frame</code>.</p>
-<div class="sourceCode" id="cb157"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb157-1"><a href="identification-data.html#cb157-1" aria-hidden="true" tabindex="-1"></a>idf</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/ident/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid"</code></pre>
-<div class="sourceCode" id="cb159"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb159-1"><a href="identification-data.html#cb159-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"mzID"</span>)</span></code></pre></div>
-<pre><code>## 
-## Attaching package: 'mzID'</code></pre>
-<pre><code>## The following object is masked from 'package:dplyr':
-## 
-##     id</code></pre>
-<div class="sourceCode" id="cb162"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb162-1"><a href="identification-data.html#cb162-1" aria-hidden="true" tabindex="-1"></a>id <span class="ot">&lt;-</span> <span class="fu">mzID</span>(idf)</span></code></pre></div>
-<pre><code>## reading TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid...</code></pre>
-<pre><code>## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE
-
-## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE
-
-## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE
-
-## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE
-
-## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE
-
-## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE
-
-## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE
-
-## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE
-
-## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE
-
-## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE
-
-## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE
-
-## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE
-
-## Warning in type.convert.default(...): 'as.is' should be specified by the caller;
-## using TRUE</code></pre>
-<pre><code>##  DONE!</code></pre>
-<div class="sourceCode" id="cb166"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb166-1"><a href="identification-data.html#cb166-1" aria-hidden="true" tabindex="-1"></a>id</span></code></pre></div>
-<pre><code>## An mzID object
-## 
-## Software used:   MS-GF+ (version: Beta (v10072))
-## 
-## Rawfile:         /home/lg390/dev/01_svn/workflows/proteomics/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-## 
-## Database:        /home/lg390/dev/01_svn/workflows/proteomics/erwinia_carotovora.fasta
-## 
-## Number of scans: 5343
-## Number of PSM's: 5656</code></pre>
-<p>Various data can be extracted from the <code>mzID</code> object, using one of the
-accessor functions such as <code>database</code>, <code>software</code>, <code>scans</code>, <code>peptides</code>,
-… The object can also be converted into a <code>data.frame</code> using the
-<code>flatten</code> function.</p>
-<div class="sourceCode" id="cb168"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb168-1"><a href="identification-data.html#cb168-1" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(<span class="fu">flatten</span>(id))</span></code></pre></div>
-<pre><code>##                                      spectrumid scan number(s) acquisitionnum
-## 1 controllerType=0 controllerNumber=1 scan=5782           5782           5782
-## 2 controllerType=0 controllerNumber=1 scan=6037           6037           6037
-## 3 controllerType=0 controllerNumber=1 scan=5235           5235           5235
-##   passthreshold rank calculatedmasstocharge experimentalmasstocharge
-## 1          TRUE    1               1080.232                 1080.233
-## 2          TRUE    1               1002.212                 1002.209
-## 3          TRUE    1               1189.280                 1189.284
-##   chargestate ms-gf:denovoscore ms-gf:evalue ms-gf:pepqvalue ms-gf:qvalue
-## 1           3               174 1.086033e-20               0            0
-## 2           3               245 1.988774e-19               0            0
-## 3           3               264 5.129649e-19               0            0
-##   ms-gf:rawscore ms-gf:specevalue assumeddissociationmethod isotopeerror
-## 1            147     3.764831e-27                       HCD            0
-## 2            214     6.902626e-26                       HCD            0
-## 3            211     1.778789e-25                       HCD            0
-##   isdecoy post pre end start accession length
-## 1   FALSE    S   R  84    50   ECA1932    155
-## 2   FALSE    R   K 315   288   ECA1147    434
-## 3   FALSE    A   R 224   192   ECA0013    295
-##                          description                              pepseq
-## 1         outer membrane lipoprotein PVQIQAGEDSNVIGALGGAVLGGFLGNTIGGGSGR
-## 2                     trigger factor        TQVLDGLINANDIEVPVALIDGEIDVLR
-## 3 ribose-binding periplasmic protein   TKGLNVMQNLLTAHPDVQAVFAQNDEMALGALR
-##   modified modification
-## 1    FALSE         &lt;NA&gt;
-## 2    FALSE         &lt;NA&gt;
-## 3    FALSE         &lt;NA&gt;
-##                                                                idFile
-## 1 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid
-## 2 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid
-## 3 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid
-##                                                          spectrumFile
-## 1 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-## 2 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-## 3 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-##               databaseFile
-## 1 erwinia_carotovora.fasta
-## 2 erwinia_carotovora.fasta
-## 3 erwinia_carotovora.fasta
-##  [ reached 'max' / getOption("max.print") -- omitted 3 rows ]</code></pre>
-</div>
-<div id="mzr" class="section level3 unnumbered">
-<h3>
-<code>mzR</code><div class="tooltip"><button class="internal-link-btn" onclick="copy_link('mzr')" onmouseout="reset_tooltip('mzr-tooltip')"><span class="tooltiptext" id="mzr-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>The <code>mzR</code> package provides a similar interface. It is however much
-faster as it does not read all the data into memory and only extracts
-relevant data on demand. It also has accessor functions such as
-<code>softwareInfo</code>, <code>mzidInfo</code>, … (use <code>showMethods(classes = "mzRident", where = "package:mzR")</code>
-to see all available methods).</p>
-<div class="sourceCode" id="cb170"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb170-1"><a href="identification-data.html#cb170-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"mzR"</span>)</span>
-<span id="cb170-2"><a href="identification-data.html#cb170-2" aria-hidden="true" tabindex="-1"></a>id2 <span class="ot">&lt;-</span> <span class="fu">openIDfile</span>(idf)</span>
-<span id="cb170-3"><a href="identification-data.html#cb170-3" aria-hidden="true" tabindex="-1"></a>id2</span></code></pre></div>
-<pre><code>## Identification file handle.
-## Filename:  TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid 
-## Number of psms:  5759</code></pre>
-<div class="sourceCode" id="cb172"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb172-1"><a href="identification-data.html#cb172-1" aria-hidden="true" tabindex="-1"></a><span class="fu">softwareInfo</span>(id2)</span></code></pre></div>
-<pre><code>## [1] "MS-GF+ Beta (v10072) "                      
-## [2] "ProteoWizard MzIdentML 3.0.501 ProteoWizard"</code></pre>
-<p>The identification data can be accessed as a <code>data.frame</code> with the
-<code>psms</code> accessor.</p>
-<div class="sourceCode" id="cb174"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb174-1"><a href="identification-data.html#cb174-1" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(<span class="fu">psms</span>(id2))</span></code></pre></div>
-<pre><code>##                                      spectrumID chargeState rank passThreshold
-## 1 controllerType=0 controllerNumber=1 scan=5782           3    1          TRUE
-## 2 controllerType=0 controllerNumber=1 scan=6037           3    1          TRUE
-## 3 controllerType=0 controllerNumber=1 scan=5235           3    1          TRUE
-## 4 controllerType=0 controllerNumber=1 scan=5397           3    1          TRUE
-## 5 controllerType=0 controllerNumber=1 scan=6075           3    1          TRUE
-##   experimentalMassToCharge calculatedMassToCharge
-## 1                1080.2325              1080.2321
-## 2                1002.2089              1002.2115
-## 3                1189.2836              1189.2800
-## 4                 960.5365               960.5365
-## 5                1264.3409              1264.3419
-##                              sequence peptideRef modNum isDecoy post pre start
-## 1 PVQIQAGEDSNVIGALGGAVLGGFLGNTIGGGSGR       Pep1      0   FALSE    S   R    50
-## 2        TQVLDGLINANDIEVPVALIDGEIDVLR       Pep2      0   FALSE    R   K   288
-## 3   TKGLNVMQNLLTAHPDVQAVFAQNDEMALGALR       Pep3      0   FALSE    A   R   192
-## 4         SQILQQAGTSVLSQANQVPQTVLSLLR       Pep4      0   FALSE    -   R   264
-## 5 PIIGDNPFVVVLPDVVLDESTADQTQENLALLISR       Pep5      0   FALSE    F   R   119
-##   end DatabaseAccess DBseqLength DatabaseSeq
-## 1  84        ECA1932         155            
-## 2 315        ECA1147         434            
-## 3 224        ECA0013         295            
-## 4 290        ECA1731         290            
-## 5 153        ECA1443         298            
-##                                    DatabaseDescription scan.number.s.
-## 1                   ECA1932 outer membrane lipoprotein           5782
-## 2                               ECA1147 trigger factor           6037
-## 3           ECA0013 ribose-binding periplasmic protein           5235
-## 4                                    ECA1731 flagellin           5397
-## 5 ECA1443 UTP--glucose-1-phosphate uridylyltransferase           6075
-##   acquisitionNum
-## 1           5782
-## 2           6037
-## 3           5235
-## 4           5397
-## 5           6075
-##  [ reached 'max' / getOption("max.print") -- omitted 1 rows ]</code></pre>
-</div>
-</div>
-<div id="msms-database-search" class="section level2" number="4.5">
-<h2>
-<span class="header-section-number">4.5</span> MS/MS database search<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('msms-database-search')" onmouseout="reset_tooltip('msms-database-search-tooltip')"><span class="tooltiptext" id="msms-database-search-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Searches are generally performed using third-party software
-independently of R or can be started from R using a <code>system</code> call.
-The <em><a href="https://bioconductor.org/packages/3.14/MSGFplus">MSGFplus</a></em> package can be used to perform a search
-using the MSGF+ engine directly from R and <em><a href="https://bioconductor.org/packages/3.14/MSGFgui">MSGFgui</a></em> can
-be used to explore the identification results.</p>
-</div>
-<div id="adding-identification-data-to-raw-data" class="section level2" number="4.6">
-<h2>
-<span class="header-section-number">4.6</span> Adding identification data to raw data<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('adding-identification-data-to-raw-data')" onmouseout="reset_tooltip('adding-identification-data-to-raw-data-tooltip')"><span class="tooltiptext" id="adding-identification-data-to-raw-data-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>We are going to use the <code>sp</code> object created in the previous chapter
-and the <code>id_filtered</code> variable generated above.</p>
-<p>Identification data (as a <code>DataFrame</code>) can be merged into raw data (as
-a <code>Spectra</code> object) by adding new spectra variables to the appropriate
-MS2 spectra. Scans and peptide-spectrum matches can be matched by
-their spectrum identifiers.</p>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Identify the spectrum identifier columns in the <code>sp</code> and the <code>id_filtered</code>
-variables.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-17" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-17', 'sol-start-17')"></span>
-</p>
-<div id="sol-body-17" class="solution-body" style="display: none;">
-<p>In the raw data, it is encoded as <code>spectrumId</code>, while in the
-identification data, we have <code>spectrumID</code>.</p>
-<div class="sourceCode" id="cb176"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb176-1"><a href="identification-data.html#cb176-1" aria-hidden="true" tabindex="-1"></a><span class="fu">spectraVariables</span>(sp)</span></code></pre></div>
-<pre><code>##  [1] "msLevel"                  "rtime"                   
-##  [3] "acquisitionNum"           "scanIndex"               
-##  [5] "dataStorage"              "dataOrigin"              
-##  [7] "centroided"               "smoothed"                
-##  [9] "polarity"                 "precScanNum"             
-## [11] "precursorMz"              "precursorIntensity"      
-## [13] "precursorCharge"          "collisionEnergy"         
-## [15] "isolationWindowLowerMz"   "isolationWindowTargetMz" 
-## [17] "isolationWindowUpperMz"   "peaksCount"              
-## [19] "totIonCurrent"            "basePeakMZ"              
-## [21] "basePeakIntensity"        "ionisationEnergy"        
-## [23] "lowMZ"                    "highMZ"                  
-## [25] "mergedScan"               "mergedResultScanNum"     
-## [27] "mergedResultStartScanNum" "mergedResultEndScanNum"  
-## [29] "injectionTime"            "filterString"            
-## [31] "spectrumId"               "ionMobilityDriftTime"    
-## [33] "scanWindowLowerLimit"     "scanWindowUpperLimit"</code></pre>
-<div class="sourceCode" id="cb178"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb178-1"><a href="identification-data.html#cb178-1" aria-hidden="true" tabindex="-1"></a><span class="fu">names</span>(id_filtered)</span></code></pre></div>
-<pre><code>##  [1] "sequence"                 "spectrumID"              
-##  [3] "chargeState"              "rank"                    
-##  [5] "passThreshold"            "experimentalMassToCharge"
-##  [7] "calculatedMassToCharge"   "peptideRef"              
-##  [9] "modNum"                   "isDecoy"                 
-## [11] "post"                     "pre"                     
-## [13] "start"                    "end"                     
-## [15] "DatabaseAccess"           "DBseqLength"             
-## [17] "DatabaseSeq"              "DatabaseDescription"     
-## [19] "scan.number.s."           "acquisitionNum"          
-## [21] "spectrumFile"             "idFile"                  
-## [23] "MS.GF.RawScore"           "MS.GF.DeNovoScore"       
-## [25] "MS.GF.SpecEValue"         "MS.GF.EValue"            
-## [27] "MS.GF.QValue"             "MS.GF.PepQValue"         
-## [29] "modPeptideRef"            "modName"                 
-## [31] "modMass"                  "modLocation"             
-## [33] "subOriginalResidue"       "subReplacementResidue"   
-## [35] "subLocation"</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>These two data can thus simply be joined using:</p>
-<div class="sourceCode" id="cb180"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb180-1"><a href="identification-data.html#cb180-1" aria-hidden="true" tabindex="-1"></a>sp <span class="ot">&lt;-</span> <span class="fu">joinSpectraData</span>(sp, id_filtered,</span>
-<span id="cb180-2"><a href="identification-data.html#cb180-2" aria-hidden="true" tabindex="-1"></a>                      <span class="at">by.x =</span> <span class="st">"spectrumId"</span>,</span>
-<span id="cb180-3"><a href="identification-data.html#cb180-3" aria-hidden="true" tabindex="-1"></a>                      <span class="at">by.y =</span> <span class="st">"spectrumID"</span>)</span>
-<span id="cb180-4"><a href="identification-data.html#cb180-4" aria-hidden="true" tabindex="-1"></a><span class="fu">spectraVariables</span>(sp)</span></code></pre></div>
-<pre><code>##  [1] "msLevel"                  "rtime"                   
-##  [3] "acquisitionNum"           "scanIndex"               
-##  [5] "dataStorage"              "dataOrigin"              
-##  [7] "centroided"               "smoothed"                
-##  [9] "polarity"                 "precScanNum"             
-## [11] "precursorMz"              "precursorIntensity"      
-## [13] "precursorCharge"          "collisionEnergy"         
-## [15] "isolationWindowLowerMz"   "isolationWindowTargetMz" 
-## [17] "isolationWindowUpperMz"   "peaksCount"              
-## [19] "totIonCurrent"            "basePeakMZ"              
-## [21] "basePeakIntensity"        "ionisationEnergy"        
-## [23] "lowMZ"                    "highMZ"                  
-## [25] "mergedScan"               "mergedResultScanNum"     
-## [27] "mergedResultStartScanNum" "mergedResultEndScanNum"  
-## [29] "injectionTime"            "filterString"            
-## [31] "spectrumId"               "ionMobilityDriftTime"    
-## [33] "scanWindowLowerLimit"     "scanWindowUpperLimit"    
-## [35] "sequence"                 "chargeState"             
-## [37] "rank"                     "passThreshold"           
-## [39] "experimentalMassToCharge" "calculatedMassToCharge"  
-## [41] "peptideRef"               "modNum"                  
-## [43] "isDecoy"                  "post"                    
-## [45] "pre"                      "start"                   
-## [47] "end"                      "DatabaseAccess"          
-## [49] "DBseqLength"              "DatabaseSeq"             
-## [51] "DatabaseDescription"      "scan.number.s."          
-## [53] "acquisitionNum.y"         "spectrumFile"            
-## [55] "idFile"                   "MS.GF.RawScore"          
-## [57] "MS.GF.DeNovoScore"        "MS.GF.SpecEValue"        
-## [59] "MS.GF.EValue"             "MS.GF.QValue"            
-## [61] "MS.GF.PepQValue"          "modPeptideRef"           
-## [63] "modName"                  "modMass"                 
-## [65] "modLocation"              "subOriginalResidue"      
-## [67] "subReplacementResidue"    "subLocation"</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Verify that the identification data has been added to the correct
-spectra.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-18" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-18', 'sol-start-18')"></span>
-</p>
-<div id="sol-body-18" class="solution-body" style="display: none;">
-<p>Let’s first verify that no identification data has been added to the
-MS1 scans.</p>
-<div class="sourceCode" id="cb182"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb182-1"><a href="identification-data.html#cb182-1" aria-hidden="true" tabindex="-1"></a><span class="fu">all</span>(<span class="fu">is.na</span>(<span class="fu">filterMsLevel</span>(sp, <span class="dv">1</span>)<span class="sc">$</span>sequence))</span></code></pre></div>
-<pre><code>## [1] TRUE</code></pre>
-<p>They have indeed been added to 43% of the MS2 spectra.</p>
-<div class="sourceCode" id="cb184"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb184-1"><a href="identification-data.html#cb184-1" aria-hidden="true" tabindex="-1"></a>sp_2 <span class="ot">&lt;-</span> <span class="fu">filterMsLevel</span>(sp, <span class="dv">2</span>)</span>
-<span id="cb184-2"><a href="identification-data.html#cb184-2" aria-hidden="true" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">is.na</span>(sp_2<span class="sc">$</span>sequence))</span></code></pre></div>
-<pre><code>## 
-## FALSE  TRUE 
-##  2646  3457</code></pre>
-<p>Let’s compare the precursor/peptide mass to charges</p>
-<div class="sourceCode" id="cb186"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb186-1"><a href="identification-data.html#cb186-1" aria-hidden="true" tabindex="-1"></a>sp_2 <span class="ot">&lt;-</span> sp_2[<span class="sc">!</span><span class="fu">is.na</span>(sp_2<span class="sc">$</span>sequence)]</span>
-<span id="cb186-2"><a href="identification-data.html#cb186-2" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(sp_2<span class="sc">$</span>precursorMz <span class="sc">-</span> sp_2<span class="sc">$</span>experimentalMassToCharge)</span></code></pre></div>
-<pre><code>##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-##  0.0000  0.0000  0.0000  0.0053  0.0000  2.0297</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="visualising-peptide-spectrum-matches" class="section level2" number="4.7">
-<h2>
-<span class="header-section-number">4.7</span> Visualising peptide-spectrum matches<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('visualising-peptide-spectrum-matches')" onmouseout="reset_tooltip('visualising-peptide-spectrum-matches-tooltip')"><span class="tooltiptext" id="visualising-peptide-spectrum-matches-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Let’s choose an MS2 spectrum with a high identification score and plot
-it.</p>
-<div class="sourceCode" id="cb188"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb188-1"><a href="identification-data.html#cb188-1" aria-hidden="true" tabindex="-1"></a>i <span class="ot">&lt;-</span> <span class="fu">which</span>(sp<span class="sc">$</span>MS.GF.RawScore <span class="sc">&gt;</span> <span class="dv">100</span>)[<span class="dv">1</span>]</span>
-<span id="cb188-2"><a href="identification-data.html#cb188-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectra</span>(sp[i])</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-49-1.png" width="672"></p>
-<p>We have seen above that we can add labels to each peak using the
-<code>labels</code> argument in <code>plotSpectra()</code>. The <code>addFragments()</code> function
-takes a spectrum as input (that is a <code>Spectra</code> object of length 1) and
-annotates its peaks.</p>
-<div class="sourceCode" id="cb189"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb189-1"><a href="identification-data.html#cb189-1" aria-hidden="true" tabindex="-1"></a><span class="fu">addFragments</span>(sp[i])</span></code></pre></div>
-<pre><code>##   [1] NA    NA    NA    "b1"  NA    NA    NA    NA    NA    NA    NA    NA   
-##  [13] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
-##  [25] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
-##  [37] NA    NA    NA    NA    NA    NA    NA    "y1_" NA    NA    NA    NA   
-##  [49] NA    "y1"  NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
-##  [61] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
-##  [73] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
-##  [85] NA    NA    "b2"  NA    NA    NA    NA    NA    NA    NA    NA    NA   
-##  [97] NA    NA    NA    NA   
-##  [ reached getOption("max.print") -- omitted 227 entries ]</code></pre>
-<p>It can be directly used with <code>plotSpectra()</code>:</p>
-<div class="sourceCode" id="cb191"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb191-1"><a href="identification-data.html#cb191-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectra</span>(sp[i], <span class="at">labels =</span> addFragments,</span>
-<span id="cb191-2"><a href="identification-data.html#cb191-2" aria-hidden="true" tabindex="-1"></a>            <span class="at">labelPos =</span> <span class="dv">3</span>, <span class="at">labelCol =</span> <span class="st">"steelblue"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-51-1.png" width="672"></p>
-<p>When a precursor peptide ion is fragmented in a CID cell, it breaks at
-specific bonds, producing sets of peaks (<em>a</em>, <em>b</em>, <em>c</em> and <em>x</em>, <em>y</em>,
-<em>z</em>) that can be predicted.</p>
-<div class="figure">
-<p class="caption marginnote shownote">
-Peptide fragmentation.
-</p>
-<img src="img/frag.png" alt="Peptide fragmentation." width="80%">
-</div>
-<p>The annotation of spectra is obtained by simulating fragmentation of a
-peptide and matching observed peaks to fragments:</p>
-<div class="sourceCode" id="cb192"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb192-1"><a href="identification-data.html#cb192-1" aria-hidden="true" tabindex="-1"></a>sp[i]<span class="sc">$</span>sequence</span></code></pre></div>
-<pre><code>## [1] "THSQEEMQHMQR"</code></pre>
-<div class="sourceCode" id="cb194"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb194-1"><a href="identification-data.html#cb194-1" aria-hidden="true" tabindex="-1"></a><span class="fu">calculateFragments</span>(sp[i]<span class="sc">$</span>sequence)</span></code></pre></div>
-<pre><code>## Modifications used: C=57.02146</code></pre>
-<pre><code>##           mz ion type pos z         seq
-## 1   102.0550  b1    b   1 1           T
-## 2   239.1139  b2    b   2 1          TH
-## 3   326.1459  b3    b   3 1         THS
-## 4   454.2045  b4    b   4 1        THSQ
-## 5   583.2471  b5    b   5 1       THSQE
-## 6   712.2897  b6    b   6 1      THSQEE
-## 7   843.3301  b7    b   7 1     THSQEEM
-## 8   971.3887  b8    b   8 1    THSQEEMQ
-## 9  1108.4476  b9    b   9 1   THSQEEMQH
-## 10 1239.4881 b10    b  10 1  THSQEEMQHM
-## 11 1367.5467 b11    b  11 1 THSQEEMQHMQ
-## 12  175.1190  y1    y   1 1           R
-## 13  303.1775  y2    y   2 1          QR
-## 14  434.2180  y3    y   3 1         MQR
-## 15  571.2769  y4    y   4 1        HMQR
-## 16  699.3355  y5    y   5 1       QHMQR
-##  [ reached 'max' / getOption("max.print") -- omitted 42 rows ]</code></pre>
-</div>
-<div id="comparing-spectra" class="section level2" number="4.8">
-<h2>
-<span class="header-section-number">4.8</span> Comparing spectra<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('comparing-spectra')" onmouseout="reset_tooltip('comparing-spectra-tooltip')"><span class="tooltiptext" id="comparing-spectra-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>The <code>compareSpectra()</code> function can be used to compare spectra (by default,
-computing the normalised dot product).</p>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol style="list-style-type: decimal">
-<li>Create a new <code>Spectra</code> object containing the MS2 spectra with
-sequences <code>"SQILQQAGTSVLSQANQVPQTVLSLLR"</code> and
-<code>"TKGLNVMQNLLTAHPDVQAVFAQNDEMALGALR"</code>.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-19" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-19', 'sol-start-19')"></span>
-</p>
-<div id="sol-body-19" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb197"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb197-1"><a href="identification-data.html#cb197-1" aria-hidden="true" tabindex="-1"></a>k <span class="ot">&lt;-</span> <span class="fu">which</span>(sp<span class="sc">$</span>sequence <span class="sc">%in%</span> <span class="fu">c</span>(<span class="st">"SQILQQAGTSVLSQANQVPQTVLSLLR"</span>, <span class="st">"TKGLNVMQNLLTAHPDVQAVFAQNDEMALGALR"</span>))</span>
-<span id="cb197-2"><a href="identification-data.html#cb197-2" aria-hidden="true" tabindex="-1"></a>sp_k <span class="ot">&lt;-</span> sp[k]</span>
-<span id="cb197-3"><a href="identification-data.html#cb197-3" aria-hidden="true" tabindex="-1"></a>sp_k</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 5 spectra in a MsBackendMzR backend:
-##     msLevel     rtime scanIndex
-##   &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1         2   2687.42      5230
-## 2         2   2688.88      5235
-## 3         2   2748.75      5397
-## 4         2   2765.26      5442
-## 5         2   2768.17      5449
-##  ... 67 more variables/columns.
-## 
-## file(s):
-## b87c573dec94f_TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol start="2" style="list-style-type: decimal">
-<li>Calculate the 5 by 5 distance matrix
-between all spectra using <code>compareSpectra</code>. See the <code>?Spectra</code> man
-page for details. Draw a heatmap of that distance matrix.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-20" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-20', 'sol-start-20')"></span>
-</p>
-<div id="sol-body-20" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb199"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb199-1"><a href="identification-data.html#cb199-1" aria-hidden="true" tabindex="-1"></a>distmat <span class="ot">&lt;-</span> <span class="fu">compareSpectra</span>(sp_k)</span>
-<span id="cb199-2"><a href="identification-data.html#cb199-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rownames</span>(distmat) <span class="ot">&lt;-</span> <span class="fu">colnames</span>(distmat) <span class="ot">&lt;-</span> <span class="fu">strtrim</span>(sp_k<span class="sc">$</span>sequence, <span class="dv">2</span>)</span>
-<span id="cb199-3"><a href="identification-data.html#cb199-3" aria-hidden="true" tabindex="-1"></a>distmat</span></code></pre></div>
-<pre><code>##              TK          TK           SQ          SQ           SQ
-## TK 1.0000000000 0.109126094 0.0009373465 0.001261338 0.0008256185
-## TK 0.1091260942 1.000000000 0.0025314670 0.001459654 0.0017613212
-## SQ 0.0009373465 0.002531467 1.0000000000 0.432133016 0.6879331218
-## SQ 0.0012613380 0.001459654 0.4321330158 1.000000000 0.4467153012
-## SQ 0.0008256185 0.001761321 0.6879331218 0.446715301 1.0000000000</code></pre>
-<div class="sourceCode" id="cb201"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb201-1"><a href="identification-data.html#cb201-1" aria-hidden="true" tabindex="-1"></a>pheatmap<span class="sc">::</span><span class="fu">pheatmap</span>(distmat)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-53-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol start="3" style="list-style-type: decimal">
-<li>Compare the spectra with the plotting function seen previously.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-21" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-21', 'sol-start-21')"></span>
-</p>
-<div id="sol-body-21" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb202"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb202-1"><a href="identification-data.html#cb202-1" aria-hidden="true" tabindex="-1"></a><span class="fu">filterIntensity</span>(sp_k, <span class="fl">1e3</span>) <span class="sc">%&gt;%</span> <span class="fu">plotSpectra</span>(<span class="at">main =</span> sp_k<span class="sc">$</span>sequence)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-54-1.png" width="672"></p>
-<div class="sourceCode" id="cb203"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb203-1"><a href="identification-data.html#cb203-1" aria-hidden="true" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">3</span>, <span class="dv">1</span>))</span>
-<span id="cb203-2"><a href="identification-data.html#cb203-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectraMirror</span>(sp_k[<span class="dv">1</span>], sp_k[<span class="dv">2</span>], <span class="at">main =</span> <span class="st">"TK..."</span>)</span>
-<span id="cb203-3"><a href="identification-data.html#cb203-3" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectraMirror</span>(sp_k[<span class="dv">3</span>], sp_k[<span class="dv">4</span>], <span class="at">main =</span> <span class="st">"SQ..."</span>)</span>
-<span id="cb203-4"><a href="identification-data.html#cb203-4" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectraMirror</span>(sp_k[<span class="dv">3</span>], sp_k[<span class="dv">4</span>], <span class="at">main =</span> <span class="st">"SQ..."</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-55-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="summary-exercice" class="section level2" number="4.9">
-<h2>
-<span class="header-section-number">4.9</span> Summary exercise<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('summary-exercice')" onmouseout="reset_tooltip('summary-exercice-tooltip')"><span class="tooltiptext" id="summary-exercice-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Download the first 3 mzML and mzID files from the
-<a href="https://www.ebi.ac.uk/pride/archive/projects/PXD022816">PXD022816</a>
-project <span class="citation">(<a href="#ref-Morgenstern:2020" role="doc-biblioref">Morgenstern, Barzilay, and Levin 2021</a>)</span>.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-22" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-22', 'sol-start-22')"></span>
-</p>
-<div id="sol-body-22" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb204"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb204-1"><a href="identification-data.html#cb204-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Getting data from PX/PRIDE</span></span>
-<span id="cb204-2"><a href="identification-data.html#cb204-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(rpx)</span>
-<span id="cb204-3"><a href="identification-data.html#cb204-3" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb204-4"><a href="identification-data.html#cb204-4" aria-hidden="true" tabindex="-1"></a><span class="do">## https://www.ebi.ac.uk/pride/archive/projects/PXD022816</span></span>
-<span id="cb204-5"><a href="identification-data.html#cb204-5" aria-hidden="true" tabindex="-1"></a><span class="do">## RawBeans: A Simple, Vendor-Independent, Raw-Data Quality-Control</span></span>
-<span id="cb204-6"><a href="identification-data.html#cb204-6" aria-hidden="true" tabindex="-1"></a><span class="do">## Tool ()</span></span>
-<span id="cb204-7"><a href="identification-data.html#cb204-7" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb204-8"><a href="identification-data.html#cb204-8" aria-hidden="true" tabindex="-1"></a>PXD022816 <span class="ot">&lt;-</span> <span class="fu">PXDataset</span>(<span class="st">"PXD022816"</span>)</span></code></pre></div>
-<pre><code>## Loading PXD022816 from cache.</code></pre>
-<div class="sourceCode" id="cb206"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb206-1"><a href="identification-data.html#cb206-1" aria-hidden="true" tabindex="-1"></a>PXD022816</span></code></pre></div>
-<pre><code>## Project PXD022816 with 31 files
-## </code></pre>
-<pre><code>## Resource ID BFC1700 in cache in /home/lgatto/.cache/R/rpx.</code></pre>
-<pre><code>##  [1] 'checksum.txt' ... [31] 'QEP2LC6_HeLa_50ng_251120_10-calib.mzML'
-##  Use 'pxfiles(.)' to see all files.</code></pre>
-<div class="sourceCode" id="cb210"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb210-1"><a href="identification-data.html#cb210-1" aria-hidden="true" tabindex="-1"></a><span class="fu">pxfiles</span>(PXD022816)</span></code></pre></div>
-<pre><code>## Project PXD022816 files (31):
-##  [remote] checksum.txt
-##  [remote] QEP2LC6_HeLa_50ng_251120_01.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_02.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_03.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_04.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_05.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_06.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_07.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_08.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_09.raw
-##  ...</code></pre>
-<div class="sourceCode" id="cb212"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb212-1"><a href="identification-data.html#cb212-1" aria-hidden="true" tabindex="-1"></a>(mzids <span class="ot">&lt;-</span> <span class="fu">pxget</span>(PXD022816, <span class="fu">grep</span>(<span class="st">"mzID"</span>, <span class="fu">pxfiles</span>(PXD022816))[<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>]))</span></code></pre></div>
-<pre><code>## Project PXD022816 files (31):
-##  [remote] checksum.txt
-##  [remote] QEP2LC6_HeLa_50ng_251120_01.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_02.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_03.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_04.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_05.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_06.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_07.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_08.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_09.raw
-##  ...</code></pre>
-<pre><code>## Loading QEP2LC6_HeLa_50ng_251120_01-calib.mzID.gz from cache.</code></pre>
-<pre><code>## Loading QEP2LC6_HeLa_50ng_251120_02-calib.mzID.gz from cache.</code></pre>
-<pre><code>## Loading QEP2LC6_HeLa_50ng_251120_03-calib.mzID.gz from cache.</code></pre>
-<pre><code>## [1] "/home/lgatto/.cache/R/rpx/13706ba650b9_QEP2LC6_HeLa_50ng_251120_01-calib.mzID.gz"
-## [2] "/home/lgatto/.cache/R/rpx/137039853acf_QEP2LC6_HeLa_50ng_251120_02-calib.mzID.gz"
-## [3] "/home/lgatto/.cache/R/rpx/13706596a57b_QEP2LC6_HeLa_50ng_251120_03-calib.mzID.gz"</code></pre>
-<div class="sourceCode" id="cb218"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb218-1"><a href="identification-data.html#cb218-1" aria-hidden="true" tabindex="-1"></a>(mzmls <span class="ot">&lt;-</span> <span class="fu">pxget</span>(PXD022816, <span class="fu">grep</span>(<span class="st">"mzML"</span>, <span class="fu">pxfiles</span>(PXD022816))[<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>]))</span></code></pre></div>
-<pre><code>## Project PXD022816 files (31):
-##  [remote] checksum.txt
-##  [remote] QEP2LC6_HeLa_50ng_251120_01.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_02.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_03.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_04.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_05.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_06.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_07.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_08.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_09.raw
-##  ...</code></pre>
-<pre><code>## Loading QEP2LC6_HeLa_50ng_251120_01-calib.mzML from cache.</code></pre>
-<pre><code>## Loading QEP2LC6_HeLa_50ng_251120_02-calib.mzML from cache.</code></pre>
-<pre><code>## Loading QEP2LC6_HeLa_50ng_251120_03-calib.mzML from cache.</code></pre>
-<pre><code>## [1] "/home/lgatto/.cache/R/rpx/13701d85f1bc_QEP2LC6_HeLa_50ng_251120_01-calib.mzML"
-## [2] "/home/lgatto/.cache/R/rpx/13707d04ce6b_QEP2LC6_HeLa_50ng_251120_02-calib.mzML"
-## [3] "/home/lgatto/.cache/R/rpx/1370516b50aa_QEP2LC6_HeLa_50ng_251120_03-calib.mzML"</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Generate a <code>Spectra</code> object and a table of filtered PSMs. Visualise
-the total ion chromatograms and check the quality of the
-identification data by comparing the density of the decoy and target
-PSMs id scores for each file.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-23" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-23', 'sol-start-23')"></span>
-</p>
-<div id="sol-body-23" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb224"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb224-1"><a href="identification-data.html#cb224-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Loading raw data</span></span>
-<span id="cb224-2"><a href="identification-data.html#cb224-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"Spectra"</span>)</span>
-<span id="cb224-3"><a href="identification-data.html#cb224-3" aria-hidden="true" tabindex="-1"></a>sp <span class="ot">&lt;-</span> <span class="fu">Spectra</span>(mzmls)</span>
-<span id="cb224-4"><a href="identification-data.html#cb224-4" aria-hidden="true" tabindex="-1"></a>sp</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 87647 spectra in a MsBackendMzR backend:
-##         msLevel     rtime scanIndex
-##       &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1             1  0.177987         1
-## 2             1  0.599870         2
-## 3             1  0.978849         3
-## 4             1  1.363217         4
-## 5             1  1.742965         5
-## ...         ...       ...       ...
-## 87643         1   4198.64     28736
-## 87644         1   4199.02     28737
-## 87645         2   4199.28     28738
-## 87646         1   4199.44     28739
-## 87647         1   4199.82     28740
-##  ... 33 more variables/columns.
-## 
-## file(s):
-## 13701d85f1bc_QEP2LC6_HeLa_50ng_251120_01-calib.mzML
-## 13707d04ce6b_QEP2LC6_HeLa_50ng_251120_02-calib.mzML
-## 1370516b50aa_QEP2LC6_HeLa_50ng_251120_03-calib.mzML</code></pre>
-<div class="sourceCode" id="cb226"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb226-1"><a href="identification-data.html#cb226-1" aria-hidden="true" tabindex="-1"></a><span class="do">## number of spectra per file</span></span>
-<span id="cb226-2"><a href="identification-data.html#cb226-2" aria-hidden="true" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">basename</span>(sp<span class="sc">$</span>dataOrigin))</span></code></pre></div>
-<pre><code>## 
-## 13701d85f1bc_QEP2LC6_HeLa_50ng_251120_01-calib.mzML 
-##                                               29575 
-## 1370516b50aa_QEP2LC6_HeLa_50ng_251120_03-calib.mzML 
-##                                               28740 
-## 13707d04ce6b_QEP2LC6_HeLa_50ng_251120_02-calib.mzML 
-##                                               29332</code></pre>
-<div class="sourceCode" id="cb228"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb228-1"><a href="identification-data.html#cb228-1" aria-hidden="true" tabindex="-1"></a><span class="do">## all levels are centroided</span></span>
-<span id="cb228-2"><a href="identification-data.html#cb228-2" aria-hidden="true" tabindex="-1"></a><span class="fu">table</span>(sp<span class="sc">$</span>centroided, sp<span class="sc">$</span>msLevel)</span></code></pre></div>
-<pre><code>##       
-##            1     2
-##   TRUE 19607 68040</code></pre>
-<div class="sourceCode" id="cb230"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb230-1"><a href="identification-data.html#cb230-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"ggplot2"</span>)</span>
-<span id="cb230-2"><a href="identification-data.html#cb230-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"tidyr"</span>)</span>
-<span id="cb230-3"><a href="identification-data.html#cb230-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"magrittr"</span>)</span>
-<span id="cb230-4"><a href="identification-data.html#cb230-4" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb230-5"><a href="identification-data.html#cb230-5" aria-hidden="true" tabindex="-1"></a><span class="do">## Chromatograms</span></span>
-<span id="cb230-6"><a href="identification-data.html#cb230-6" aria-hidden="true" tabindex="-1"></a><span class="fu">filterMsLevel</span>(sp, <span class="dv">1</span>) <span class="sc">%&gt;%</span></span>
-<span id="cb230-7"><a href="identification-data.html#cb230-7" aria-hidden="true" tabindex="-1"></a>    <span class="fu">spectraData</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb230-8"><a href="identification-data.html#cb230-8" aria-hidden="true" tabindex="-1"></a>    <span class="fu">as_tibble</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb230-9"><a href="identification-data.html#cb230-9" aria-hidden="true" tabindex="-1"></a>    <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> rtime,</span>
-<span id="cb230-10"><a href="identification-data.html#cb230-10" aria-hidden="true" tabindex="-1"></a>               <span class="at">y =</span> totIonCurrent,</span>
-<span id="cb230-11"><a href="identification-data.html#cb230-11" aria-hidden="true" tabindex="-1"></a>               <span class="at">colour =</span> <span class="fu">basename</span>(dataOrigin))) <span class="sc">+</span></span>
-<span id="cb230-12"><a href="identification-data.html#cb230-12" aria-hidden="true" tabindex="-1"></a>    <span class="fu">geom_line</span>()</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-58-1.png" width="1152"></p>
-<div class="sourceCode" id="cb231"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb231-1"><a href="identification-data.html#cb231-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Identification data</span></span>
-<span id="cb231-2"><a href="identification-data.html#cb231-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"PSM"</span>)</span>
-<span id="cb231-3"><a href="identification-data.html#cb231-3" aria-hidden="true" tabindex="-1"></a>id <span class="ot">&lt;-</span> <span class="fu">PSM</span>(mzids)</span>
-<span id="cb231-4"><a href="identification-data.html#cb231-4" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb231-5"><a href="identification-data.html#cb231-5" aria-hidden="true" tabindex="-1"></a><span class="do">## Number of PSMs per acquisition</span></span>
-<span id="cb231-6"><a href="identification-data.html#cb231-6" aria-hidden="true" tabindex="-1"></a><span class="fu">table</span>(id<span class="sc">$</span>idFile)</span></code></pre></div>
-<pre><code>## 
-## 137039853acf_QEP2LC6_HeLa_50ng_251120_02-calib.mzID.gz 
-##                                                  25083 
-## 13706596a57b_QEP2LC6_HeLa_50ng_251120_03-calib.mzID.gz 
-##                                                  24436 
-## 13706ba650b9_QEP2LC6_HeLa_50ng_251120_01-calib.mzID.gz 
-##                                                  25231</code></pre>
-<div class="sourceCode" id="cb233"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb233-1"><a href="identification-data.html#cb233-1" aria-hidden="true" tabindex="-1"></a>tidyr<span class="sc">::</span><span class="fu">as_tibble</span>(id) <span class="sc">%&gt;%</span></span>
-<span id="cb233-2"><a href="identification-data.html#cb233-2" aria-hidden="true" tabindex="-1"></a>    <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> MetaMorpheus.score,</span>
-<span id="cb233-3"><a href="identification-data.html#cb233-3" aria-hidden="true" tabindex="-1"></a>               <span class="at">colour =</span> isDecoy)) <span class="sc">+</span></span>
-<span id="cb233-4"><a href="identification-data.html#cb233-4" aria-hidden="true" tabindex="-1"></a>    <span class="fu">geom_density</span>() <span class="sc">+</span></span>
-<span id="cb233-5"><a href="identification-data.html#cb233-5" aria-hidden="true" tabindex="-1"></a>    <span class="fu">facet_wrap</span>(<span class="sc">~</span> spectrumFile)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-59-1.png" width="1152"></p>
-<div class="sourceCode" id="cb234"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb234-1"><a href="identification-data.html#cb234-1" aria-hidden="true" tabindex="-1"></a>id_filtered <span class="ot">&lt;-</span> <span class="fu">filterPSMs</span>(id)</span></code></pre></div>
-<pre><code>## Starting with 74750 PSMs:</code></pre>
-<pre><code>##  removed 543 decoy hits</code></pre>
-<pre><code>##  removed 0 PSMs with rank &gt; 1</code></pre>
-<pre><code>##  removed 70513 non-proteotypic peptides</code></pre>
-<pre><code>## 3694 PSMs left.</code></pre>
-<div class="sourceCode" id="cb240"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb240-1"><a href="identification-data.html#cb240-1" aria-hidden="true" tabindex="-1"></a><span class="fu">max</span>(id_filtered<span class="sc">$</span>PSM.level.q.value)</span></code></pre></div>
-<pre><code>## [1] 0.009628536</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Join the raw and identification data. Beware though that the joining
-must now be performed by spectrum ids and by files.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-24" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-24', 'sol-start-24')"></span>
-</p>
-<div id="sol-body-24" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb242"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb242-1"><a href="identification-data.html#cb242-1" aria-hidden="true" tabindex="-1"></a><span class="do">## primary key for spectra</span></span>
-<span id="cb242-2"><a href="identification-data.html#cb242-2" aria-hidden="true" tabindex="-1"></a>sp<span class="sc">$</span>pkey <span class="ot">&lt;-</span></span>
-<span id="cb242-3"><a href="identification-data.html#cb242-3" aria-hidden="true" tabindex="-1"></a>    <span class="fu">paste0</span>(<span class="fu">sub</span>(<span class="st">"^.+_QEP"</span>, <span class="st">"QEP"</span>, <span class="fu">basename</span>(<span class="fu">dataOrigin</span>(sp))),</span>
-<span id="cb242-4"><a href="identification-data.html#cb242-4" aria-hidden="true" tabindex="-1"></a>           <span class="fu">gsub</span>(<span class="st">"^.+="</span>, <span class="st">"::"</span>, sp<span class="sc">$</span>spectrumId))</span>
-<span id="cb242-5"><a href="identification-data.html#cb242-5" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(sp<span class="sc">$</span>pkey)</span></code></pre></div>
-<pre><code>## [1] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::1"
-## [2] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::2"
-## [3] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::3"
-## [4] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::4"
-## [5] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::5"
-## [6] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::6"</code></pre>
-<div class="sourceCode" id="cb244"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb244-1"><a href="identification-data.html#cb244-1" aria-hidden="true" tabindex="-1"></a><span class="do">## primary key for PSMs</span></span>
-<span id="cb244-2"><a href="identification-data.html#cb244-2" aria-hidden="true" tabindex="-1"></a>id_filtered<span class="sc">$</span>pkey <span class="ot">&lt;-</span></span>
-<span id="cb244-3"><a href="identification-data.html#cb244-3" aria-hidden="true" tabindex="-1"></a>    <span class="fu">paste0</span>(<span class="fu">gsub</span>(<span class="st">"^.+</span><span class="sc">\\</span><span class="st">QEP"</span>, <span class="st">"QEP"</span>, id_filtered<span class="sc">$</span>spectrumFile),</span>
-<span id="cb244-4"><a href="identification-data.html#cb244-4" aria-hidden="true" tabindex="-1"></a>           <span class="fu">sub</span>(<span class="st">"^.+="</span>, <span class="st">"::"</span>, id_filtered<span class="sc">$</span>spectrumID))</span>
-<span id="cb244-5"><a href="identification-data.html#cb244-5" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(id_filtered<span class="sc">$</span>pkey)</span></code></pre></div>
-<pre><code>## [1] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::15857"
-## [2] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::15857"
-## [3] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::8552" 
-## [4] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::8552" 
-## [5] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::11533"
-## [6] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::20301"</code></pre>
-<div class="sourceCode" id="cb246"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb246-1"><a href="identification-data.html#cb246-1" aria-hidden="true" tabindex="-1"></a><span class="do">## For simplicity, let's keep single hits per spectrum id.</span></span>
-<span id="cb246-2"><a href="identification-data.html#cb246-2" aria-hidden="true" tabindex="-1"></a>id_filtered <span class="ot">&lt;-</span> id_filtered[<span class="sc">!</span><span class="fu">duplicated</span>(id_filtered<span class="sc">$</span>pkey), ]</span>
-<span id="cb246-3"><a href="identification-data.html#cb246-3" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb246-4"><a href="identification-data.html#cb246-4" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(id_filtered<span class="sc">$</span>pkey)</span></code></pre></div>
-<pre><code>## [1] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::15857"
-## [2] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::8552" 
-## [3] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::11533"
-## [4] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::20301"
-## [5] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::26984"
-## [6] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::9223"</code></pre>
-<div class="sourceCode" id="cb248"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb248-1"><a href="identification-data.html#cb248-1" aria-hidden="true" tabindex="-1"></a>sp <span class="ot">&lt;-</span> <span class="fu">joinSpectraData</span>(sp, id_filtered, <span class="at">by.x =</span> <span class="st">"pkey"</span>)</span>
-<span id="cb248-2"><a href="identification-data.html#cb248-2" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb248-3"><a href="identification-data.html#cb248-3" aria-hidden="true" tabindex="-1"></a><span class="do">## Number of MS2 scans with a PSM</span></span>
-<span id="cb248-4"><a href="identification-data.html#cb248-4" aria-hidden="true" tabindex="-1"></a><span class="fu">table</span>(<span class="sc">!</span><span class="fu">is.na</span>(<span class="fu">filterMsLevel</span>(sp, <span class="dv">2</span>)<span class="sc">$</span>sequence))</span></code></pre></div>
-<pre><code>## 
-## FALSE  TRUE 
-## 64665  3375</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Extract the PSMs that have been matched to peptides from protein
-<code>O43175</code> and compare and cluster the scans. Hint: once you have
-created the smaller <code>Spectra</code> object with the scans of interest,
-switch to an in-memory backend to speed up the calculations.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-25" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-25', 'sol-start-25')"></span>
-</p>
-<div id="sol-body-25" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb250"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb250-1"><a href="identification-data.html#cb250-1" aria-hidden="true" tabindex="-1"></a>sp_O43175 <span class="ot">&lt;-</span> sp[<span class="fu">which</span>(sp<span class="sc">$</span>DatabaseAccess <span class="sc">==</span> <span class="st">"O43175"</span>)]</span>
-<span id="cb250-2"><a href="identification-data.html#cb250-2" aria-hidden="true" tabindex="-1"></a>sp_O43175 <span class="ot">&lt;-</span> <span class="fu">setBackend</span>(sp_O43175, <span class="fu">MsBackendDataFrame</span>())</span>
-<span id="cb250-3"><a href="identification-data.html#cb250-3" aria-hidden="true" tabindex="-1"></a>sp_O43175</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 13 spectra in a MsBackendDataFrame backend:
-##       msLevel     rtime scanIndex
-##     &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1           2   3060.06     23281
-## 2           2   3149.74     24091
-## 3           2   3548.59     26872
-## 4           2   3564.40     27015
-## 5           2   3057.83     23115
-## ...       ...       ...       ...
-## 9           2   3566.21     26886
-## 10          2   3242.61     24333
-## 11          2   3436.78     25513
-## 12          2   3547.90     26170
-## 13          2   3563.78     26314
-##  ... 65 more variables/columns.
-## Processing:
-##  Switch backend from MsBackendMzR to MsBackendDataFrame [Tue Aug 31 11:36:15 2021]</code></pre>
-<div class="sourceCode" id="cb252"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb252-1"><a href="identification-data.html#cb252-1" aria-hidden="true" tabindex="-1"></a>cmat <span class="ot">&lt;-</span> <span class="fu">compareSpectra</span>(sp_O43175)</span>
-<span id="cb252-2"><a href="identification-data.html#cb252-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rownames</span>(cmat) <span class="ot">&lt;-</span></span>
-<span id="cb252-3"><a href="identification-data.html#cb252-3" aria-hidden="true" tabindex="-1"></a>    <span class="fu">colnames</span>(cmat) <span class="ot">&lt;-</span> <span class="fu">strtrim</span>(sp_O43175<span class="sc">$</span>sequence, <span class="dv">3</span>)</span>
-<span id="cb252-4"><a href="identification-data.html#cb252-4" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb252-5"><a href="identification-data.html#cb252-5" aria-hidden="true" tabindex="-1"></a>pheatmap<span class="sc">::</span><span class="fu">pheatmap</span>(cmat)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-62-1.png" width="672"></p>
-<div class="sourceCode" id="cb253"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb253-1"><a href="identification-data.html#cb253-1" aria-hidden="true" tabindex="-1"></a>(i <span class="ot">&lt;-</span> <span class="fu">which</span>(<span class="fu">rownames</span>(cmat) <span class="sc">==</span> <span class="st">"DLP"</span>))</span></code></pre></div>
-<pre><code>## [1]  2  4  9 10 13</code></pre>
-<div class="sourceCode" id="cb255"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb255-1"><a href="identification-data.html#cb255-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectra</span>(sp_O43175[i], <span class="at">labels =</span> addFragments,</span>
-<span id="cb255-2"><a href="identification-data.html#cb255-2" aria-hidden="true" tabindex="-1"></a>            <span class="at">labelPos =</span> <span class="dv">3</span>, <span class="at">labelCol =</span> <span class="st">"steelblue"</span>,</span>
-<span id="cb255-3"><a href="identification-data.html#cb255-3" aria-hidden="true" tabindex="-1"></a>            <span class="at">main =</span> sp_O43175<span class="sc">$</span>sequence[i])</span>
-<span id="cb255-4"><a href="identification-data.html#cb255-4" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb255-5"><a href="identification-data.html#cb255-5" aria-hidden="true" tabindex="-1"></a><span class="fu">spectraData</span>(sp_O43175[i])<span class="sc">$</span>precursorCharge</span></code></pre></div>
-<pre><code>## [1] 2 2 2 2 2</code></pre>
-<div class="sourceCode" id="cb257"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb257-1"><a href="identification-data.html#cb257-1" aria-hidden="true" tabindex="-1"></a><span class="fu">spectraData</span>(sp_O43175[i])<span class="sc">$</span>precursorMz</span></code></pre></div>
-<pre><code>## [1] 493.8055 515.3086 515.3084 493.8390 515.3087</code></pre>
-<div class="sourceCode" id="cb259"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb259-1"><a href="identification-data.html#cb259-1" aria-hidden="true" tabindex="-1"></a><span class="fu">spectraData</span>(sp_O43175[i])<span class="sc">$</span>modName</span></code></pre></div>
-<pre><code>## [1] NA         "Carbamyl" "Carbamyl" NA         "Carbamyl"</code></pre>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-62-2.png" width="672"></p>
-<div class="sourceCode" id="cb261"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb261-1"><a href="identification-data.html#cb261-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Directly compare spectra with/without Carbamyl</span></span>
-<span id="cb261-2"><a href="identification-data.html#cb261-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectraMirror</span>(sp_O43175[<span class="dv">4</span>], sp_O43175[<span class="dv">9</span>])</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-63-1.png" width="672"></p>
-<div class="sourceCode" id="cb262"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb262-1"><a href="identification-data.html#cb262-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectraMirror</span>(sp_O43175[<span class="dv">2</span>], sp_O43175[<span class="dv">10</span>])</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-63-2.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="exploration-and-assessment-of-identifications-using-msnid" class="section level2" number="4.10">
-<h2>
-<span class="header-section-number">4.10</span> Exploration and Assessment of Identifications using <code>MSnID</code>
-<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('exploration-and-assessment-of-identifications-using-msnid')" onmouseout="reset_tooltip('exploration-and-assessment-of-identifications-using-msnid-tooltip')"><span class="tooltiptext" id="exploration-and-assessment-of-identifications-using-msnid-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>The <code>MSnID</code> package extracts MS/MS ID data from mzIdentML (leveraging
-the <code>mzID</code> package) or text files. After collating the search results
-from multiple datasets it assesses their identification quality and
-optimises filtering criteria to achieve the maximum number of
-identifications while not exceeding a specified false discovery
-rate. It also contains a number of utilities to explore the MS/MS
-results and assess missed and irregular enzymatic cleavages, mass
-measurement accuracy, etc.</p>
-<div id="step-by-step-work-flow" class="section level3" number="4.10.1">
-<h3>
-<span class="header-section-number">4.10.1</span> Step-by-step work-flow<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('step-by-step-work-flow')" onmouseout="reset_tooltip('step-by-step-work-flow-tooltip')"><span class="tooltiptext" id="step-by-step-work-flow-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Let’s reproduce parts of the analysis described in the <code>MSnID</code>
-vignette. You can explore more with</p>
-<div class="sourceCode" id="cb263"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb263-1"><a href="identification-data.html#cb263-1" aria-hidden="true" tabindex="-1"></a><span class="fu">vignette</span>(<span class="st">"msnid_vignette"</span>, <span class="at">package =</span> <span class="st">"MSnID"</span>)</span></code></pre></div>
-<p>The <em><a href="https://bioconductor.org/packages/3.14/MSnID">MSnID</a></em> package can be used for post-search filtering
-of MS/MS identifications. One starts with the construction of an
-<code>MSnID</code> object that is populated with identification results that can
-be imported from a <code>data.frame</code> or from <code>mzIdentML</code> files. Here, we
-will use the example identification data provided with the package.</p>
-<div class="sourceCode" id="cb264"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb264-1"><a href="identification-data.html#cb264-1" aria-hidden="true" tabindex="-1"></a>mzids <span class="ot">&lt;-</span> <span class="fu">system.file</span>(<span class="st">"extdata"</span>, <span class="st">"c_elegans.mzid.gz"</span>, <span class="at">package=</span><span class="st">"MSnID"</span>)</span>
-<span id="cb264-2"><a href="identification-data.html#cb264-2" aria-hidden="true" tabindex="-1"></a><span class="fu">basename</span>(mzids)</span></code></pre></div>
-<pre><code>## [1] "c_elegans.mzid.gz"</code></pre>
-<p>We start by loading the package, initialising the <code>MSnID</code> object, and
-adding the identification result from our <code>mzid</code> file (there could of
-course be more than one).</p>
-<div class="sourceCode" id="cb266"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb266-1"><a href="identification-data.html#cb266-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"MSnID"</span>)</span></code></pre></div>
-<pre><code>## 
-## Attaching package: 'MSnID'</code></pre>
-<pre><code>## The following object is masked from 'package:ProtGenerics':
-## 
-##     peptides</code></pre>
-<div class="sourceCode" id="cb269"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb269-1"><a href="identification-data.html#cb269-1" aria-hidden="true" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">MSnID</span>(<span class="st">"."</span>)</span></code></pre></div>
-<pre><code>## Note, the anticipated/suggested columns in the
-## peptide-to-spectrum matching results are:
-## -----------------------------------------------
-## accession
-## calculatedMassToCharge
-## chargeState
-## experimentalMassToCharge
-## isDecoy
-## peptide
-## spectrumFile
-## spectrumID</code></pre>
-<div class="sourceCode" id="cb271"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb271-1"><a href="identification-data.html#cb271-1" aria-hidden="true" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">read_mzIDs</span>(msnid, mzids)</span></code></pre></div>
-<pre><code>## Loaded cached data</code></pre>
-<div class="sourceCode" id="cb273"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb273-1"><a href="identification-data.html#cb273-1" aria-hidden="true" tabindex="-1"></a><span class="fu">show</span>(msnid)</span></code></pre></div>
-<pre><code>## MSnID object
-## Working directory: "."
-## #Spectrum Files:  1 
-## #PSMs: 12263 at 36 % FDR
-## #peptides: 9489 at 44 % FDR
-## #accessions: 7414 at 76 % FDR</code></pre>
-<p>Printing the <code>MSnID</code> object returns some basic information such as</p>
-<ul>
-<li>Working directory.</li>
-<li>Number of spectrum files used to generate data.</li>
-<li>Number of peptide-to-spectrum matches and corresponding FDR.</li>
-<li>Number of unique peptide sequences and corresponding FDR.</li>
-<li>Number of unique proteins or amino acid sequence accessions and corresponding FDR.</li>
-</ul>
-<p>The package then enables users to define, optimise and apply filters based,
-for example, on missed cleavages, identification scores, precursor mass
-errors, etc., and to assess PSM, peptide and protein FDR levels. To
-function properly, it expects to have access to the following data:</p>
-<pre><code>## [1] "accession"                "calculatedMassToCharge"  
-## [3] "chargeState"              "experimentalMassToCharge"
-## [5] "isDecoy"                  "peptide"                 
-## [7] "spectrumFile"             "spectrumID"</code></pre>
-<p>which are indeed present in our data:</p>
-<div class="sourceCode" id="cb276"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb276-1"><a href="identification-data.html#cb276-1" aria-hidden="true" tabindex="-1"></a><span class="fu">names</span>(msnid)</span></code></pre></div>
-<pre><code>##  [1] "spectrumID"                "scan number(s)"           
-##  [3] "acquisitionNum"            "passThreshold"            
-##  [5] "rank"                      "calculatedMassToCharge"   
-##  [7] "experimentalMassToCharge"  "chargeState"              
-##  [9] "MS-GF:DeNovoScore"         "MS-GF:EValue"             
-## [11] "MS-GF:PepQValue"           "MS-GF:QValue"             
-## [13] "MS-GF:RawScore"            "MS-GF:SpecEValue"         
-## [15] "AssumedDissociationMethod" "IsotopeError"             
-## [17] "isDecoy"                   "post"                     
-## [19] "pre"                       "end"                      
-## [21] "start"                     "accession"                
-## [23] "length"                    "description"              
-## [25] "pepSeq"                    "modified"                 
-## [27] "modification"              "idFile"                   
-## [29] "spectrumFile"              "databaseFile"             
-## [31] "peptide"</code></pre>
-<p>Here, we summarise a few steps and redirect the reader to the
-package’s vignette for more details:</p>
-</div>
-<div id="analysis-of-peptide-sequences" class="section level3" number="4.10.2">
-<h3>
-<span class="header-section-number">4.10.2</span> Analysis of peptide sequences<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('analysis-of-peptide-sequences')" onmouseout="reset_tooltip('analysis-of-peptide-sequences-tooltip')"><span class="tooltiptext" id="analysis-of-peptide-sequences-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Cleaning irregular cleavages at the termini of the peptides and
-missing cleavage sites within the peptide sequences. The following two
-function calls create the new <code>numMissCleavages</code> and <code>numIrregCleavages</code>
-columns in the <code>MSnID</code> object:</p>
-<div class="sourceCode" id="cb278"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb278-1"><a href="identification-data.html#cb278-1" aria-hidden="true" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">assess_termini</span>(msnid, <span class="at">validCleavagePattern=</span><span class="st">"[KR]</span><span class="sc">\\</span><span class="st">.[^P]"</span>)</span>
-<span id="cb278-2"><a href="identification-data.html#cb278-2" aria-hidden="true" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">assess_missed_cleavages</span>(msnid, <span class="at">missedCleavagePattern=</span><span class="st">"[KR](?=[^P$])"</span>)</span></code></pre></div>
-</div>
-<div id="trimming-the-data" class="section level3" number="4.10.3">
-<h3>
-<span class="header-section-number">4.10.3</span> Trimming the data<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('trimming-the-data')" onmouseout="reset_tooltip('trimming-the-data-tooltip')"><span class="tooltiptext" id="trimming-the-data-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Now, we can use the <code>apply_filter</code> function to effectively apply
-filters. The strings passed to the function represent expressions that
-will be evaluated, thus keeping only PSMs that have 0 irregular
-cleavages and 2 or fewer missed cleavages.</p>
-<div class="sourceCode" id="cb279"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb279-1"><a href="identification-data.html#cb279-1" aria-hidden="true" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">apply_filter</span>(msnid, <span class="st">"numIrregCleavages == 0"</span>)</span>
-<span id="cb279-2"><a href="identification-data.html#cb279-2" aria-hidden="true" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">apply_filter</span>(msnid, <span class="st">"numMissCleavages &lt;= 2"</span>)</span>
-<span id="cb279-3"><a href="identification-data.html#cb279-3" aria-hidden="true" tabindex="-1"></a><span class="fu">show</span>(msnid)</span></code></pre></div>
-<pre><code>## MSnID object
-## Working directory: "."
-## #Spectrum Files:  1 
-## #PSMs: 7838 at 17 % FDR
-## #peptides: 5598 at 23 % FDR
-## #accessions: 3759 at 53 % FDR</code></pre>
-</div>
-<div id="parent-ion-mass-errors" class="section level3" number="4.10.4">
-<h3>
-<span class="header-section-number">4.10.4</span> Parent ion mass errors<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('parent-ion-mass-errors')" onmouseout="reset_tooltip('parent-ion-mass-errors-tooltip')"><span class="tooltiptext" id="parent-ion-mass-errors-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Using <code>"calculatedMassToCharge"</code> and <code>"experimentalMassToCharge"</code>, the
-<code>mass_measurement_error</code> function calculates the parent ion mass
-measurement error in parts per million.</p>
-<div class="sourceCode" id="cb281"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb281-1"><a href="identification-data.html#cb281-1" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(<span class="fu">mass_measurement_error</span>(msnid))</span></code></pre></div>
-<pre><code>##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-## -2184.0640    -0.6992     0.0000    17.6146     0.7512  2012.5178</code></pre>
-<p>We then filter any matches that do not fit the +/- 20 ppm tolerance</p>
-<div class="sourceCode" id="cb283"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb283-1"><a href="identification-data.html#cb283-1" aria-hidden="true" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">apply_filter</span>(msnid, <span class="st">"abs(mass_measurement_error(msnid)) &lt; 20"</span>)</span>
-<span id="cb283-2"><a href="identification-data.html#cb283-2" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(<span class="fu">mass_measurement_error</span>(msnid))</span></code></pre></div>
-<pre><code>##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-## -19.7797  -0.5866   0.0000  -0.2970   0.5713  19.6758</code></pre>
-</div>
-<div id="filtering-criteria" class="section level3" number="4.10.5">
-<h3>
-<span class="header-section-number">4.10.5</span> Filtering criteria<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('filtering-criteria')" onmouseout="reset_tooltip('filtering-criteria-tooltip')"><span class="tooltiptext" id="filtering-criteria-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Filtering of the identification data will rely on</p>
-<ul>
-<li>-log10 transformed MS-GF+ Spectrum E-value, reflecting the goodness
-of match experimental and theoretical fragmentation patterns</li>
-</ul>
-<div class="sourceCode" id="cb285"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb285-1"><a href="identification-data.html#cb285-1" aria-hidden="true" tabindex="-1"></a>msnid<span class="sc">$</span>msmsScore <span class="ot">&lt;-</span> <span class="sc">-</span><span class="fu">log10</span>(msnid<span class="sc">$</span><span class="st">`</span><span class="at">MS-GF:SpecEValue</span><span class="st">`</span>)</span></code></pre></div>
-<ul>
-<li>the absolute mass measurement error (in ppm units) of the parent ion</li>
-</ul>
-<div class="sourceCode" id="cb286"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb286-1"><a href="identification-data.html#cb286-1" aria-hidden="true" tabindex="-1"></a>msnid<span class="sc">$</span>absParentMassErrorPPM <span class="ot">&lt;-</span> <span class="fu">abs</span>(<span class="fu">mass_measurement_error</span>(msnid))</span></code></pre></div>
-</div>
-<div id="setting-filters" class="section level3" number="4.10.6">
-<h3>
-<span class="header-section-number">4.10.6</span> Setting filters<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('setting-filters')" onmouseout="reset_tooltip('setting-filters-tooltip')"><span class="tooltiptext" id="setting-filters-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>MS2 filters are handled by a special <code>MSnIDFilter</code> class objects, where
-individual filters are set by name (that is present in <code>names(msnid)</code>)
-and comparison operator (&gt;, &lt;, = , …) defining if we should retain
-hits with higher or lower given the threshold and finally the
-threshold value itself.</p>
-<div class="sourceCode" id="cb287"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb287-1"><a href="identification-data.html#cb287-1" aria-hidden="true" tabindex="-1"></a>filtObj <span class="ot">&lt;-</span> <span class="fu">MSnIDFilter</span>(msnid)</span>
-<span id="cb287-2"><a href="identification-data.html#cb287-2" aria-hidden="true" tabindex="-1"></a>filtObj<span class="sc">$</span>absParentMassErrorPPM <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="at">comparison=</span><span class="st">"&lt;"</span>, <span class="at">threshold=</span><span class="fl">10.0</span>)</span>
-<span id="cb287-3"><a href="identification-data.html#cb287-3" aria-hidden="true" tabindex="-1"></a>filtObj<span class="sc">$</span>msmsScore <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="at">comparison=</span><span class="st">"&gt;"</span>, <span class="at">threshold=</span><span class="fl">10.0</span>)</span>
-<span id="cb287-4"><a href="identification-data.html#cb287-4" aria-hidden="true" tabindex="-1"></a><span class="fu">show</span>(filtObj)</span></code></pre></div>
-<pre><code>## MSnIDFilter object
-## (absParentMassErrorPPM &lt; 10) &amp; (msmsScore &gt; 10)</code></pre>
-<p>We can then evaluate the filter on the identification data object,
-which return the false discovery rate and number of retained
-identifications for the filtering criteria at hand.</p>
-<div class="sourceCode" id="cb289"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb289-1"><a href="identification-data.html#cb289-1" aria-hidden="true" tabindex="-1"></a><span class="fu">evaluate_filter</span>(msnid, filtObj)</span></code></pre></div>
-<pre><code>##           fdr    n
-## PSM         0 3807
-## peptide     0 2455
-## accession   0 1009</code></pre>
-</div>
-<div id="filter-optimisation" class="section level3" number="4.10.7">
-<h3>
-<span class="header-section-number">4.10.7</span> Filter optimisation<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('filter-optimisation')" onmouseout="reset_tooltip('filter-optimisation-tooltip')"><span class="tooltiptext" id="filter-optimisation-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Rather than setting filtering values by hand, as shown above, these
-can be set automatically to meet a specific false discovery rate.</p>
-<div class="sourceCode" id="cb291"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb291-1"><a href="identification-data.html#cb291-1" aria-hidden="true" tabindex="-1"></a>filtObj.grid <span class="ot">&lt;-</span> <span class="fu">optimize_filter</span>(filtObj, msnid, <span class="at">fdr.max=</span><span class="fl">0.01</span>,</span>
-<span id="cb291-2"><a href="identification-data.html#cb291-2" aria-hidden="true" tabindex="-1"></a>                                <span class="at">method=</span><span class="st">"Grid"</span>, <span class="at">level=</span><span class="st">"peptide"</span>,</span>
-<span id="cb291-3"><a href="identification-data.html#cb291-3" aria-hidden="true" tabindex="-1"></a>                                <span class="at">n.iter=</span><span class="dv">500</span>)</span>
-<span id="cb291-4"><a href="identification-data.html#cb291-4" aria-hidden="true" tabindex="-1"></a><span class="fu">show</span>(filtObj.grid)</span></code></pre></div>
-<pre><code>## MSnIDFilter object
-## (absParentMassErrorPPM &lt; 3) &amp; (msmsScore &gt; 7.4)</code></pre>
-<div class="sourceCode" id="cb293"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb293-1"><a href="identification-data.html#cb293-1" aria-hidden="true" tabindex="-1"></a><span class="fu">evaluate_filter</span>(msnid, filtObj.grid)</span></code></pre></div>
-<pre><code>##                   fdr    n
-## PSM       0.004097561 5146
-## peptide   0.006447651 3278
-## accession 0.021996616 1208</code></pre>
-<p>Filters can eventually be applied (rather than just evaluated) using
-the <code>apply_filter</code> function.</p>
-<div class="sourceCode" id="cb295"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb295-1"><a href="identification-data.html#cb295-1" aria-hidden="true" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">apply_filter</span>(msnid, filtObj.grid)</span>
-<span id="cb295-2"><a href="identification-data.html#cb295-2" aria-hidden="true" tabindex="-1"></a><span class="fu">show</span>(msnid)</span></code></pre></div>
-<pre><code>## MSnID object
-## Working directory: "."
-## #Spectrum Files:  1 
-## #PSMs: 5146 at 0.41 % FDR
-## #peptides: 3278 at 0.64 % FDR
-## #accessions: 1208 at 2.2 % FDR</code></pre>
-<p>And finally, identifications that matched decoy and contaminant
-protein sequences are removed</p>
-<div class="sourceCode" id="cb297"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb297-1"><a href="identification-data.html#cb297-1" aria-hidden="true" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">apply_filter</span>(msnid, <span class="st">"isDecoy == FALSE"</span>)</span>
-<span id="cb297-2"><a href="identification-data.html#cb297-2" aria-hidden="true" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">apply_filter</span>(msnid, <span class="st">"!grepl('Contaminant',accession)"</span>)</span>
-<span id="cb297-3"><a href="identification-data.html#cb297-3" aria-hidden="true" tabindex="-1"></a><span class="fu">show</span>(msnid)</span></code></pre></div>
-<pre><code>## MSnID object
-## Working directory: "."
-## #Spectrum Files:  1 
-## #PSMs: 5117 at 0 % FDR
-## #peptides: 3251 at 0 % FDR
-## #accessions: 1179 at 0 % FDR</code></pre>
-</div>
-<div id="export-msnid-data" class="section level3" number="4.10.8">
-<h3>
-<span class="header-section-number">4.10.8</span> Export <code>MSnID</code> data<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('export-msnid-data')" onmouseout="reset_tooltip('export-msnid-data-tooltip')"><span class="tooltiptext" id="export-msnid-data-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>The resulting filtered identification data can be exported to a
-<code>data.frame</code> (or to a dedicated <code>MSnSet</code> data structure from the
-<code>MSnbase</code> package) for quantitative MS data, described below, and
-further processed and analyses using appropriate statistical tests.</p>
-<div class="sourceCode" id="cb299"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb299-1"><a href="identification-data.html#cb299-1" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(<span class="fu">psms</span>(msnid))</span></code></pre></div>
-<pre><code>##   spectrumID scan number(s) acquisitionNum passThreshold rank
-## 1 index=7151           8819           7151          TRUE    1
-## 2 index=8520          10419           8520          TRUE    1
-##   calculatedMassToCharge experimentalMassToCharge chargeState MS-GF:DeNovoScore
-## 1               1270.318                 1270.318           3               287
-## 2               1426.737                 1426.739           3               270
-##   MS-GF:EValue MS-GF:PepQValue MS-GF:QValue MS-GF:RawScore MS-GF:SpecEValue
-## 1 1.709082e-24               0            0            239     1.007452e-31
-## 2 3.780745e-24               0            0            230     2.217275e-31
-##   AssumedDissociationMethod IsotopeError isDecoy post pre end start accession
-## 1                       CID            0   FALSE    A   K 283   249   CE02347
-## 2                       CID            0   FALSE    A   K 182   142   CE07055
-##   length
-## 1    393
-## 2    206
-##                                                                                                                           description
-## 1 WBGene00001993; locus:hpd-1; 4-hydroxyphenylpyruvate dioxygenase; status:Confirmed; UniProt:Q22633; protein_id:CAA90315.1; T21C12.2
-## 2           WBGene00001755; locus:gst-7; glutathione S-transferase; status:Confirmed; UniProt:P91253; protein_id:AAB37846.1; F11G11.2
-##                                      pepSeq modified modification
-## 1       AISQIQEYVDYYGGSGVQHIALNTSDIITAIEALR    FALSE         &lt;NA&gt;
-## 2 SAGSGYLVGDSLTFVDLLVAQHTADLLAANAALLDEFPQFK    FALSE         &lt;NA&gt;
-##              idFile                                   spectrumFile
-## 1 c_elegans.mzid.gz c_elegans_A_3_1_21Apr10_Draco_10-03-04_dta.txt
-## 2 c_elegans.mzid.gz c_elegans_A_3_1_21Apr10_Draco_10-03-04_dta.txt
-##               databaseFile                                       peptide
-## 1 ID_004174_E48C5B52.fasta       K.AISQIQEYVDYYGGSGVQHIALNTSDIITAIEALR.A
-## 2 ID_004174_E48C5B52.fasta K.SAGSGYLVGDSLTFVDLLVAQHTADLLAANAALLDEFPQFK.A
-##   numIrregCleavages numMissCleavages msmsScore absParentMassErrorPPM
-## 1                 0                0  30.99678             0.3843772
-## 2                 0                0  30.65418             1.3689451
-##  [ reached 'max' / getOption("max.print") -- omitted 4 rows ]</code></pre>
-
-</div>
-</div>
-</div>
-<h3>References<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('NA')" onmouseout="reset_tooltip('NA-tooltip')"><span class="tooltiptext" id="NA-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<div id="refs" class="references csl-bib-body hanging-indent">
-<div id="ref-Morgenstern:2020" class="csl-entry">
-Morgenstern, David, Rotem Barzilay, and Yishai Levin. 2021. <span>“<span>RawBeans</span>: A Simple, Vendor-Independent, Raw-Data Quality-Control Tool.”</span> <em>Journal of Proteome Research</em>. <a href="https://doi.org/10.1021/acs.jproteome.0c00956">https://doi.org/10.1021/acs.jproteome.0c00956</a>.
-</div>
-</div>
-</body></html>
-
-<p style="text-align: center;">
-<a href="raw-ms-data.html"><button class="btn btn-default">Previous</button></a>
-<a href="sec:quant.html"><button class="btn btn-default">Next</button></a>
-</p>
-<p class="build-date">Page built: 
-2021-08-31
- using 
-R version 4.1.0 (2021-05-18)
-</p>
-</div>
-</div>
-
-
-
-</body>
-</html>
diff --git a/docs/index.html b/docs/index.html
deleted file mode 100644
index 45ecb9b..0000000
--- a/docs/index.html
+++ /dev/null
@@ -1,325 +0,0 @@
-<!DOCTYPE html>
-<html lang="" xml:lang="">
-<head>
-
-<meta charset="utf-8" />
-<meta name="generator" content="pandoc" />
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<meta property="og:title" content="Chapter 1 Preamble | R for Mass Spectrometry" />
-<meta property="og:type" content="book" />
-
-
-
-
-<meta name="author" content="Laurent Gatto, Sebastian Gibb, Johannes Rainer" />
-
-<meta name="date" content="2023-09-06" />
-
-
-<meta name="description" content="Chapter 1 Preamble | R for Mass Spectrometry">
-
-<title>Chapter 1 Preamble | R for Mass Spectrometry</title>
-
-<link href="libs/tufte-css-2015.12.29/tufte.css" rel="stylesheet" />
-<link href="libs/tufte-css-2015.12.29/envisioned.css" rel="stylesheet" />
-<link href="libs/msmb-css-0/msmb.css" rel="stylesheet" />
-<script>
-function toggle_visibility(id1, id2) {
-var e = document.getElementById(id1);
-var f = document.getElementById(id2);
-
-e.style.display = ((e.style.display!='none') ? 'none' : 'block');
-
-if(f.classList.contains('fa-plus-square')) {
-    f.classList.add('fa-minus-square')
-    f.classList.remove('fa-plus-square')
-} else {
-    f.classList.add('fa-plus-square')
-    f.classList.remove('fa-minus-square')
-}
-
-}
-</script>
-<script>
-function copy_link(id) {
-  var dummy = document.createElement('input'),
-  text = window.location.href.split(/[?#]/)[0] + '#' + id;
-  document.body.appendChild(dummy);
-  dummy.value = text;
-  dummy.select();
-  document.execCommand('copy');
-  document.body.removeChild(dummy);
-  
-  var tooltip = document.getElementById(id + '-tooltip');
-  tooltip.innerHTML = 'Copied!';
-}
-
-function reset_tooltip(id) {
-  var tooltip = document.getElementById(id);
-  tooltip.innerHTML = 'Copy link';
-}
-</script>
-
-
-<style type="text/css">code{white-space: pre;}</style>
-<style type="text/css">
-pre > code.sourceCode { white-space: pre; position: relative; }
-pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
-pre > code.sourceCode > span:empty { height: 1.2em; }
-
-code.sourceCode > span { color: inherit; text-decoration: inherit; }
-div.sourceCode { margin: 1em 0; }
-pre.sourceCode { margin: 0; }
-@media screen {
-div.sourceCode { overflow: auto; }
-}
-@media print {
-pre > code.sourceCode { white-space: pre-wrap; }
-pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
-}
-pre.numberSource code
-  { counter-reset: source-line 0; }
-pre.numberSource code > span
-  { position: relative; left: -4em; counter-increment: source-line; }
-pre.numberSource code > span > a:first-child::before
-  { content: counter(source-line);
-    position: relative; left: -1em; text-align: right; vertical-align: baseline;
-    border: none; display: inline-block;
-    -webkit-touch-callout: none; -webkit-user-select: none;
-    -khtml-user-select: none; -moz-user-select: none;
-    -ms-user-select: none; user-select: none;
-    padding: 0 4px; width: 4em;
-    color: #aaaaaa;
-  }
-pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
-div.sourceCode
-  {  background-color: #f8f8f8; }
-@media screen {
-pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
-}
-code span.al { color: #ef2929; } /* Alert */
-code span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
-code span.at { color: #204a87; } /* Attribute */
-code span.bn { color: #0000cf; } /* BaseN */
-code span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
-code span.ch { color: #4e9a06; } /* Char */
-code span.cn { color: #8f5902; } /* Constant */
-code span.co { color: #8f5902; font-style: italic; } /* Comment */
-code span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
-code span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
-code span.dt { color: #204a87; } /* DataType */
-code span.dv { color: #0000cf; } /* DecVal */
-code span.er { color: #a40000; font-weight: bold; } /* Error */
-code span.ex { } /* Extension */
-code span.fl { color: #0000cf; } /* Float */
-code span.fu { color: #204a87; font-weight: bold; } /* Function */
-code span.im { } /* Import */
-code span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
-code span.kw { color: #204a87; font-weight: bold; } /* Keyword */
-code span.op { color: #ce5c00; font-weight: bold; } /* Operator */
-code span.ot { color: #8f5902; } /* Other */
-code span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
-code span.sc { color: #ce5c00; font-weight: bold; } /* SpecialChar */
-code span.ss { color: #4e9a06; } /* SpecialString */
-code span.st { color: #4e9a06; } /* String */
-code span.va { color: #000000; } /* Variable */
-code span.vs { color: #4e9a06; } /* VerbatimString */
-code span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
-</style>
-
-
-<style type="text/css">
-/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
-div.csl-bib-body { }
-div.csl-entry {
-  clear: both;
-}
-.hanging div.csl-entry {
-  margin-left:2em;
-  text-indent:-2em;
-}
-div.csl-left-margin {
-  min-width:2em;
-  float:left;
-}
-div.csl-right-inline {
-  margin-left:2em;
-  padding-left:1em;
-}
-div.csl-indent {
-  margin-left: 2em;
-}
-</style>
-
-<link rel="stylesheet" href="style.css" type="text/css" />
-
-</head>
-
-<body>
-
-
-
-<div class="row">
-<div class="col-sm-12">
-<div id="TOC">
-<ul class="navbar">
-<li class="msmb"><p class="title">R for Mass Spectrometry<p><p class="author">Laurent Gatto, Sebastian Gibb, Johannes Rainer</p>
-<li class="dropdown" style="float:right">
-<a href="javascript:void(0)" class="dropbtn">&#x25BE; Chapters</a>
-<div class="dropdown-content">
-<a id="active-page" href="index.html" id="toc-preamble"><span class="toc-section-number">1</span> Preamble</a><ul class="toc-sections">
-<li class="toc"><a href="#targeted-audience-and-assumed-background">Targeted audience and assumed background</a></li>
-<li class="toc"><a href="#setup">Setup</a></li>
-<li class="toc"><a href="#acknowledgments">Acknowledgments</a></li>
-<li class="toc"><a href="#license">License</a></li>
-</ul>
-<a href="sec-msintro.html" id="toc-sec-msintro"><span class="toc-section-number">2</span> Introduction</a>
-<a href="sec-raw.html" id="toc-sec-raw"><span class="toc-section-number">3</span> Raw MS data</a>
-<a href="sec-id.html" id="toc-sec-id"><span class="toc-section-number">4</span> Identification data</a>
-<a href="sec-quant.html" id="toc-sec-quant"><span class="toc-section-number">5</span> Quantitative data</a>
-<a href="sec-anx.html" id="toc-sec-anx"><span class="toc-section-number">6</span> Annex</a>
-<a href="sec-si.html" id="toc-sec-si"><span class="toc-section-number">7</span> Additional materials and session information</a>
-</div>
-</li>
-</ul>
-</div>
-</div>
-</div>
-<div class="row">
-<div class="col-sm-12">
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<!--bookdown:title:end--><!--bookdown:title:start--><html><body><div id="preamble" class="section level1" number="1">
-<h1>
-<span class="header-section-number">Chapter 1</span> Preamble</h1>
-<p>The aim of the <a href="https://www.rformassspectrometry.org/">R for Mass
-Spectrometry</a> initiative is to
-provide efficient, thoroughly documented, tested and flexible R
-software for the analysis and interpretation of high throughput mass
-spectrometry assays, including proteomics and metabolomics
-experiments. The project formalises the longtime collaborative
-development efforts of its core members under the RforMassSpectrometry
-organisation to facilitate dissemination and accessibility of their
-work.</p>
-<p>
-<span class="marginnote shownote">
-<!--
-<div class="figure">--><span style="display:block;" id="fig:sticker"></span>
-<img src="https://github.com/rformassspectrometry/stickers/raw/master/sticker/RforMassSpectrometry.png" alt="The *R for Mass Spectrometry* intiative sticker, designed by Johannes Rainer." width="50%"><!--
-<p class="caption marginnote">-->Figure 1.1: The <em>R for Mass Spectrometry</em> intiative sticker, designed by Johannes Rainer.<!--</p>-->
-<!--</div>--></span>
-</p>
-<p>This material introduces participants to the analysis and exploration
-of mass spectrometry (MS) based proteomics data using R and
-Bioconductor. The course will cover all levels of MS data, from raw
-data to identification and quantitation data, up to the statistical
-interpretation of a typical shotgun MS experiment and will focus on
-hands-on tutorials. At the end of this course, the participants will
-be able to manipulate MS data in R and use existing packages for their
-exploratory and statistical proteomics data analysis.</p>
-<div id="targeted-audience-and-assumed-background" class="section level2 unnumbered">
-<h2>Targeted audience and assumed background<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('targeted-audience-and-assumed-background')" onmouseout="reset_tooltip('targeted-audience-and-assumed-background-tooltip')"><span class="tooltiptext" id="targeted-audience-and-assumed-background-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>The course material is targeted to either proteomics practitioners or
-data analysts/bioinformaticians that would like to learn how to use R
-and Bioconductor to analyse proteomics data. Familiarity with MS or
-proteomics in general is desirable, but not essential as we will walk
-through and describe a typical MS data as part of learning about the
-tools. For approachable introductions to sample preparation, mass
-spectrometry, data interpretation and analysis, readers are redirected
-to:</p>
-<ul>
-<li>
-<em>A beginner’s guide to mass spectrometry–based proteomics</em> <span class="citation">(<label for="tufte-mn-1" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-1" class="margin-toggle">Sinha and Mann 2020<span class="marginnote">Sinha, Ankit, and Matthias Mann. 2020. <span>“<span class="nocase">A beginner’s guide to mass spectrometry–based proteomics</span>.”</span> <em>The Biochemist</em>, September. <a href="https://doi.org/10.1042/BIO20200057">https://doi.org/10.1042/BIO20200057</a>.</span>)</span>
-</li>
-<li>
-<em>The ABC’s (and XYZ’s) of peptide sequencing</em> <span class="citation">(<label for="tufte-mn-2" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-2" class="margin-toggle">Steen and Mann 2004<span class="marginnote">Steen, Hanno, and Matthias Mann. 2004. <span>“The <span class="nocase">ABC’s</span> (and <span class="nocase">XYZ’s</span>) of Peptide Sequencing.”</span> <em>Nat. Rev. Mol. Cell Biol.</em> 5 (9): 699–711.</span>)</span>
-</li>
-<li>
-<em>How do shotgun proteomics algorithms identify proteins?</em> <span class="citation">(<label for="tufte-mn-3" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-3" class="margin-toggle">Marcotte 2007<span class="marginnote">Marcotte, Edward M. 2007. <span>“How Do Shotgun Proteomics Algorithms Identify Proteins?”</span> <em>Nat. Biotechnol.</em> 25 (7): 755–57.</span>)</span>
-</li>
-<li>
-<em>An Introduction to Mass Spectrometry-Based Proteomics</em> <span class="citation">(<label for="tufte-mn-4" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-4" class="margin-toggle">Shuken 2023<span class="marginnote">Shuken, Steven R. 2023. <span>“An Introduction to Mass <span>Spectrometry-Based</span> Proteomics.”</span> <em>J. Proteome Res.</em>, June.</span>)</span>
-</li>
-</ul>
-<p>A working knowledge of R (R syntax, commonly used functions, basic
-data structures such as data frames, vectors, matrices, … and their
-manipulation) is required. Familiarity with other Bioconductor omics
-data classes and the tidyverse syntax is useful, but not necessary.</p>
-</div>
-<div id="setup" class="section level2 unnumbered">
-<h2>Setup<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('setup')" onmouseout="reset_tooltip('setup-tooltip')"><span class="tooltiptext" id="setup-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>This material uses the latest version of the R for Mass Spectrometry
-package and their dependencies. It might thus be possible that even
-the latest Bioconductor stable version isn’t recent enough.</p>
-<p>To install all the necessary package, please use the latest release of
-R and execute:</p>
-<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="index.html#cb1-1" tabindex="-1"></a><span class="cf">if</span> (<span class="sc">!</span><span class="fu">requireNamespace</span>(<span class="st">"BiocManager"</span>, <span class="at">quietly =</span> <span class="cn">TRUE</span>))</span>
-<span id="cb1-2"><a href="index.html#cb1-2" tabindex="-1"></a>    <span class="fu">install.packages</span>(<span class="st">"BiocManager"</span>)</span>
-<span id="cb1-3"><a href="index.html#cb1-3" tabindex="-1"></a></span>
-<span id="cb1-4"><a href="index.html#cb1-4" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"tidyverse"</span>)</span>
-<span id="cb1-5"><a href="index.html#cb1-5" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"factoextra"</span>)</span>
-<span id="cb1-6"><a href="index.html#cb1-6" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"msdata"</span>)</span>
-<span id="cb1-7"><a href="index.html#cb1-7" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"mzR"</span>)</span>
-<span id="cb1-8"><a href="index.html#cb1-8" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"rhdf5"</span>)</span>
-<span id="cb1-9"><a href="index.html#cb1-9" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"rpx"</span>)</span>
-<span id="cb1-10"><a href="index.html#cb1-10" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"MsCoreUtils"</span>)</span>
-<span id="cb1-11"><a href="index.html#cb1-11" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"QFeatures"</span>)</span>
-<span id="cb1-12"><a href="index.html#cb1-12" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"Spectra"</span>)</span>
-<span id="cb1-13"><a href="index.html#cb1-13" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"ProtGenerics"</span>)</span>
-<span id="cb1-14"><a href="index.html#cb1-14" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"PSMatch"</span>)</span>
-<span id="cb1-15"><a href="index.html#cb1-15" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"pheatmap"</span>)</span>
-<span id="cb1-16"><a href="index.html#cb1-16" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"limma"</span>)</span>
-<span id="cb1-17"><a href="index.html#cb1-17" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"MSnID"</span>)</span>
-<span id="cb1-18"><a href="index.html#cb1-18" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="st">"RforMassSpectrometry/SpectraVis"</span>)</span></code></pre></div>
-<p>Follow the instructions in <a href="https://gist.github.com/lgatto/b1875458ed4e478ff6e87ce3b346352e">this
-script</a>
-to install the packages and download some of the data used in the
-following chapters. All software versions used to generate this
-document are recoded at the end of the book in <a href="sec-si.html#sec-si">7</a>.</p>
-<p>To compile and render the teaching material, you will also need
-the <em><a href="https://bioconductor.org/packages/3.17/BiocStyle">BiocStyle</a></em> package and the (slighly
-modified) <a href="https://www-huber.embl.de/users/msmith/msmbstyle/">Modern Statistics for Model Biology (msmb) HTML Book
-Style</a> by Mike
-Smith:</p>
-<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="index.html#cb2-1" tabindex="-1"></a>BiocManager<span class="sc">::</span><span class="fu">install</span>(<span class="fu">c</span>(<span class="st">"bookdown"</span>, <span class="st">"BiocStyle"</span>, <span class="st">"lgatto/msmbstyle"</span>))</span></code></pre></div>
-<p>Run the <a href="https://github.com/rformassspectrometry/docs/blob/main/install_docs_deps.R">installation
-script</a>
-by executing the line below to install all requirements to compile the
-book:</p>
-<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="index.html#cb3-1" tabindex="-1"></a><span class="fu">source</span>(<span class="st">"https://raw.githubusercontent.com/rformassspectrometry/docs/main/install_docs_deps.R"</span>)</span></code></pre></div>
-</div>
-<div id="acknowledgments" class="section level2 unnumbered">
-<h2>Acknowledgments<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('acknowledgments')" onmouseout="reset_tooltip('acknowledgments-tooltip')"><span class="tooltiptext" id="acknowledgments-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Thank you to <a href="https://github.com/csoneson">Charlotte Soneson</a> for
-fixing many typos in a previous version of this book.</p>
-</div>
-<div id="license" class="section level2 unnumbered">
-<h2>License<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('license')" onmouseout="reset_tooltip('license-tooltip')"><span class="tooltiptext" id="license-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p><a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/"><img alt="Creative Commons Licence" style="border-width:0" src="https://i.creativecommons.org/l/by-sa/4.0/88x31.png"></a><br>This material is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">Creative Commons
-Attribution-ShareAlike 4.0 International License</a>. You are free to
-<strong>share</strong> (copy and redistribute the material in any medium or format)
-and <strong>adapt</strong> (remix, transform, and build upon the material) for any
-purpose, even commercially, as long as you give appropriate credit and
-distribute your contributions under the same license as the original.</p>
-
-</div>
-</div></body></html>
-
-<p style="text-align: center;">
-<a href="sec-msintro.html"><button class="btn btn-default">Next</button></a>
-</p>
-<p class="build-date">Page built: 
-2023-09-06
- using 
-R version 4.3.1 Patched (2023-07-10 r84676)
-</p>
-</div>
-</div>
-
-
-
-</body>
-</html>
diff --git a/docs/libs/accessible-code-block-0.0.1/empty-anchor.js b/docs/libs/accessible-code-block-0.0.1/empty-anchor.js
deleted file mode 100644
index ca349fd..0000000
--- a/docs/libs/accessible-code-block-0.0.1/empty-anchor.js
+++ /dev/null
@@ -1,15 +0,0 @@
-// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) -->
-// v0.0.1
-// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.
-
-document.addEventListener('DOMContentLoaded', function() {
-  const codeList = document.getElementsByClassName("sourceCode");
-  for (var i = 0; i < codeList.length; i++) {
-    var linkList = codeList[i].getElementsByTagName('a');
-    for (var j = 0; j < linkList.length; j++) {
-      if (linkList[j].innerHTML === "") {
-        linkList[j].setAttribute('aria-hidden', 'true');
-      }
-    }
-  }
-});
diff --git a/docs/libs/header-attrs-2.10/header-attrs.js b/docs/libs/header-attrs-2.10/header-attrs.js
deleted file mode 100644
index dd57d92..0000000
--- a/docs/libs/header-attrs-2.10/header-attrs.js
+++ /dev/null
@@ -1,12 +0,0 @@
-// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
-// be compatible with the behavior of Pandoc < 2.8).
-document.addEventListener('DOMContentLoaded', function(e) {
-  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
-  var i, h, a;
-  for (i = 0; i < hs.length; i++) {
-    h = hs[i];
-    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
-    a = h.attributes;
-    while (a.length > 0) h.removeAttribute(a[0].name);
-  }
-});
diff --git a/docs/libs/header-attrs-2.11/header-attrs.js b/docs/libs/header-attrs-2.11/header-attrs.js
deleted file mode 100644
index dd57d92..0000000
--- a/docs/libs/header-attrs-2.11/header-attrs.js
+++ /dev/null
@@ -1,12 +0,0 @@
-// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
-// be compatible with the behavior of Pandoc < 2.8).
-document.addEventListener('DOMContentLoaded', function(e) {
-  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
-  var i, h, a;
-  for (i = 0; i < hs.length; i++) {
-    h = hs[i];
-    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
-    a = h.attributes;
-    while (a.length > 0) h.removeAttribute(a[0].name);
-  }
-});
diff --git a/docs/libs/header-attrs-2.12/header-attrs.js b/docs/libs/header-attrs-2.12/header-attrs.js
deleted file mode 100644
index dd57d92..0000000
--- a/docs/libs/header-attrs-2.12/header-attrs.js
+++ /dev/null
@@ -1,12 +0,0 @@
-// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
-// be compatible with the behavior of Pandoc < 2.8).
-document.addEventListener('DOMContentLoaded', function(e) {
-  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
-  var i, h, a;
-  for (i = 0; i < hs.length; i++) {
-    h = hs[i];
-    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
-    a = h.attributes;
-    while (a.length > 0) h.removeAttribute(a[0].name);
-  }
-});
diff --git a/docs/libs/header-attrs-2.7/header-attrs.js b/docs/libs/header-attrs-2.7/header-attrs.js
deleted file mode 100644
index dd57d92..0000000
--- a/docs/libs/header-attrs-2.7/header-attrs.js
+++ /dev/null
@@ -1,12 +0,0 @@
-// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
-// be compatible with the behavior of Pandoc < 2.8).
-document.addEventListener('DOMContentLoaded', function(e) {
-  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
-  var i, h, a;
-  for (i = 0; i < hs.length; i++) {
-    h = hs[i];
-    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
-    a = h.attributes;
-    while (a.length > 0) h.removeAttribute(a[0].name);
-  }
-});
diff --git a/docs/libs/header-attrs-2.9/header-attrs.js b/docs/libs/header-attrs-2.9/header-attrs.js
deleted file mode 100644
index dd57d92..0000000
--- a/docs/libs/header-attrs-2.9/header-attrs.js
+++ /dev/null
@@ -1,12 +0,0 @@
-// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
-// be compatible with the behavior of Pandoc < 2.8).
-document.addEventListener('DOMContentLoaded', function(e) {
-  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
-  var i, h, a;
-  for (i = 0; i < hs.length; i++) {
-    h = hs[i];
-    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
-    a = h.attributes;
-    while (a.length > 0) h.removeAttribute(a[0].name);
-  }
-});
diff --git a/docs/libs/msmb-css-0/msmb.css b/docs/libs/msmb-css-0/msmb.css
deleted file mode 100644
index 096e139..0000000
--- a/docs/libs/msmb-css-0/msmb.css
+++ /dev/null
@@ -1,452 +0,0 @@
-@import 'https://fonts.googleapis.com/css?family=Istok+Web|Oxygen|Source+Sans+Pro';
-@import 'https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/css/all.min.css';
-body {
-  font-family: 'Source Sans Pro', Arial, Helvetica, sans-serif;
-  background-color: #fefefe;
-  margin-top: 0;
-}
-
-.title {
-    font-size: 2.5rem;
-}
-
-.author {
-    font-size: 1.5rem;
-    padding-top: 10px;
-}
-
-.dedication {
-    font-style: italic;
-    text-align: center;
-}
-
-.proposition, .question-begin, .question-end, .exercise {
-  font-size: 1.4rem;
-  font-weight: bold;
-  margin-top: 1.2rem;
-  color: green;
-}
-
-.solution, .solution-begin, .solution-end {
-  font-size: 1.4rem;
-  font-weight: bold;
-  color: red;
-}
-
-/* we can remove this eventually
-its only here while we sort out the book*/
-.solution-body {
-    font-weight: normal;
-    color: black;
-}
-
-.solution-end, .question-end {
-/*  width: 55%; */
-/*  text-align: right; */
-/*  float: right; */
-/*  margin-top: -30px; */
-    height: 1em;
-}
-
-.solution-icon {
-    float: right;
-}
-
-.clickable {
-    cursor: pointer;
-}
-
-.margintab {
-  margin: 0;
-  width: 30%;
-  position: relative;
-  right: 10%;
-}
-
-.MJXc-display {
-  width: 55%;
-  font-size: 1.1rem;
-}
-
-.math {
-  font-size: 1.1rem;
-}
-
-h1 {
-  color: var(--main-bg-color);
-  width: 55%;
-  border-bottom-color: var(--main-bg-color);
-  border-bottom-style: solid;
-  padding-bottom: 0.5rem;
-}
-
-h2 {
-  color: var(--main-bg-color);
-  border-bottom-color: var(--main-bg-color);
-  border-bottom-style: solid;
-  width: 55%;
-  padding-bottom: 0.5rem;
-  margin-top: 3em;
-  font-weight: bold;
-}
-
-.tooltip>.internal-link-btn {
-  display: none;
-  float: none;
-  vertical-align: middle;
-  padding: 0;
-  margin-left: 10px;
-  border: none;
-  background: none;
-  color: #1881c2;
-}
-h2:hover>.tooltip>.internal-link-btn { display: inline }
-h3:hover>.tooltip>.internal-link-btn { display: inline }
-
-.internal-link>a:link {
-  text-decoration: none;
-  background-size: 0;
-}
-
-h3 {
-  color: var(--main-bg-color);
-  font-size: 1.5rem;
-}
-
-h4 {
-  font-size: 1.4rem;
-}
-
-li p {
-  margin-bottom: 0;
-}
-
-video {
-  width: 100%;
-}
-
-.shaka-video-container {
-  width: 55%;
-  max-width: 100%;
-}
-
-
-/* header bar */
-
-ul.navbar {
-    list-style-type: none;
-    margin: 0;
-    padding: 0;
-    overflow: hidden;
-    background-color: var(--main-bg-color);
-    top: 0;
-}
-
-li .dropbtn {
-    display: inline-block;
-    color: white;
-    text-align: center;
-    text-decoration: none;
-    text-shadow: none;
-    line-height: 2.0rem;
-    margin-right: 16px;
-    padding-top: 55px;
-    padding-bottom: 15px;
-}
-
-li .dropdown:hover .dropbtn {
-    background-color: red;
-}
-
-li.dropdown {
-    display: inline-block;
-    color: white;
-    padding: 0;
-}
-
-
-li .marginnote,li .sidenote {
-    width: 61%;
-    margin-right: -84.4%;
-}
-
-blockquote {
-  width: 45%;
-  -webkit-padding-start: 5%;
-  -webkit-padding-end: 5%;
-  -moz-padding-start: 5%;
-  -moz-padding-end: 5%;
-  display: block;
-  margin-block-end: 0;
-  margin-block-start: 0;
-  margin-inline-start: 0;
-  scroll-margin-inline-end: 0;
-}
-
-blockquote p {
-  width: 100%;
-  background-color: #F7F7F7;
-  font-size: 1.3rem;
-  font-style: italic;
-}
-
-blockquote .marginnote, blockquote .sidenote {
-    width: 61%;
-    margin-right: -84.4%;
-}
-
-.msmb {
-    display: inline-block;
-    color: white;
-    padding: 14px 16px;
-}
-
-.dropdown-content {
-    display: none;
-    position: absolute;
-    background-color: #f9f9f9;
-    width: 27.5%;
-    z-index: 1;
-    margin-right: 140px;
-    /*modify margin as window resized */
-    right: calc((87.5% - 1400px)/2);
-    max-width: 440px;
-    font-size: 1.2rem;
-}
-
-/* keeps the dropdown menu aligned with header
-when window exceeds 1615px */
-@media screen and (max-width: 1615px) {
-  .dropdown-content {
-    margin-right: 0;
-    right: 8.75%;
-  }
-}
-
-.dropdown-content a {
-    color: black;
-    padding: 4px 16px;
-    text-decoration: none;
-    display: block;
-    text-align: left;
-    text-shadow: none;
-}
-
-.dropdown-content a:hover {
-    background-color: rgb(24,129,194,0.4);
-}
-
-.dropdown:hover .dropdown-content {
-    display: block;
-}
-
-.dropdown a:link {
-    background-position-y: 0;
-    text-shadow: none;
-    background-size: 0;
-}
-
-#active-page {
-    background-color: var(--main-bg-color);
-    color: white;
-}
-
-.toc-sections {
-    font-size: 0.7rem;
-    padding-left: 0;
-    padding-right: 0;
-    background-color: rgb(24,129,194,0.3);
-    list-style-type: none;
-}
-
-li.toc {
-    padding: 0;
-    font-size: 1.1rem;
-}
-
-li.toc a {
-    padding-left: 10%;
-    padding-top: 1px;
-    padding-bottom: 1px;
-}
-
-/* formatting tables */
-.console {
-  width: 80%;
-  table-layout: fixed;
-  border: 0;
-  font-family: monospace;
-}
-
-.kable_wrapper {
-  border: 0;
-  table-layout: fixed;
-}
-
-.code {
-    font-size: 0.9em;
-}
-
-div.sourceCode {
-    width: 56%;
-    /*padding-left: 1%;*/
-}
-
-pre {
-    width: auto;
-    padding-top: 2px;
-    padding-bottom: 2px;
-    /*padding-left: 1%;
-    padding-right: 1%;*/
-    background-color: #F7F7F7;
-    margin-bottom: 10px;
-    margin-top: 3px;
-    margin-top: 4px;
-    overflow-x: scroll;
-}
-
-.build-date {
-  font-size: 0.8em;
-  float: left;
-  line-height: 1em;
-  margin-bottom: 2em;
-  margin-top: 0;
-}
-
-.tooltip {
-  position: relative;
-  display: inline-block;
-}
-
-.tooltip .tooltiptext {
-  visibility: hidden;
-  width: 70px;
-  background-color: #555;
-  color: #fff;
-  text-align: center;
-  border-radius: 6px;
-  padding: 5px;
-  position: absolute;
-  z-index: 1;
-  bottom: 100%;
-  left: 50%;
-  margin-left: -35px;
-  opacity: 0;
-  transition: opacity 0.3s;
-}
-
-.tooltip .tooltiptext::after {
-  content: "";
-  position: absolute;
-  top: 100%;
-  left: 50%;
-  margin-left: -5px;
-  border-width: 5px;
-  border-style: solid;
-  border-color: #555 transparent transparent transparent;
-}
-
-.tooltip:hover .tooltiptext {
-  visibility: visible;
-  opacity: 1;
-}
-
-
-.tooltip-eqn {
-  position: relative;
-  z-index: 100;
-}
-
-.tooltip-eqn .tooltiptext {
-  visibility: hidden;
-  width: 70px;
-  background-color: #555;
-  color: #fff;
-  text-align: center;
-  border-radius: 6px;
-  padding: 5px;
-  position: absolute;
-  z-index: 1;
-  bottom: 150%;
-  margin-left: -35px;
-  opacity: 0;
-  transition: opacity 0.3s;
-}
-
-.tooltip-eqn .tooltiptext::after {
-  content: "";
-  position: absolute;
-  top: 100%;
-  left: 50%;
-  margin-left: -5px;
-  border-width: 5px;
-  border-style: solid;
-  border-color: #555 transparent transparent transparent;
-}
-
-.tooltip-eqn:hover .tooltiptext {
-  visibility: visible;
-  opacity: 1;
-}
-
-.tooltip-eqn>.internal-link-eqn {
-  display: none;
-  float: none;
-  vertical-align: middle;
-  padding: 0;
-  margin-left: 10px;
-  margin-top: -10px;
-  border: none;
-  background: none;
-  color: black;
-  position: absolute;
-}
-.eqn-mouseover:hover>.tooltip-eqn>.internal-link-eqn { display: inline }
-
-.eqn-mouseover {
-  display: flex;
-  align-items: center;
-}
-
-@media (max-width: 760px) {
-    body {
-        padding-left: 0;
-        margin-left: 2%;
-        margin-right: 2%;
-        width: auto;
-    }
-    p {
-        width: auto;
-        font-size: 1.2em;
-    }
-    .marginnote, .sidenote {
-        left: 0;
-    }
-    .sourceCode {
-        display: inline-block;
-        width: 90%;
-    }
-    .title, .author, .dropdown {
-        text-align: center;
-    }
-    .dropdown {
-        width: 100%;
-    }
-    .dropdown-content {
-        position: inherit;
-        width: 100%;
-    }
-    li .dropbtn {
-        padding-top: 0;
-    }
-    h1, h2, h3 {
-        width: 100%;
-    }
-    h3 {
-        display: inline-block;
-    }
-    #TOC {
-        max-width: 100%;
-    }
-
-}
diff --git a/docs/libs/tufte-css-2015.12.29/envisioned.css b/docs/libs/tufte-css-2015.12.29/envisioned.css
deleted file mode 100644
index b35e122..0000000
--- a/docs/libs/tufte-css-2015.12.29/envisioned.css
+++ /dev/null
@@ -1,7 +0,0 @@
-@import 'https://fonts.googleapis.com/css?family=Roboto+Condensed';
-body {
-  font-family: 'Roboto Condensed', Arial, Helvetica, sans-serif;
-  background-color: #fefefe;
-  color: #222;
-}
-.numeral, .sidenote-number { font-family: "Roboto Condensed"; }
diff --git a/docs/libs/tufte-css-2015.12.29/tufte.css b/docs/libs/tufte-css-2015.12.29/tufte.css
deleted file mode 100644
index 101c8f2..0000000
--- a/docs/libs/tufte-css-2015.12.29/tufte.css
+++ /dev/null
@@ -1,223 +0,0 @@
-/* Import ET Book styles
-   adapted from https://github.com/edwardtufte/et-book/blob/gh-pages/et-book.css */
-
-@charset "UTF-8";
-
-/* Tufte CSS styles */
-html { font-size: 15px; }
-
-body { width: 87.5%;
-       margin-left: auto;
-       margin-right: auto;
-       padding-left: 12.5%;
-       color: #111;
-       max-width: 1400px;
-       counter-reset: sidenote-counter; }
-
-h1.title { font-weight: 400;
-     font-style: normal;
-     margin-top: 4rem;
-     margin-bottom: 1.5rem;
-     font-size: 3.2rem;
-     line-height: 1; }
-
-h1 {
-     font-weight: 400;
-     margin-top: 2.1rem;
-     margin-bottom: 0;
-     font-size: 2.2rem;
-     line-height: 1; }
-
-h2 {
-     font-weight: 400;
-     font-size: 1.7rem;
-     margin-top: 2rem;
-     margin-bottom: 0;
-     line-height: 1; }
-
-h3.subtitle {
-             font-weight: 400;
-             margin-top: 1rem;
-             margin-bottom: 1rem;
-             font-size: 1.8rem;
-             display: block;
-             line-height: 1; }
-
-h4.author, h4.date {
-    font-size: 1.4rem;
-    font-weight: 400;
-    margin: 1rem auto;
-    line-height: 1;
-}
-
-.danger { color: red; }
-
-article { position: relative;
-          padding: 5rem 0rem; }
-
-section { padding-top: 1rem;
-          padding-bottom: 1rem; }
-
-p, ol, ul { font-size: 1.4rem; }
-
-p { line-height: 2rem;
-    margin-top: 1.4rem;
-    margin-bottom: 1.4rem;
-    padding-right: 0;
-    vertical-align: baseline; }
-
-blockquote { font-size: 1.4rem; }
-
-blockquote p { width: 50%; }
-
-blockquote footer { width: 50%;
-                    font-size: 1.1rem;
-                    text-align: right; }
-
-ol, ul { width: 45%;
-         -webkit-padding-start: 5%;
-         -webkit-padding-end: 5%; }
-
-li { padding: 0.5rem 0; }
-
-table {
-  border-top: 2px solid #111;
-  border-bottom: 2px solid #111;
-  font-size: 1.1rem;
-}
-
-th {
-  border-bottom: 1px solid #111;
-}
-
-div.figure {
-         padding: 0;
-         border: 0;
-         font-size: 100%;
-         font: inherit;
-         vertical-align: baseline;
-         max-width: 55%;
-         -webkit-margin-start: 0;
-         -webkit-margin-end: 0;
-         margin: 0 0 3em 0;
-         }
-
-/* Links: replicate underline that clears descenders */
-a:link, a:visited { color: inherit; }
-
-a:link { text-decoration: none;
-         background: -webkit-linear-gradient(#fffff8, #fffff8), -webkit-linear-gradient(#fffff8, #fffff8), -webkit-linear-gradient(#333, #333);
-         background: linear-gradient(#fffff8, #fffff8), linear-gradient(#fffff8, #fffff8), linear-gradient(#333, #333);
-         -webkit-background-size: 0.05em 1px, 0.05em 1px, 1px 1px;
-         -moz-background-size: 0.05em 1px, 0.05em 1px, 1px 1px;
-         background-size: 0.05em 1px, 0.05em 1px, 1px 1px;
-         background-repeat: no-repeat, no-repeat, repeat-x;
-         text-shadow: 0.03em 0 #fffff8, -0.03em 0 #fffff8, 0 0.03em #fffff8, 0 -0.03em #fffff8, 0.06em 0 #fffff8, -0.06em 0 #fffff8, 0.09em 0 #fffff8, -0.09em 0 #fffff8, 0.12em 0 #fffff8, -0.12em 0 #fffff8, 0.15em 0 #fffff8, -0.15em 0 #fffff8;
-         background-position: 0% 93%, 100% 93%, 0% 93%; }
-
-@media screen and (-webkit-min-device-pixel-ratio: 0) { a:link { background-position-y: 87%, 87%, 87%; } }
-
-a:link::selection { text-shadow: 0.03em 0 #b4d5fe, -0.03em 0 #b4d5fe, 0 0.03em #b4d5fe, 0 -0.03em #b4d5fe, 0.06em 0 #b4d5fe, -0.06em 0 #b4d5fe, 0.09em 0 #b4d5fe, -0.09em 0 #b4d5fe, 0.12em 0 #b4d5fe, -0.12em 0 #b4d5fe, 0.15em 0 #b4d5fe, -0.15em 0 #b4d5fe;
-                    background: #b4d5fe; }
-
-a:link::-moz-selection { text-shadow: 0.03em 0 #b4d5fe, -0.03em 0 #b4d5fe, 0 0.03em #b4d5fe, 0 -0.03em #b4d5fe, 0.06em 0 #b4d5fe, -0.06em 0 #b4d5fe, 0.09em 0 #b4d5fe, -0.09em 0 #b4d5fe, 0.12em 0 #b4d5fe, -0.12em 0 #b4d5fe, 0.15em 0 #b4d5fe, -0.15em 0 #b4d5fe;
-                         background: #b4d5fe; }
-
-/* Sidenotes, margin notes, figures, captions */
-img {max-width: 100%;}
-
-.marginnote img { display: block; }
-
-.sidenote, .marginnote { float: right;
-                         clear: right;
-                         margin-right: -60%;
-                         width: 50%;
-                         margin-top: 0;
-                         margin-bottom: 1rem;
-                         font-size: 1.1rem;
-                         line-height: 1.3;
-                         vertical-align: baseline;
-                         position: relative; }
-
-.sidenote-number {
-                                           position: relative;
-                                           vertical-align: baseline; }
-
-.sidenote-number { font-size: 1rem;
-                         top: -0.5rem;
-                         left: 0.1rem; }
-
-p, footer, table, hr { width: 55%; }
-hr { margin-left: 0; }
-table table, li p, li pre { width: auto; }
-li p, li pre {margin-top: auto; }
-
-div.fullwidth, table.fullwidth { max-width: 90%; }
-div.fullwidth > * { width: auto; }
-
-#TOC, h1.title { max-width: 90%; }
-#TOC ol, #TOC ul { width: auto; }
-
-div.fullwidth p.caption {
-  margin-right: 0;
-  max-width: 33%;
-}
-
-p.caption { text-align: left; }
-
-@media screen and (max-width: 760px) { p, footer, ol, ul, table, hr { width: 90%; }
-                                       pre { width: 87.5%; }
-                                       ul { width: 85%; }
-                                       figure { max-width: 90%; }
-                                       div.fullwidth p.caption { max-width: none; }
-                                       blockquote p, blockquote footer { width: 90%; }}
-
-.sans { font-family: "Gill Sans", "Gill Sans MT", Calibri, sans-serif;
-        letter-spacing: .03em; }
-
-code { font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
-        font-size: 1.125rem;
-        line-height: 1.6; }
-
-pre code { font-size: 1rem; }
-
-p code { white-space: inherit; }
-
-h1 code, h2 code, h3 code { font-size: 0.80em; }
-
-.marginnote code, .sidenote code { font-size: 1rem; }
-
-pre { width: 52.5%;
-           overflow-x: auto; }
-
-.fullwidth { max-width: 90%;
-             clear:both; }
-
-span.newthought { font-variant: small-caps;
-                  font-size: 1.2em; }
-
-input.margin-toggle { display: none; }
-
-label.sidenote-number { display: inline; }
-
-label.margin-toggle:not(.sidenote-number) { display: none; }
-
-@media (max-width: 760px) { label.margin-toggle:not(.sidenote-number) { display: inline; }
-                            .sidenote, .marginnote { display: none; }
-                            .shownote,
-                            .margin-toggle:checked + .sidenote,
-                            .margin-toggle:checked + .marginnote {
-                                                                   display: block;
-                                                                   float: left;
-                                                                   left: 1rem;
-                                                                   clear: both;
-                                                                   width: 95%;
-                                                                   margin: 1rem 2.5%;
-                                                                   vertical-align: baseline;
-                                                                   position: relative;
-                                                                   }
-                            label { cursor: pointer; }
-                            div.figure { max-width: 90%; }
-                            pre { width: 90%;
-                                       padding: 0; }
-                            }
diff --git a/docs/raw-ms-data.html b/docs/raw-ms-data.html
deleted file mode 100644
index 3572809..0000000
--- a/docs/raw-ms-data.html
+++ /dev/null
@@ -1,1156 +0,0 @@
-<!DOCTYPE html>
-<html lang="" xml:lang="">
-<head>
-
-<meta charset="utf-8" />
-<meta name="generator" content="pandoc" />
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<meta property="og:title" content="Chapter 3 Raw MS data | R for Mass Spectrometry" />
-<meta property="og:type" content="book" />
-
-
-
-
-
-<meta name="author" content="Laurent Gatto, Sebastian Gibb, Johannes Rainer" />
-
-<meta name="date" content="2021-08-31" />
-
-
-<meta name="description" content="Chapter 3 Raw MS data | R for Mass Spectrometry">
-
-<title>Chapter 3 Raw MS data | R for Mass Spectrometry</title>
-
-<script src="libs/header-attrs-2.10/header-attrs.js"></script>
-<link href="libs/tufte-css-2015.12.29/tufte.css" rel="stylesheet" />
-<link href="libs/tufte-css-2015.12.29/envisioned.css" rel="stylesheet" />
-<link href="libs/msmb-css-0/msmb.css" rel="stylesheet" />
-<script>
-function toggle_visibility(id1, id2) {
-var e = document.getElementById(id1);
-var f = document.getElementById(id2);
-
-e.style.display = ((e.style.display!='none') ? 'none' : 'block');
-
-if(f.classList.contains('fa-plus-square')) {
-    f.classList.add('fa-minus-square')
-    f.classList.remove('fa-plus-square')
-} else {
-    f.classList.add('fa-plus-square')
-    f.classList.remove('fa-minus-square')
-}
-
-}
-</script>
-<script>
-function copy_link(id) {
-  var dummy = document.createElement('input'),
-  text = window.location.href.split(/[?#]/)[0] + '#' + id;
-  document.body.appendChild(dummy);
-  dummy.value = text;
-  dummy.select();
-  document.execCommand('copy');
-  document.body.removeChild(dummy);
-  
-  var tooltip = document.getElementById(id + '-tooltip');
-  tooltip.innerHTML = 'Copied!';
-}
-
-function reset_tooltip(id) {
-  var tooltip = document.getElementById(id);
-  tooltip.innerHTML = 'Copy link';
-}
-</script>
-
-
-<style type="text/css">code{white-space: pre;}</style>
-<style type="text/css">
-pre > code.sourceCode { white-space: pre; position: relative; }
-pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
-pre > code.sourceCode > span:empty { height: 1.2em; }
-
-code.sourceCode > span { color: inherit; text-decoration: inherit; }
-div.sourceCode { margin: 1em 0; }
-pre.sourceCode { margin: 0; }
-@media screen {
-div.sourceCode { overflow: auto; }
-}
-@media print {
-pre > code.sourceCode { white-space: pre-wrap; }
-pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
-}
-pre.numberSource code
-  { counter-reset: source-line 0; }
-pre.numberSource code > span
-  { position: relative; left: -4em; counter-increment: source-line; }
-pre.numberSource code > span > a:first-child::before
-  { content: counter(source-line);
-    position: relative; left: -1em; text-align: right; vertical-align: baseline;
-    border: none; display: inline-block;
-    -webkit-touch-callout: none; -webkit-user-select: none;
-    -khtml-user-select: none; -moz-user-select: none;
-    -ms-user-select: none; user-select: none;
-    padding: 0 4px; width: 4em;
-    color: #aaaaaa;
-  }
-pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
-div.sourceCode
-  {  background-color: #f8f8f8; }
-@media screen {
-pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
-}
-code span.al { color: #ef2929; } /* Alert */
-code span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
-code span.at { color: #c4a000; } /* Attribute */
-code span.bn { color: #0000cf; } /* BaseN */
-code span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
-code span.ch { color: #4e9a06; } /* Char */
-code span.cn { color: #000000; } /* Constant */
-code span.co { color: #8f5902; font-style: italic; } /* Comment */
-code span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
-code span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
-code span.dt { color: #204a87; } /* DataType */
-code span.dv { color: #0000cf; } /* DecVal */
-code span.er { color: #a40000; font-weight: bold; } /* Error */
-code span.ex { } /* Extension */
-code span.fl { color: #0000cf; } /* Float */
-code span.fu { color: #000000; } /* Function */
-code span.im { } /* Import */
-code span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
-code span.kw { color: #204a87; font-weight: bold; } /* Keyword */
-code span.op { color: #ce5c00; font-weight: bold; } /* Operator */
-code span.ot { color: #8f5902; } /* Other */
-code span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
-code span.sc { color: #000000; } /* SpecialChar */
-code span.ss { color: #4e9a06; } /* SpecialString */
-code span.st { color: #4e9a06; } /* String */
-code span.va { color: #000000; } /* Variable */
-code span.vs { color: #4e9a06; } /* VerbatimString */
-code span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
-</style>
-
-
-<style type="text/css">
-/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
-div.csl-bib-body { }
-div.csl-entry {
-  clear: both;
-}
-.hanging div.csl-entry {
-  margin-left:2em;
-  text-indent:-2em;
-}
-div.csl-left-margin {
-  min-width:2em;
-  float:left;
-}
-div.csl-right-inline {
-  margin-left:2em;
-  padding-left:1em;
-}
-div.csl-indent {
-  margin-left: 2em;
-}
-</style>
-
-<link rel="stylesheet" href="style.css" type="text/css" />
-
-</head>
-
-<body>
-
-
-
-<div class="row">
-<div class="col-sm-12">
-<div id="TOC">
-<ul class="navbar">
-<li class="msmb"><p class="title">R for Mass Spectrometry<p><p class="author">Laurent Gatto, Sebastian Gibb, Johannes Rainer</p>
-<li class="dropdown" style="float:right">
-<a href="javascript:void(0)" class="dropbtn">&#x25BE; Chapters</a>
-<div class="dropdown-content">
-<a href="index.html"><span class="toc-section-number">1</span> Preamble</a>
-<a href="sec:msintro.html"><span class="toc-section-number">2</span> Introduction</a>
-<a id="active-page" href="raw-ms-data.html"><span class="toc-section-number">3</span> Raw MS data</a><ul class="toc-sections">
-<li class="toc"><a href="#what-is-raw-data-in-r"> What is raw data in R</a></li>
-<li class="toc"><a href="#under-the-hood-mzr-optional"> Under the hood: <code>mzR</code> (optional)</a></li>
-<li class="toc"><a href="#visualisation-of-raw-ms-data"> Visualisation of raw MS data</a></li>
-<li class="toc"><a href="#raw-data-processing-and-manipulation"> Raw data processing and manipulation</a></li>
-<li class="toc"><a href="#a-note-on-efficiency"> A note on efficiency</a></li>
-</ul>
-<a href="identification-data.html"><span class="toc-section-number">4</span> Identification data</a>
-<a href="sec:quant.html"><span class="toc-section-number">5</span> Quantitative data</a>
-<a href="sec:si.html"><span class="toc-section-number">6</span> References and session information</a>
-</div>
-</li>
-</ul>
-</div>
-</div>
-</div>
-<div class="row">
-<div class="col-sm-12">
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body>
-<div id="raw-ms-data" class="section level1" number="3">
-<h1>
-<span class="header-section-number">Chapter 3</span> Raw MS data</h1>
-<p>In this section, we will learn how to read raw data from in one of the
-commonly used open formats (<code>mzML</code>, <code>mzXML</code> and <code>netCDF</code>) into R.</p>
-<pre><code>|Data type  |File format   |Data structure               |Package           |
-|:----------|:-------------|:----------------------------|:-----------------|
-|Raw        |mzXML or mzML |mzRpwiz or mzRramp           |mzR               |
-|Raw        |mzXML or mzML |list of MassSpectrum objects |MALDIquantForeign |
-|Raw        |mzXML or mzML |MSnExp                       |MSnbase           |
-|Peak lists |mgf           |MSnExp                       |MSnbase           |
-|Raw        |several       |Spectra                      |Spectra           |</code></pre>
-<div id="what-is-raw-data-in-r" class="section level2" number="3.1">
-<h2>
-<span class="header-section-number">3.1</span> What is raw data in R<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('what-is-raw-data-in-r')" onmouseout="reset_tooltip('what-is-raw-data-in-r-tooltip')"><span class="tooltiptext" id="what-is-raw-data-in-r-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>When we manipulate complex data, we need a way to abstract it.</p>
-<p>The need for and abstraction saves us from having to know about all
-the details of that data <strong>and</strong> its associated metadata. This allows
-to rely on a few easy-to-remember conventions to make mundane and
-repetitive tasks trivial and be able to complete more complex things
-easily. Abstractions provide a smoother approaches to handle complex
-data using common patterns.</p>
-<div class="figure" style="text-align: center">
-<span style="display:block;" id="fig:unnamed-chunk-8"></span>
-<p class="caption marginnote shownote">
-Figure 3.1: Schematic representation of what is referred to by <em>raw data</em>: a collection of mass spectra and a table containing spectrum-level annotations along the lines. Raw data are imported from one of the many community-maintained open standards formats (mzML, mzXML, mzData or ANDI-MS/netCDF) (Figure taken from <span class="citation">(<a href="#ref-Gatto:2020" role="doc-biblioref">Gatto, Gibb, and Rainer 2020</a>)</span>).
-</p>
-<img src="img/raw.png" alt="Schematic representation of what is referred to by *raw data*: a collection of mass spectra and a table containing spectrum-level annotations along the lines. Raw data are imported from one of the many community-maintained open standards formats (mzML, mzXML, mzData or ANDI-MS/netCDF) (Figure taken from [@Gatto:2020])." width="100%">
-</div>
-<div id="the-spectra-class" class="section level3" number="3.1.1">
-<h3>
-<span class="header-section-number">3.1.1</span> The <code>Spectra</code> class<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('the-spectra-class')" onmouseout="reset_tooltip('the-spectra-class-tooltip')"><span class="tooltiptext" id="the-spectra-class-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>We are going to use the
-<a href="https://rformassspectrometry.github.io/Spectra/"><code>Spectra</code></a> package
-as an abstraction to raw mass spectrometry data.</p>
-<div class="sourceCode" id="cb29"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb29-1"><a href="raw-ms-data.html#cb29-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(Spectra)</span></code></pre></div>
-<p><code>Spectra</code> is part of the <a href="https://www.rformassspectrometry.org/">R for Mass Spectrometry
-initiative</a>. It
-defines the <code>Spectra</code> class that is used as a raw data abstraction, to
-manipulate MS data and metadata. The best way to learn about a data
-structure is to create one by hand.</p>
-<p>Let’s create a <code>DataFrame</code><label for="tufte-sn-4" class="margin-toggle sidenote-number">4</label><input type="checkbox" id="tufte-sn-4" class="margin-toggle"><span class="sidenote"><span class="sidenote-number">4</span> As defined in the Bioconductor <code>S4Vectors</code>
-package.</span> containing MS levels, retention time, m/z and intensities
-for 2 spectra:</p>
-<div class="sourceCode" id="cb30"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb30-1"><a href="raw-ms-data.html#cb30-1" aria-hidden="true" tabindex="-1"></a>spd <span class="ot">&lt;-</span> <span class="fu">DataFrame</span>(<span class="at">msLevel =</span> <span class="fu">c</span>(1L, 2L), <span class="at">rtime =</span> <span class="fu">c</span>(<span class="fl">1.1</span>, <span class="fl">1.2</span>))</span>
-<span id="cb30-2"><a href="raw-ms-data.html#cb30-2" aria-hidden="true" tabindex="-1"></a>spd<span class="sc">$</span>mz <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="fu">c</span>(<span class="dv">100</span>, <span class="fl">103.2</span>, <span class="fl">104.3</span>, <span class="fl">106.5</span>), <span class="fu">c</span>(<span class="fl">45.6</span>, <span class="fl">120.4</span>, <span class="fl">190.2</span>))</span>
-<span id="cb30-3"><a href="raw-ms-data.html#cb30-3" aria-hidden="true" tabindex="-1"></a>spd<span class="sc">$</span>intensity <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="fu">c</span>(<span class="dv">200</span>, <span class="dv">400</span>, <span class="fl">34.2</span>, <span class="dv">17</span>), <span class="fu">c</span>(<span class="fl">12.3</span>, <span class="fl">15.2</span>, <span class="fl">6.8</span>))</span>
-<span id="cb30-4"><a href="raw-ms-data.html#cb30-4" aria-hidden="true" tabindex="-1"></a>spd</span></code></pre></div>
-<pre><code>## DataFrame with 2 rows and 4 columns
-##     msLevel     rtime                    mz             intensity
-##   &lt;integer&gt; &lt;numeric&gt;                &lt;list&gt;                &lt;list&gt;
-## 1         1       1.1 100.0,103.2,104.3,... 200.0,400.0, 34.2,...
-## 2         2       1.2      45.6,120.4,190.2        12.3,15.2, 6.8</code></pre>
-<p>And now convert this <code>DataFrame</code> into a <code>Spectra</code> object:</p>
-<div class="sourceCode" id="cb32"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb32-1"><a href="raw-ms-data.html#cb32-1" aria-hidden="true" tabindex="-1"></a>sp0 <span class="ot">&lt;-</span> <span class="fu">Spectra</span>(spd)</span>
-<span id="cb32-2"><a href="raw-ms-data.html#cb32-2" aria-hidden="true" tabindex="-1"></a>sp0</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 2 spectra in a MsBackendDataFrame backend:
-##     msLevel     rtime scanIndex
-##   &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1         1       1.1        NA
-## 2         2       1.2        NA
-##  ... 16 more variables/columns.</code></pre>
-<div id="exercise" class="section level4 unnumbered">
-<h4>Exercise</h4>
-<p>Explore the newly created object using</p>
-<ul>
-<li>
-<code>spectraVariables</code> to extract all the metadata variables.</li>
-<li>
-<code>spectraData</code> to extract all the metadata.</li>
-<li>
-<code>peaksData</code> to extract a list containing the raw data.</li>
-<li>
-<code>[</code> to create subsets.</li>
-</ul>
-</div>
-</div>
-<div id="spectra-from-mzml-files" class="section level3" number="3.1.2">
-<h3>
-<span class="header-section-number">3.1.2</span> <code>Spectra</code> from mzML files<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('spectra-from-mzml-files')" onmouseout="reset_tooltip('spectra-from-mzml-files-tooltip')"><span class="tooltiptext" id="spectra-from-mzml-files-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Let’s now create a new object using the mzML data previously
-downloaded and available in the <code>mzf</code> file.</p>
-<div class="sourceCode" id="cb34"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb34-1"><a href="raw-ms-data.html#cb34-1" aria-hidden="true" tabindex="-1"></a>mzf</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/.cache/R/rpx/b87c573dec94f_TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML"</code></pre>
-<div class="sourceCode" id="cb36"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb36-1"><a href="raw-ms-data.html#cb36-1" aria-hidden="true" tabindex="-1"></a>sp <span class="ot">&lt;-</span> <span class="fu">Spectra</span>(mzf)</span>
-<span id="cb36-2"><a href="raw-ms-data.html#cb36-2" aria-hidden="true" tabindex="-1"></a>sp</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 7534 spectra in a MsBackendMzR backend:
-##        msLevel     rtime scanIndex
-##      &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1            1    0.4584         1
-## 2            1    0.9725         2
-## 3            1    1.8524         3
-## 4            1    2.7424         4
-## 5            1    3.6124         5
-## ...        ...       ...       ...
-## 7530         2   3600.47      7530
-## 7531         2   3600.83      7531
-## 7532         2   3601.18      7532
-## 7533         2   3601.57      7533
-## 7534         2   3601.98      7534
-##  ... 33 more variables/columns.
-## 
-## file(s):
-## b87c573dec94f_TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML</code></pre>
-<div id="exercise-1" class="section level4 unnumbered">
-<h4>Exercise</h4>
-<ul>
-<li>Repeat the data manipulations above.</li>
-<li>Check the number of scans in the object with <code>length()</code>.</li>
-<li>Note the difference in the first line when showing the object in the
-console. We will get back to this idea of backend later.</li>
-</ul>
-<p>Mass spectrometry data in <code>Spectra</code> objects can be thought of as a
-list of individual spectra, with each spectrum having a set of
-variables associated with it. Besides <em>core</em> spectra variables (such
-as MS level or retention time) an arbitrary number of optional
-variables can be assigned to a spectrum. The core spectra variables
-all have their own accessor method and it is guaranteed that a value
-is returned by it (or <code>NA</code> if the information is not available). The
-core variables and their data type are (alphabetically ordered):</p>
-<ul>
-<li>
-<em>acquisitionNum</em> <code>integer(1)</code>: the index of acquisition of a
-spectrum during a MS run.</li>
-<li>
-<em>centroided</em> <code>logical(1)</code>: whether the spectrum is in profile or
-centroid mode.</li>
-<li>
-<em>collisionEnergy</em> <code>numeric(1)</code>: collision energy used to create an
-MSn spectrum.</li>
-<li>
-<em>dataOrigin</em> <code>character(1)</code>: the <em>origin</em> of the spectrum’s data,
-e.g. the mzML file from which it was read.</li>
-<li>
-<em>dataStorage</em> <code>character(1)</code>: the (current) storage location of the
-spectrum data. This value depends on the backend used to handle and
-provide the data. For an <em>in-memory</em> backend like the
-<code>MsBackendDataFrame</code> this will be <code>"&lt;memory&gt;"</code>, for an on-disk
-backend such as the <code>MsBackendHdf5Peaks</code> it will be the name of the
-HDF5 file where the spectrum’s peak data is stored.</li>
-<li>
-<em>intensity</em> <code>numeric</code>: intensity values for the spectrum’s peaks.</li>
-<li>
-<em>isolationWindowLowerMz</em> <code>numeric(1)</code>: lower m/z for the isolation
-window in which the (MSn) spectrum was measured.</li>
-<li>
-<em>isolationWindowTargetMz</em> <code>numeric(1)</code>: the target m/z for the
-isolation window in which the (MSn) spectrum was measured.</li>
-<li>
-<em>isolationWindowUpperMz</em> <code>numeric(1)</code>: upper m/z for the isolation
-window in which the (MSn) spectrum was measured.</li>
-<li>
-<em>msLevel</em> <code>integer(1)</code>: the MS level of the spectrum.</li>
-<li>
-<em>mz</em> <code>numeric</code>: the m/z values for the spectrum’s peaks.</li>
-<li>
-<em>polarity</em> <code>integer(1)</code>: the polarity of the spectrum (<code>0</code> and <code>1</code>
-representing negative and positive polarity, respectively).</li>
-<li>
-<em>precScanNum</em> <code>integer(1)</code>: the scan (acquisition) number of the
-precursor for an MSn spectrum.</li>
-<li>
-<em>precursorCharge</em> <code>integer(1)</code>: the charge of the precursor of an
-MSn spectrum.</li>
-<li>
-<em>precursorIntensity</em> <code>numeric(1)</code>: the intensity of the precursor of
-an MSn spectrum.</li>
-<li>
-<em>precursorMz</em> <code>numeric(1)</code>: the m/z of the precursor of an MSn
-spectrum.</li>
-<li>
-<em>rtime</em> <code>numeric(1)</code>: the retention time of a spectrum.</li>
-<li>
-<em>scanIndex</em> <code>integer(1)</code>: the index of a spectrum within a (raw)
-file.</li>
-<li>
-<em>smoothed</em> <code>logical(1)</code>: whether the spectrum was smoothed.</li>
-</ul>
-<p>For details on the individual variables and their getter/setter
-function see the help for <code>Spectra</code> (<code>?Spectra</code>). Also note that these
-variables are suggested, but not required to characterize a
-spectrum. Also, some only make sense for MSn, but not for MS1 spectra.</p>
-</div>
-<div id="exercise-2" class="section level4 unnumbered">
-<h4>Exercise</h4>
-<ul>
-<li>Extract a set of spectra variables using the accessor (for example
-<code>msLevel(.)</code>) or using the <code>$</code> notation (for example <code>.$msLevel</code>).</li>
-<li>How many MS levels are there, and how many scans of each level?</li>
-<li>Extract the index of the MS2 spectrum with the highest base peak
-intensity.</li>
-<li>Are the data centroided or in profile mode?</li>
-<li>Pick a spectrum of each level and visually check whether it is
-centroided or in profile mode. You can use the <code>plotSpectra()</code>
-function to visualise peaks and set the m/z range with the <code>xlim</code>
-arguments.</li>
-</ul>
-</div>
-<div id="exercise-3" class="section level4 unnumbered">
-<h4>Exercise</h4>
-<p>Using the first raw data file starting with <code>MS3TMT10</code>, answer the
-following questions:</p>
-<ul>
-<li>How many spectra are there in that file?</li>
-<li>How many MS levels, and how many spectra per MS level?</li>
-<li>What is the index of the MS2 spectrum with the highest precursor
-intensity?</li>
-<li>Plot one spectrum of each level. Are they centroided or in profile
-mode?</li>
-</ul>
-<p>These objects and their manipulations are not limited to single files:</p>
-<div class="sourceCode" id="cb38"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb38-1"><a href="raw-ms-data.html#cb38-1" aria-hidden="true" tabindex="-1"></a>(fls <span class="ot">&lt;-</span> <span class="fu">dir</span>(<span class="fu">system.file</span>(<span class="st">"sciex"</span>, <span class="at">package =</span> <span class="st">"msdata"</span>), <span class="at">full.names =</span> <span class="cn">TRUE</span>))</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/sciex/20171016_POOL_POS_1_105-134.mzML"
-## [2] "/home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/sciex/20171016_POOL_POS_3_105-134.mzML"</code></pre>
-<div class="sourceCode" id="cb40"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb40-1"><a href="raw-ms-data.html#cb40-1" aria-hidden="true" tabindex="-1"></a>sp_sciex <span class="ot">&lt;-</span> <span class="fu">Spectra</span>(fls)</span>
-<span id="cb40-2"><a href="raw-ms-data.html#cb40-2" aria-hidden="true" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">dataOrigin</span>(sp_sciex))</span></code></pre></div>
-<pre><code>## 
-## /home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/sciex/20171016_POOL_POS_1_105-134.mzML 
-##                                                                                          931 
-## /home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/sciex/20171016_POOL_POS_3_105-134.mzML 
-##                                                                                          931</code></pre>
-</div>
-</div>
-<div id="backends" class="section level3" number="3.1.3">
-<h3>
-<span class="header-section-number">3.1.3</span> Backends<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('backends')" onmouseout="reset_tooltip('backends-tooltip')"><span class="tooltiptext" id="backends-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Different <em>backends</em> can be used to store mass spectrometry data while
-providing <em>via</em> the <code>Spectra</code> class a unified interface to use that data. The
-<code>Spectra</code> package defines a set of example backends but any object extending the
-base <code>MsBackend</code> class could be used instead. The default backends are:</p>
-<ul>
-<li>
-<code>MsBackendMzR</code>: this backend keeps only general spectra variables in memory
-and relies on the <em><a href="https://bioconductor.org/packages/3.14/mzR">mzR</a></em> package to read mass peaks (m/z and
-intensity values) from the original MS files on-demand.</li>
-</ul>
-<div class="sourceCode" id="cb42"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb42-1"><a href="raw-ms-data.html#cb42-1" aria-hidden="true" tabindex="-1"></a>sp_sciex</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 1862 spectra in a MsBackendMzR backend:
-##        msLevel     rtime scanIndex
-##      &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1            1     0.280         1
-## 2            1     0.559         2
-## 3            1     0.838         3
-## 4            1     1.117         4
-## 5            1     1.396         5
-## ...        ...       ...       ...
-## 1858         1   258.636       927
-## 1859         1   258.915       928
-## 1860         1   259.194       929
-## 1861         1   259.473       930
-## 1862         1   259.752       931
-##  ... 33 more variables/columns.
-## 
-## file(s):
-## 20171016_POOL_POS_1_105-134.mzML
-## 20171016_POOL_POS_3_105-134.mzML</code></pre>
-<ul>
-<li>
-<code>MsBackendDataFrame</code>: the mass spectrometry data is stored (in-memory) in a
-<code>DataFrame</code>. Keeping the data in memory guarantees high performance but has
-also, depending on the number of mass peaks in each spectrum, a much higher
-memory footprint.</li>
-</ul>
-<div class="sourceCode" id="cb44"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb44-1"><a href="raw-ms-data.html#cb44-1" aria-hidden="true" tabindex="-1"></a><span class="fu">setBackend</span>(sp_sciex, <span class="fu">MsBackendDataFrame</span>())</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 1862 spectra in a MsBackendDataFrame backend:
-##        msLevel     rtime scanIndex
-##      &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1            1     0.280         1
-## 2            1     0.559         2
-## 3            1     0.838         3
-## 4            1     1.117         4
-## 5            1     1.396         5
-## ...        ...       ...       ...
-## 1858         1   258.636       927
-## 1859         1   258.915       928
-## 1860         1   259.194       929
-## 1861         1   259.473       930
-## 1862         1   259.752       931
-##  ... 33 more variables/columns.
-## Processing:
-##  Switch backend from MsBackendMzR to MsBackendDataFrame [Tue Aug 31 11:35:38 2021]</code></pre>
-<ul>
-<li>
-<code>MsBackendHdf5Peaks</code>: similar to <code>MsBackendMzR</code> this backend reads peak data
-only on-demand from disk while all other spectra variables are kept in
-memory. The peak data are stored in Hdf5 files which guarantees scalability.</li>
-</ul>
-<div class="sourceCode" id="cb46"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb46-1"><a href="raw-ms-data.html#cb46-1" aria-hidden="true" tabindex="-1"></a>sp_hdf5 <span class="ot">&lt;-</span> <span class="fu">setBackend</span>(sp_sciex, <span class="fu">MsBackendHdf5Peaks</span>(), <span class="at">hdf5path =</span> <span class="fu">tempdir</span>())</span>
-<span id="cb46-2"><a href="raw-ms-data.html#cb46-2" aria-hidden="true" tabindex="-1"></a>sp_hdf5</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 1862 spectra in a MsBackendHdf5Peaks backend:
-##        msLevel     rtime scanIndex
-##      &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1            1     0.280         1
-## 2            1     0.559         2
-## 3            1     0.838         3
-## 4            1     1.117         4
-## 5            1     1.396         5
-## ...        ...       ...       ...
-## 1858         1   258.636       927
-## 1859         1   258.915       928
-## 1860         1   259.194       929
-## 1861         1   259.473       930
-## 1862         1   259.752       931
-##  ... 33 more variables/columns.
-## 
-## file(s):
-##  20171016_POOL_POS_1_105-134.h5
-##  20171016_POOL_POS_3_105-134.h5
-## Processing:
-##  Switch backend from MsBackendMzR to MsBackendHdf5Peaks [Tue Aug 31 11:35:44 2021]</code></pre>
-<div class="sourceCode" id="cb48"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb48-1"><a href="raw-ms-data.html#cb48-1" aria-hidden="true" tabindex="-1"></a><span class="fu">table</span>(sp_hdf5<span class="sc">$</span>dataOrigin)</span></code></pre></div>
-<pre><code>## 
-## /home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/sciex/20171016_POOL_POS_1_105-134.mzML 
-##                                                                                          931 
-## /home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/sciex/20171016_POOL_POS_3_105-134.mzML 
-##                                                                                          931</code></pre>
-<div class="sourceCode" id="cb50"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb50-1"><a href="raw-ms-data.html#cb50-1" aria-hidden="true" tabindex="-1"></a><span class="fu">table</span>(sp_hdf5<span class="sc">$</span>dataStorage)</span></code></pre></div>
-<pre><code>## 
-## /tmp/RtmpFxKO1C/20171016_POOL_POS_1_105-134.h5 
-##                                            931 
-## /tmp/RtmpFxKO1C/20171016_POOL_POS_3_105-134.h5 
-##                                            931</code></pre>
-<p>All of the above mentioned backends support changing all of their spectra
-variables, <strong>except</strong> the <code>MsBackendMzR</code> that does not support changing m/z or
-intensity values for the mass peaks.</p>
-<p>With the example below we load the data from a single mzML file and use a
-<code>MsBackendHdf5Peaks</code> backend for data storage. The <code>hdf5path</code> parameter allows
-us to specify the storage location of the HDF5 file.</p>
-<p>There is also an (under development) SQLite-based backend called
-<a href="https://rformassspectrometry.github.io/MsBackendSql/articles/MsBackendSqlDb.html"><code>MsBackendSqlDb</code></a>
-that will store all data, i.e. raw and metadata, on disk.</p>
-</div>
-</div>
-<div id="under-the-hood-mzr-optional" class="section level2" number="3.2">
-<h2>
-<span class="header-section-number">3.2</span> Under the hood: <code>mzR</code> (optional)<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('under-the-hood-mzr-optional')" onmouseout="reset_tooltip('under-the-hood-mzr-optional-tooltip')"><span class="tooltiptext" id="under-the-hood-mzr-optional-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>The <code>mzR</code> package is a direct interface to the
-<a href="http://proteowizard.sourceforge.net/">proteowizard</a> code base. It
-includes a substantial proportion of <em>pwiz</em>’s C/C++ code for fast and
-efficient parsing of these large raw data files.</p>
-<p>Let’s start by using some raw data files from the <code>msdata</code>
-package. After loading it, we use the <code>proteomics()</code> function to
-return the full file names for two raw data files. We will start by
-focusing on the second one.</p>
-<div class="sourceCode" id="cb52"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb52-1"><a href="raw-ms-data.html#cb52-1" aria-hidden="true" tabindex="-1"></a>f <span class="ot">&lt;-</span> msdata<span class="sc">::</span><span class="fu">proteomics</span>(<span class="at">full.names =</span> <span class="cn">TRUE</span>)</span>
-<span id="cb52-2"><a href="raw-ms-data.html#cb52-2" aria-hidden="true" tabindex="-1"></a>f</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/proteomics/MRM-standmix-5.mzML.gz"                                                
-## [2] "/home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/proteomics/MS3TMT10_01022016_32917-33481.mzML.gz"                                 
-## [3] "/home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/proteomics/MS3TMT11.mzML"                                                         
-## [4] "/home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/proteomics/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML.gz"
-## [5] "/home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/proteomics/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML.gz"</code></pre>
-<div class="sourceCode" id="cb54"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb54-1"><a href="raw-ms-data.html#cb54-1" aria-hidden="true" tabindex="-1"></a>(f2 <span class="ot">&lt;-</span> <span class="fu">grep</span>(<span class="st">"20141210"</span>, f, <span class="at">value =</span> <span class="cn">TRUE</span>))</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/proteomics/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML.gz"</code></pre>
-<p>The three main functions of <code>mzR</code> are</p>
-<ul>
-<li>
-<code>openMSfile</code> to create a file handle to a raw data file</li>
-<li>
-<code>header</code> to extract metadata about the spectra contained in the file</li>
-<li>
-<code>peaks</code> to extract one or multiple spectra of interest.</li>
-</ul>
-<p>Other functions such as <code>instrumentInfo</code>, or <code>runInfo</code> can be used to
-gather general information about a run.</p>
-<div class="sourceCode" id="cb56"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb56-1"><a href="raw-ms-data.html#cb56-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"mzR"</span>)</span>
-<span id="cb56-2"><a href="raw-ms-data.html#cb56-2" aria-hidden="true" tabindex="-1"></a>ms <span class="ot">&lt;-</span> <span class="fu">openMSfile</span>(f2)</span>
-<span id="cb56-3"><a href="raw-ms-data.html#cb56-3" aria-hidden="true" tabindex="-1"></a>ms</span></code></pre></div>
-<pre><code>## Mass Spectrometry file handle.
-## Filename:  TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML.gz 
-## Number of scans:  7534</code></pre>
-<div class="sourceCode" id="cb58"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb58-1"><a href="raw-ms-data.html#cb58-1" aria-hidden="true" tabindex="-1"></a>hd <span class="ot">&lt;-</span> <span class="fu">header</span>(ms)</span>
-<span id="cb58-2"><a href="raw-ms-data.html#cb58-2" aria-hidden="true" tabindex="-1"></a><span class="fu">dim</span>(hd)</span></code></pre></div>
-<pre><code>## [1] 7534   31</code></pre>
-<div class="sourceCode" id="cb60"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb60-1"><a href="raw-ms-data.html#cb60-1" aria-hidden="true" tabindex="-1"></a><span class="fu">names</span>(hd)</span></code></pre></div>
-<pre><code>##  [1] "seqNum"                     "acquisitionNum"            
-##  [3] "msLevel"                    "polarity"                  
-##  [5] "peaksCount"                 "totIonCurrent"             
-##  [7] "retentionTime"              "basePeakMZ"                
-##  [9] "basePeakIntensity"          "collisionEnergy"           
-## [11] "ionisationEnergy"           "lowMZ"                     
-## [13] "highMZ"                     "precursorScanNum"          
-## [15] "precursorMZ"                "precursorCharge"           
-## [17] "precursorIntensity"         "mergedScan"                
-## [19] "mergedResultScanNum"        "mergedResultStartScanNum"  
-## [21] "mergedResultEndScanNum"     "injectionTime"             
-## [23] "filterString"               "spectrumId"                
-## [25] "centroided"                 "ionMobilityDriftTime"      
-## [27] "isolationWindowTargetMZ"    "isolationWindowLowerOffset"
-## [29] "isolationWindowUpperOffset" "scanWindowLowerLimit"      
-## [31] "scanWindowUpperLimit"</code></pre>
-<div class="sourceCode" id="cb62"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb62-1"><a href="raw-ms-data.html#cb62-1" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(<span class="fu">peaks</span>(ms, <span class="dv">117</span>))</span></code></pre></div>
-<pre><code>##          [,1] [,2]
-## [1,] 399.9976    0
-## [2,] 399.9991    0
-## [3,] 400.0006    0
-## [4,] 400.0021    0
-## [5,] 400.2955    0
-## [6,] 400.2970    0</code></pre>
-<div class="sourceCode" id="cb64"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb64-1"><a href="raw-ms-data.html#cb64-1" aria-hidden="true" tabindex="-1"></a><span class="fu">str</span>(<span class="fu">peaks</span>(ms, <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>))</span></code></pre></div>
-<pre><code>## List of 5
-##  $ : num [1:25800, 1:2] 400 400 400 400 400 ...
-##  $ : num [1:25934, 1:2] 400 400 400 400 400 ...
-##  $ : num [1:26148, 1:2] 400 400 400 400 400 ...
-##  $ : num [1:26330, 1:2] 400 400 400 400 400 ...
-##  $ : num [1:26463, 1:2] 400 400 400 400 400 ...</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Let’s extract the index of the MS2 spectrum with the highest base peak
-intensity and plot its spectrum. Is the data centroided or in profile
-mode?</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-1" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-1', 'sol-start-1')"></span>
-</p>
-<div id="sol-body-1" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb66"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb66-1"><a href="raw-ms-data.html#cb66-1" aria-hidden="true" tabindex="-1"></a>hd2 <span class="ot">&lt;-</span> hd[hd<span class="sc">$</span>msLevel <span class="sc">==</span> <span class="dv">2</span>, ]</span>
-<span id="cb66-2"><a href="raw-ms-data.html#cb66-2" aria-hidden="true" tabindex="-1"></a>i <span class="ot">&lt;-</span> <span class="fu">which.max</span>(hd2<span class="sc">$</span>basePeakIntensity)</span>
-<span id="cb66-3"><a href="raw-ms-data.html#cb66-3" aria-hidden="true" tabindex="-1"></a>hd2[i, ]</span></code></pre></div>
-<pre><code>##      seqNum acquisitionNum msLevel polarity peaksCount totIonCurrent
-## 5404   5404           5404       2        1        275    2283283712
-##      retentionTime basePeakMZ basePeakIntensity collisionEnergy
-## 5404      2751.313   859.5032         354288224              45
-##      ionisationEnergy    lowMZ  highMZ precursorScanNum precursorMZ
-## 5404                0 100.5031 1995.63             5403    859.1722
-##      precursorCharge precursorIntensity mergedScan mergedResultScanNum
-## 5404               3          627820480         NA                  NA
-##      mergedResultStartScanNum mergedResultEndScanNum injectionTime
-## 5404                       NA                     NA    0.03474091
-##                                                  filterString
-## 5404 FTMS + p NSI d Full ms2 859.50@hcd45.00 [100.00-2000.00]
-##                                         spectrumId centroided
-## 5404 controllerType=0 controllerNumber=1 scan=5404       TRUE
-##      ionMobilityDriftTime isolationWindowTargetMZ isolationWindowLowerOffset
-## 5404                   NA                   859.5                          1
-##      isolationWindowUpperOffset scanWindowLowerLimit scanWindowUpperLimit
-## 5404                          1                  100                 2000</code></pre>
-<div class="sourceCode" id="cb68"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb68-1"><a href="raw-ms-data.html#cb68-1" aria-hidden="true" tabindex="-1"></a>pi <span class="ot">&lt;-</span> <span class="fu">peaks</span>(ms, hd2[i, <span class="dv">1</span>])</span>
-<span id="cb68-2"><a href="raw-ms-data.html#cb68-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(pi, <span class="at">type =</span> <span class="st">"h"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/ex_raw-1.png" width="672" style="display: block; margin: auto;"></p>
-<div class="sourceCode" id="cb69"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb69-1"><a href="raw-ms-data.html#cb69-1" aria-hidden="true" tabindex="-1"></a>mz <span class="ot">&lt;-</span> hd2[i, <span class="st">"basePeakMZ"</span>]</span>
-<span id="cb69-2"><a href="raw-ms-data.html#cb69-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(pi, <span class="at">type =</span> <span class="st">"h"</span>, <span class="at">xlim =</span> <span class="fu">c</span>(mz <span class="sc">-</span> <span class="fl">0.5</span>, mz <span class="sc">+</span> <span class="fl">0.5</span>))</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/ex_raw-2.png" width="672" style="display: block; margin: auto;"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Pick an MS1 spectrum and visually check whether it is centroided or in
-profile mode.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-2" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-2', 'sol-start-2')"></span>
-</p>
-<div id="sol-body-2" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb70"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb70-1"><a href="raw-ms-data.html#cb70-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Zooming into spectrum 300 (an MS1 spectrum).</span></span>
-<span id="cb70-2"><a href="raw-ms-data.html#cb70-2" aria-hidden="true" tabindex="-1"></a>j <span class="ot">&lt;-</span> <span class="dv">300</span></span>
-<span id="cb70-3"><a href="raw-ms-data.html#cb70-3" aria-hidden="true" tabindex="-1"></a>pj <span class="ot">&lt;-</span> <span class="fu">peaks</span>(ms, j)</span>
-<span id="cb70-4"><a href="raw-ms-data.html#cb70-4" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(pj, <span class="at">type =</span> <span class="st">"l"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/ex_raw2-1.png" width="672"></p>
-<div class="sourceCode" id="cb71"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb71-1"><a href="raw-ms-data.html#cb71-1" aria-hidden="true" tabindex="-1"></a>mz <span class="ot">&lt;-</span> hd[j, <span class="st">"basePeakMZ"</span>]</span>
-<span id="cb71-2"><a href="raw-ms-data.html#cb71-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(pj, <span class="at">type =</span> <span class="st">"l"</span>, <span class="at">xlim =</span> <span class="fu">c</span>(mz <span class="sc">-</span> <span class="fl">0.5</span>, mz <span class="sc">+</span> <span class="fl">0.5</span>))</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/ex_raw2-2.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="visualisation-of-raw-ms-data" class="section level2" number="3.3">
-<h2>
-<span class="header-section-number">3.3</span> Visualisation of raw MS data<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('visualisation-of-raw-ms-data')" onmouseout="reset_tooltip('visualisation-of-raw-ms-data-tooltip')"><span class="tooltiptext" id="visualisation-of-raw-ms-data-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>The importance of flexible access to specialised data becomes visible
-in the figure below (taken from the <code>RforProteomics</code> <a href="http://bioconductor.org/packages/release/data/experiment/vignettes/RforProteomics/inst/doc/RProtVis.html">visualisation
-vignette</a>).
-Not only can we access specific data and understand/visualise them,
-but we can traverse all the data and extract/visualise/understand
-structured slices of data.</p>
-<p>The figure below is an illustration of how mass spectrometry
-works:</p>
-<ol style="list-style-type: decimal">
-<li><p>The chromatogram at the top displays the total ion current along the
-retention time. The vertical line identifies one scan in particular
-at retention time 1800.68 seconds (the 2807th scan).</p></li>
-<li><p>The spectra on the second line represent the full MS1 spectrum
-marked by the red line. The vertical lines identify the 10
-precursor ions that were selected for MS2 analysis. The zoomed-in view
-on the right shows one specific precursor peak.</p></li>
-<li><p>The MS2 spectra displayed along the two rows at the bottom are
-those resulting from the fragmentation of the 10 precursor peaks
-identified by the vertical bars above.</p></li>
-</ol>
-<p><img src="img/msvisfig.png" width="100%" style="display: block; margin: auto;"></p>
-<p>We are going to reproduce the figure above through a set of exercises.</p>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol style="list-style-type: decimal">
-<li>The chromatogram can be created by extracting the <code>totIonCurrent</code>
-and <code>rtime</code> variables for all MS1 spectra. Annotate the spectrum of
-interest.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-3" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-3', 'sol-start-3')"></span>
-</p>
-<div id="sol-body-3" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb72"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb72-1"><a href="raw-ms-data.html#cb72-1" aria-hidden="true" tabindex="-1"></a><span class="fu">with</span>(<span class="fu">spectraData</span>(<span class="fu">filterMsLevel</span>(sp, <span class="dv">1</span>)),</span>
-<span id="cb72-2"><a href="raw-ms-data.html#cb72-2" aria-hidden="true" tabindex="-1"></a>     <span class="fu">plot</span>(rtime, totIonCurrent, <span class="at">type =</span> <span class="st">"l"</span>))</span>
-<span id="cb72-3"><a href="raw-ms-data.html#cb72-3" aria-hidden="true" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> <span class="fu">rtime</span>(sp)[<span class="dv">2807</span>], <span class="at">col =</span> <span class="st">"red"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-15-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol start="2" style="list-style-type: decimal">
-<li>The <code>filterPrecursorScan()</code> function can be used to retain parent
-(MS1) and children scans (MS2) of a scan, as defined by its
-acquisition number. Use it to extract the MS1 scan of interest and
-all its MS2 children.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-4" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-4', 'sol-start-4')"></span>
-</p>
-<div id="sol-body-4" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb73"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb73-1"><a href="raw-ms-data.html#cb73-1" aria-hidden="true" tabindex="-1"></a>ms_2 <span class="ot">&lt;-</span> <span class="fu">filterPrecursorScan</span>(sp, <span class="dv">2807</span>)</span>
-<span id="cb73-2"><a href="raw-ms-data.html#cb73-2" aria-hidden="true" tabindex="-1"></a>ms_2</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 11 spectra in a MsBackendMzR backend:
-##      msLevel     rtime scanIndex
-##    &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1          1   1800.68      2807
-## 2          2   1801.26      2808
-## 3          2   1801.92      2809
-## 4          2   1802.20      2810
-## 5          2   1802.48      2811
-## 6          2   1802.77      2812
-## 7          2   1803.05      2813
-## 8          2   1803.34      2814
-## 9          2   1803.64      2815
-## 10         2   1803.93      2816
-## 11         2   1804.21      2817
-##  ... 33 more variables/columns.
-## 
-## file(s):
-## b87c573dec94f_TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-## Processing:
-##  Filter: select parent/children scans for 2807 [Tue Aug 31 11:35:50 2021]</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol start="3" style="list-style-type: decimal">
-<li>Plot the MS1 spectrum of interest and highlight all the peaks that
-will be selected for MS2 analysis.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-5" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-5', 'sol-start-5')"></span>
-</p>
-<div id="sol-body-5" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb75"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb75-1"><a href="raw-ms-data.html#cb75-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectra</span>(sp[<span class="dv">2807</span>], <span class="at">xlim =</span> <span class="fu">c</span>(<span class="dv">400</span>, <span class="dv">1000</span>))</span>
-<span id="cb75-2"><a href="raw-ms-data.html#cb75-2" aria-hidden="true" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> <span class="fu">precursorMz</span>(ms_2)[<span class="sc">-</span><span class="dv">1</span>], <span class="at">col =</span> <span class="st">"grey"</span>)</span>
-<span id="cb75-3"><a href="raw-ms-data.html#cb75-3" aria-hidden="true" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> <span class="fu">precursorMz</span>(ms_2)[<span class="dv">2</span>], <span class="at">col =</span> <span class="st">"red"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-17-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol start="4" style="list-style-type: decimal">
-<li>Zoom in on mz values between 521.1 and 522.5 to reveal the isotopic envelope
-of that peak.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-6" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-6', 'sol-start-6')"></span>
-</p>
-<div id="sol-body-6" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb76"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb76-1"><a href="raw-ms-data.html#cb76-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectra</span>(sp[<span class="dv">2807</span>], <span class="at">xlim =</span> <span class="fu">c</span>(<span class="fl">521.2</span>, <span class="fl">522.5</span>), <span class="at">type =</span> <span class="st">"l"</span>)</span>
-<span id="cb76-2"><a href="raw-ms-data.html#cb76-2" aria-hidden="true" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> <span class="fu">precursorMz</span>(ms_2)[<span class="dv">2</span>], <span class="at">col =</span> <span class="st">"red"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-18-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol start="5" style="list-style-type: decimal">
-<li>The <code>plotSpectra()</code> function is used to plot all 10 MS2 spectra in
-one call.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-7" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-7', 'sol-start-7')"></span>
-</p>
-<div id="sol-body-7" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb77"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb77-1"><a href="raw-ms-data.html#cb77-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectra</span>(ms_2[<span class="sc">-</span><span class="dv">1</span>])</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-19-1.png" width="768"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>It is possible to label the peaks with the <code>plotSpectra()</code>
-function. The <code>labels</code> argument is either a <code>character</code> of appropriate
-length (i.e. with a label for each peak) or, as illustrated below, a
-function that computes the labels.</p>
-<div class="sourceCode" id="cb78"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb78-1"><a href="raw-ms-data.html#cb78-1" aria-hidden="true" tabindex="-1"></a>mzLabel <span class="ot">&lt;-</span> <span class="cf">function</span>(z) {</span>
-<span id="cb78-2"><a href="raw-ms-data.html#cb78-2" aria-hidden="true" tabindex="-1"></a>    z <span class="ot">&lt;-</span> <span class="fu">peaksData</span>(z)[[1L]]</span>
-<span id="cb78-3"><a href="raw-ms-data.html#cb78-3" aria-hidden="true" tabindex="-1"></a>    lbls <span class="ot">&lt;-</span> <span class="fu">format</span>(z[, <span class="st">"mz"</span>], <span class="at">digits =</span> <span class="dv">4</span>)</span>
-<span id="cb78-4"><a href="raw-ms-data.html#cb78-4" aria-hidden="true" tabindex="-1"></a>    lbls[z[, <span class="st">"intensity"</span>] <span class="sc">&lt;</span> <span class="fl">1e5</span>] <span class="ot">&lt;-</span> <span class="st">""</span></span>
-<span id="cb78-5"><a href="raw-ms-data.html#cb78-5" aria-hidden="true" tabindex="-1"></a>    lbls</span>
-<span id="cb78-6"><a href="raw-ms-data.html#cb78-6" aria-hidden="true" tabindex="-1"></a>}</span>
-<span id="cb78-7"><a href="raw-ms-data.html#cb78-7" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb78-8"><a href="raw-ms-data.html#cb78-8" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectra</span>(ms_2[<span class="dv">7</span>],</span>
-<span id="cb78-9"><a href="raw-ms-data.html#cb78-9" aria-hidden="true" tabindex="-1"></a>            <span class="at">xlim =</span> <span class="fu">c</span>(<span class="dv">126</span>, <span class="dv">132</span>),</span>
-<span id="cb78-10"><a href="raw-ms-data.html#cb78-10" aria-hidden="true" tabindex="-1"></a>            <span class="at">labels =</span> mzLabel,</span>
-<span id="cb78-11"><a href="raw-ms-data.html#cb78-11" aria-hidden="true" tabindex="-1"></a>            <span class="at">labelSrt =</span> <span class="sc">-</span><span class="dv">30</span>, <span class="at">labelPos =</span> <span class="dv">2</span>,</span>
-<span id="cb78-12"><a href="raw-ms-data.html#cb78-12" aria-hidden="true" tabindex="-1"></a>            <span class="at">labelOffset =</span> <span class="fl">0.1</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-20-1.png" width="672"></p>
-<p>Spectra can also be compared either by overlay or mirror plotting
-using the <code>plotSpectraOverlay()</code> and <code>plotSpectraMirror()</code> functions.</p>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Filter MS2 level spectra and find any 2 MS2 spectra that have matching
-precursor peaks based on the precursor m/z values.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-8" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-8', 'sol-start-8')"></span>
-</p>
-<div id="sol-body-8" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb79"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb79-1"><a href="raw-ms-data.html#cb79-1" aria-hidden="true" tabindex="-1"></a>sp2 <span class="ot">&lt;-</span> <span class="fu">filterMsLevel</span>(sp, 2L)</span>
-<span id="cb79-2"><a href="raw-ms-data.html#cb79-2" aria-hidden="true" tabindex="-1"></a><span class="fu">anyDuplicated</span>(<span class="fu">precursorMz</span>(<span class="fu">filterMsLevel</span>(sp, <span class="dv">2</span>)))</span></code></pre></div>
-<pre><code>## [1] 37</code></pre>
-<div class="sourceCode" id="cb81"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb81-1"><a href="raw-ms-data.html#cb81-1" aria-hidden="true" tabindex="-1"></a>i <span class="ot">&lt;-</span> <span class="fu">which</span>(<span class="fu">precursorMz</span>(sp2) <span class="sc">==</span> <span class="fu">precursorMz</span>(sp2)[<span class="dv">37</span>])</span>
-<span id="cb81-2"><a href="raw-ms-data.html#cb81-2" aria-hidden="true" tabindex="-1"></a>sp2i <span class="ot">&lt;-</span> sp2[i]</span></code></pre></div>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Visualise the matching pair using the <code>plotSpectraOverlay()</code> and
-<code>plotSpectraMirror()</code> functions.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-9" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-9', 'sol-start-9')"></span>
-</p>
-<div id="sol-body-9" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb82"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb82-1"><a href="raw-ms-data.html#cb82-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectraOverlay</span>(sp2i, <span class="at">col =</span> <span class="fu">c</span>(<span class="st">"red"</span>, <span class="st">"steelblue"</span>))</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-22-1.png" width="672"></p>
-<div class="sourceCode" id="cb83"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb83-1"><a href="raw-ms-data.html#cb83-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectraMirror</span>(sp2i[<span class="dv">1</span>], sp2i[<span class="dv">2</span>])</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-23-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="raw-data-processing-and-manipulation" class="section level2" number="3.4">
-<h2>
-<span class="header-section-number">3.4</span> Raw data processing and manipulation<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('raw-data-processing-and-manipulation')" onmouseout="reset_tooltip('raw-data-processing-and-manipulation-tooltip')"><span class="tooltiptext" id="raw-data-processing-and-manipulation-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Apart from <em>classical</em> subsetting operations such as <code>[</code> and <code>split</code>,
-a set of filter functions are defined for <code>Spectra</code> objects (for
-detailed help please see the <code>?Spectra</code> help):</p>
-<ul>
-<li>
-<code>filterAcquisitionNum</code>: retain spectra with certain acquisition numbers.</li>
-<li>
-<code>filterDataOrigin</code>: subset to spectra from specific origins.</li>
-<li>
-<code>filterDataStorage</code>: subset to spectra from certain data storage files.</li>
-<li>
-<code>filterEmptySpectra</code>: remove spectra without mass peaks.</li>
-<li>
-<code>filterMzRange</code>: subset spectra keeping only peaks with an m/z within the
-provided m/z range.</li>
-<li>
-<code>filterMzValues</code>: subset spectra keeping only peaks matching provided m/z
-value(s).</li>
-<li>
-<code>filterIsolationWindow</code>: keep spectra with the provided <code>mz</code> in their
-isolation window (m/z range).</li>
-<li>
-<code>filterMsLevel</code>: filter by MS level.</li>
-<li>
-<code>filterPolarity</code>: filter by polarity.</li>
-<li>
-<code>filterPrecursorMz</code>: retain (MSn) spectra with a precursor m/z within the
-provided m/z range.</li>
-<li>
-<code>filterPrecursorScan</code>: retain (parent and children) scans of an acquisition
-number.</li>
-<li>
-<code>filterRt</code>: filter based on retention time ranges.</li>
-</ul>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Using the <code>sp_sciex</code> data, select all spectra measured in the second
-mzML file and subsequently filter them to retain spectra measured
-between 175 and 189 seconds in the measurement run.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-10" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-10', 'sol-start-10')"></span>
-</p>
-<div id="sol-body-10" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb84"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb84-1"><a href="raw-ms-data.html#cb84-1" aria-hidden="true" tabindex="-1"></a>fls <span class="ot">&lt;-</span> <span class="fu">unique</span>(<span class="fu">dataOrigin</span>(sp_sciex))</span>
-<span id="cb84-2"><a href="raw-ms-data.html#cb84-2" aria-hidden="true" tabindex="-1"></a>fls</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/sciex/20171016_POOL_POS_1_105-134.mzML"
-## [2] "/home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/sciex/20171016_POOL_POS_3_105-134.mzML"</code></pre>
-<div class="sourceCode" id="cb86"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb86-1"><a href="raw-ms-data.html#cb86-1" aria-hidden="true" tabindex="-1"></a>file_2 <span class="ot">&lt;-</span> <span class="fu">filterDataOrigin</span>(sp_sciex, <span class="at">dataOrigin =</span> fls[<span class="dv">2</span>])</span>
-<span id="cb86-2"><a href="raw-ms-data.html#cb86-2" aria-hidden="true" tabindex="-1"></a><span class="fu">length</span>(file_2)</span></code></pre></div>
-<pre><code>## [1] 931</code></pre>
-<div class="sourceCode" id="cb88"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb88-1"><a href="raw-ms-data.html#cb88-1" aria-hidden="true" tabindex="-1"></a>sps_sub <span class="ot">&lt;-</span> <span class="fu">filterRt</span>(file_2, <span class="at">rt =</span> <span class="fu">c</span>(<span class="dv">175</span>, <span class="dv">189</span>))</span>
-<span id="cb88-2"><a href="raw-ms-data.html#cb88-2" aria-hidden="true" tabindex="-1"></a><span class="fu">length</span>(sps_sub)</span></code></pre></div>
-<pre><code>## [1] 50</code></pre>
-<div class="sourceCode" id="cb90"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb90-1"><a href="raw-ms-data.html#cb90-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"magrittr"</span>)</span>
-<span id="cb90-2"><a href="raw-ms-data.html#cb90-2" aria-hidden="true" tabindex="-1"></a>sp_sciex <span class="sc">%&gt;%</span></span>
-<span id="cb90-3"><a href="raw-ms-data.html#cb90-3" aria-hidden="true" tabindex="-1"></a>    <span class="fu">filterDataOrigin</span>(fls[<span class="dv">2</span>]) <span class="sc">%&gt;%</span></span>
-<span id="cb90-4"><a href="raw-ms-data.html#cb90-4" aria-hidden="true" tabindex="-1"></a>    <span class="fu">filterRt</span>(<span class="fu">c</span>(<span class="dv">175</span>, <span class="dv">189</span>))</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 50 spectra in a MsBackendMzR backend:
-##       msLevel     rtime scanIndex
-##     &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1           1   175.212       628
-## 2           1   175.491       629
-## 3           1   175.770       630
-## 4           1   176.049       631
-## 5           1   176.328       632
-## ...       ...       ...       ...
-## 46          1   187.768       673
-## 47          1   188.047       674
-## 48          1   188.326       675
-## 49          1   188.605       676
-## 50          1   188.884       677
-##  ... 33 more variables/columns.
-## 
-## file(s):
-## 20171016_POOL_POS_3_105-134.mzML
-## Processing:
-##  Filter: select data origin(s) /home/lgatto/R/x86_64-pc-linux-gnu-library/4.1/msdata/sciex/20171016_POOL_POS_3_105-134.mzML [Tue Aug 31 11:35:51 2021]
-##  Filter: select retention time [175..189] on MS level(s) 1 [Tue Aug 31 11:35:51 2021]</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>As an example of data processing, below we use the <code>pickPeaks()</code>
-function to pick peaks:</p>
-<div class="sourceCode" id="cb92"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb92-1"><a href="raw-ms-data.html#cb92-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plotSpectra</span>(sp[<span class="dv">2807</span>], <span class="at">xlim =</span> <span class="fu">c</span>(<span class="fl">521.2</span>, <span class="fl">522.5</span>))</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-24-1.png" width="672"></p>
-<div class="sourceCode" id="cb93"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb93-1"><a href="raw-ms-data.html#cb93-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"magrittr"</span>)</span>
-<span id="cb93-2"><a href="raw-ms-data.html#cb93-2" aria-hidden="true" tabindex="-1"></a><span class="fu">pickPeaks</span>(sp[<span class="dv">2807</span>]) <span class="sc">%&gt;%</span></span>
-<span id="cb93-3"><a href="raw-ms-data.html#cb93-3" aria-hidden="true" tabindex="-1"></a>    <span class="fu">filterIntensity</span>(<span class="fl">1e7</span>) <span class="sc">%&gt;%</span></span>
-<span id="cb93-4"><a href="raw-ms-data.html#cb93-4" aria-hidden="true" tabindex="-1"></a>    <span class="fu">plotSpectra</span>(<span class="at">xlim =</span> <span class="fu">c</span>(<span class="fl">521.2</span>, <span class="fl">522.5</span>))</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-25-1.png" width="672"></p>
-</div>
-<div id="a-note-on-efficiency" class="section level2" number="3.5">
-<h2>
-<span class="header-section-number">3.5</span> A note on efficiency<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('a-note-on-efficiency')" onmouseout="reset_tooltip('a-note-on-efficiency-tooltip')"><span class="tooltiptext" id="a-note-on-efficiency-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<div id="backends-1" class="section level3" number="3.5.1">
-<h3>
-<span class="header-section-number">3.5.1</span> Backends<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('backends-1')" onmouseout="reset_tooltip('backends-1-tooltip')"><span class="tooltiptext" id="backends-1-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<div class="figure" style="text-align: center">
-<span style="display:block;" id="fig:unnamed-chunk-26"></span>
-<p class="caption marginnote shownote">
-Figure 3.2: (a) Reading time (triplicates, in seconds) and (b) data size in memory (in MB) to read/store 1, 5, and 10 files containing 1431 MS1 (on-disk only) and 6103 MS2 (on-disk and in-memory) spectra. (c) Filtering benchmark assessed over 10 interactions on in-memory and on-disk data containing 6103 MS2 spectra. (d) Access time to spectra for the in-memory (left) and on-disk (right) backends for 1, 10, 100, 1000, 5000, and all 6103 spectra. Benchmarks were performed on a Dell XPS laptop with an Intel i5-8250U processor 1.60 GHz (4 cores, 8 threads), 7.5 GB RAM running Ubuntu 18.04.4 LTS 64-bit, and an SSD drive. The data used for the benchmarking are a TMT 4-plex experiment acquired on a LTQ Orbitrap Velos (Thermo Fisher Scientific) available in the msdata package. (Figure taken from <span class="citation">(<a href="#ref-Gatto:2020" role="doc-biblioref">Gatto, Gibb, and Rainer 2020</a>)</span>.)
-</p>
-<img src="img/pr0c00313_0002.gif" alt="(a) Reading time (triplicates, in seconds) and (b) data size in memory (in MB) to read/store 1, 5, and 10 files containing 1431 MS1 (on-disk only) and 6103 MS2 (on-disk and in-memory) spectra. (c) Filtering benchmark assessed over 10 interactions on in-memory and on-disk data containing 6103 MS2 spectra. (d) Access time to spectra for the in-memory (left) and on-disk (right) backends for 1, 10, 100 1000, 5000, and all 6103 spectra. Benchmarks were performed on a Dell XPS laptop with an Intel i5-8250U processor 1.60 GHz (4 cores, 8 threads), 7.5 GB RAM running Ubuntu 18.04.4 LTS 64-bit, and an SSD drive. The data used for the benchmarking are a TMT 4-plex experiment acquired on a LTQ Orbitrap Velos (Thermo Fisher Scientific) available in the msdata package . (Figure taken from [@Gatto:2020]." width="70%">
-</div>
-</div>
-<div id="parallel-processing" class="section level3" number="3.5.2">
-<h3>
-<span class="header-section-number">3.5.2</span> Parallel processing<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('parallel-processing')" onmouseout="reset_tooltip('parallel-processing-tooltip')"><span class="tooltiptext" id="parallel-processing-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Most functions on <code>Spectra</code> support (and use) parallel processing out
-of the box. Peak data access and manipulation methods perform by
-default parallel processing on a per-file basis (i.e. using the
-dataStorage variable as splitting factor). Spectra uses
-<a href="https://bioconductor.org/packages/BiocParallel"><code>BiocParallel</code></a> for
-parallel processing and all functions use the default registered
-parallel processing setup of that package.</p>
-</div>
-<div id="lazy-evaluation" class="section level3" number="3.5.3">
-<h3>
-<span class="header-section-number">3.5.3</span> Lazy evaluation<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('lazy-evaluation')" onmouseout="reset_tooltip('lazy-evaluation-tooltip')"><span class="tooltiptext" id="lazy-evaluation-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Data manipulations on Spectra objects are not immediately applied to
-the peak data. They are added to a so-called processing queue which is
-applied each time peak data is accessed (with the <code>peaksData</code>, <code>mz</code> or
-<code>intensity</code> functions). Thanks to this processing queue data
-manipulation operations are also possible for read-only backends
-(e.g. mzML-file based backends or database-based backends). The
-information about the number of such processing steps can be seen
-below (next to Lazy evaluation queue).</p>
-<div class="sourceCode" id="cb94"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb94-1"><a href="raw-ms-data.html#cb94-1" aria-hidden="true" tabindex="-1"></a><span class="fu">min</span>(<span class="fu">intensity</span>(sp_sciex[<span class="dv">1</span>]))</span></code></pre></div>
-<pre><code>## [1] 0</code></pre>
-<div class="sourceCode" id="cb96"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb96-1"><a href="raw-ms-data.html#cb96-1" aria-hidden="true" tabindex="-1"></a>sp_sciex <span class="ot">&lt;-</span> <span class="fu">filterIntensity</span>(sp_sciex, <span class="at">intensity =</span> <span class="fu">c</span>(<span class="dv">10</span>, <span class="cn">Inf</span>))</span>
-<span id="cb96-2"><a href="raw-ms-data.html#cb96-2" aria-hidden="true" tabindex="-1"></a>sp_sciex <span class="do">## Note the lazy evaluation queue</span></span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 1862 spectra in a MsBackendMzR backend:
-##        msLevel     rtime scanIndex
-##      &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1            1     0.280         1
-## 2            1     0.559         2
-## 3            1     0.838         3
-## 4            1     1.117         4
-## 5            1     1.396         5
-## ...        ...       ...       ...
-## 1858         1   258.636       927
-## 1859         1   258.915       928
-## 1860         1   259.194       929
-## 1861         1   259.473       930
-## 1862         1   259.752       931
-##  ... 33 more variables/columns.
-## 
-## file(s):
-## 20171016_POOL_POS_1_105-134.mzML
-## 20171016_POOL_POS_3_105-134.mzML
-## Lazy evaluation queue: 1 processing step(s)
-## Processing:
-##  Remove peaks with intensities outside [10, Inf] in spectra of MS level(s) 1. [Tue Aug 31 11:35:51 2021]</code></pre>
-<div class="sourceCode" id="cb98"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb98-1"><a href="raw-ms-data.html#cb98-1" aria-hidden="true" tabindex="-1"></a><span class="fu">min</span>(<span class="fu">intensity</span>(sp_sciex[<span class="dv">1</span>]))</span></code></pre></div>
-<pre><code>## [1] 412</code></pre>
-<div class="sourceCode" id="cb100"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb100-1"><a href="raw-ms-data.html#cb100-1" aria-hidden="true" tabindex="-1"></a>sp_sciex<span class="sc">@</span>processingQueue</span></code></pre></div>
-<pre><code>## [[1]]
-## Object of class "ProcessingStep"
-##  Function: user-provided function
-##  Arguments:
-##   o intensity = 10Inf
-##   o msLevel = 1</code></pre>
-<div class="sourceCode" id="cb102"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb102-1"><a href="raw-ms-data.html#cb102-1" aria-hidden="true" tabindex="-1"></a>sp_sciex <span class="ot">&lt;-</span> <span class="fu">reset</span>(sp_sciex)</span>
-<span id="cb102-2"><a href="raw-ms-data.html#cb102-2" aria-hidden="true" tabindex="-1"></a>sp_sciex<span class="sc">@</span>processingQueue</span></code></pre></div>
-<pre><code>## list()</code></pre>
-<div class="sourceCode" id="cb104"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb104-1"><a href="raw-ms-data.html#cb104-1" aria-hidden="true" tabindex="-1"></a><span class="fu">min</span>(<span class="fu">intensity</span>(sp_sciex[<span class="dv">1</span>]))</span></code></pre></div>
-<pre><code>## [1] 0</code></pre>
-
-</div>
-</div>
-</div>
-<h3>References<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('NA')" onmouseout="reset_tooltip('NA-tooltip')"><span class="tooltiptext" id="NA-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<div id="refs" class="references csl-bib-body hanging-indent">
-<div id="ref-Gatto:2020" class="csl-entry">
-Gatto, Laurent, Sebastian Gibb, and Johannes Rainer. 2020. <span>“<span>MSnbase</span>, Efficient and Elegant r-Based Processing and Visualisation of Raw Mass Spectrometry Data.”</span> <em>J. Proteome Res.</em>, September.
-</div>
-</div>
-</body></html>
-
-<p style="text-align: center;">
-<a href="sec:msintro.html"><button class="btn btn-default">Previous</button></a>
-<a href="identification-data.html"><button class="btn btn-default">Next</button></a>
-</p>
-<p class="build-date">Page built: 
-2021-08-31
- using 
-R version 4.1.0 (2021-05-18)
-</p>
-</div>
-</div>
-
-
-
-</body>
-</html>
diff --git a/docs/reference-keys.txt b/docs/reference-keys.txt
deleted file mode 100644
index 99b6fa7..0000000
--- a/docs/reference-keys.txt
+++ /dev/null
@@ -1,89 +0,0 @@
-fig:sticker
-fig:unnamed-chunk-2
-fig:unnamed-chunk-3
-fig:unnamed-chunk-4
-fig:unnamed-chunk-5
-fig:unnamed-chunk-6
-fig:unnamed-chunk-7
-fig:unnamed-chunk-8
-fig:unnamed-chunk-23
-fig:answid1
-fig:sc
-fig:itraq
-fig:lf
-fig:silab
-fig:sefig
-fig:featuresplot
-fig:cptac
-fig:imagena
-fig:miximp
-fig:lazar
-fig:nasetdist
-fig:plotdens
-fig:plotpca
-fig:vis
-fig:vp
-fig:unnamed-chunk-76
-preamble
-sec-msintro
-how-does-mass-spectrometry-work
-accessing-data
-sec-raw
-what-is-raw-data-in-r
-the-spectra-class
-spectra-from-mzml-files
-backends
-visualisation-of-raw-ms-data
-raw-data-processing-and-manipulation
-a-note-on-efficiency
-backends-1
-parallel-processing
-lazy-evaluation
-sec-id
-identification-data.frame
-keeping-all-matches
-filtering-data
-adding-identification-data-to-raw-data
-an-identification-annotated-chromatogram
-visualising-peptide-spectrum-matches
-comparing-spectra
-summary-exercise
-exploration-and-assessment-of-identifications-using-msnid
-step-by-step-work-flow
-analysis-of-peptide-sequences
-trimming-the-data
-parent-ion-mass-errors
-filtering-criteria
-setting-filters
-filter-optimisation
-export-msnid-data
-sec-quant
-quantitation-methodologies
-label-free-ms2-spectral-counting
-labelled-ms2-isobaric-tagging
-label-free-ms1-extracted-ion-chromatograms
-labelled-ms1-silac
-sec-qf
-the-qfeatures-class
-feature-aggregation
-subsetting-and-filtering
-creating-qfeatures-object
-analysis-pipeline
-missing-values
-imputation
-identification-quality-control
-creating-the-qfeatures-data
-filtering-out-contaminants-and-reverse-hits
-log-transformation-and-normalisation
-aggregation
-principal-component-analysis
-visualisation
-statistical-analysis
-summary-exercice
-sec-anx
-sec-raw2
-sec-id2
-sec-si
-additional-materials
-questions-and-help
-session-information
diff --git a/docs/sec-anx.html b/docs/sec-anx.html
deleted file mode 100644
index c8e96b8..0000000
--- a/docs/sec-anx.html
+++ /dev/null
@@ -1,531 +0,0 @@
-<!DOCTYPE html>
-<html lang="" xml:lang="">
-<head>
-
-<meta charset="utf-8" />
-<meta name="generator" content="pandoc" />
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<meta property="og:title" content="Chapter 6 Annex | R for Mass Spectrometry" />
-<meta property="og:type" content="book" />
-
-
-
-
-<meta name="author" content="Laurent Gatto, Sebastian Gibb, Johannes Rainer" />
-
-<meta name="date" content="2023-09-06" />
-
-
-<meta name="description" content="Chapter 6 Annex | R for Mass Spectrometry">
-
-<title>Chapter 6 Annex | R for Mass Spectrometry</title>
-
-<link href="libs/tufte-css-2015.12.29/tufte.css" rel="stylesheet" />
-<link href="libs/tufte-css-2015.12.29/envisioned.css" rel="stylesheet" />
-<link href="libs/msmb-css-0/msmb.css" rel="stylesheet" />
-<script>
-function toggle_visibility(id1, id2) {
-var e = document.getElementById(id1);
-var f = document.getElementById(id2);
-
-e.style.display = ((e.style.display!='none') ? 'none' : 'block');
-
-if(f.classList.contains('fa-plus-square')) {
-    f.classList.add('fa-minus-square')
-    f.classList.remove('fa-plus-square')
-} else {
-    f.classList.add('fa-plus-square')
-    f.classList.remove('fa-minus-square')
-}
-
-}
-</script>
-<script>
-function copy_link(id) {
-  var dummy = document.createElement('input'),
-  text = window.location.href.split(/[?#]/)[0] + '#' + id;
-  document.body.appendChild(dummy);
-  dummy.value = text;
-  dummy.select();
-  document.execCommand('copy');
-  document.body.removeChild(dummy);
-  
-  var tooltip = document.getElementById(id + '-tooltip');
-  tooltip.innerHTML = 'Copied!';
-}
-
-function reset_tooltip(id) {
-  var tooltip = document.getElementById(id);
-  tooltip.innerHTML = 'Copy link';
-}
-</script>
-
-
-<style type="text/css">code{white-space: pre;}</style>
-<style type="text/css">
-pre > code.sourceCode { white-space: pre; position: relative; }
-pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
-pre > code.sourceCode > span:empty { height: 1.2em; }
-
-code.sourceCode > span { color: inherit; text-decoration: inherit; }
-div.sourceCode { margin: 1em 0; }
-pre.sourceCode { margin: 0; }
-@media screen {
-div.sourceCode { overflow: auto; }
-}
-@media print {
-pre > code.sourceCode { white-space: pre-wrap; }
-pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
-}
-pre.numberSource code
-  { counter-reset: source-line 0; }
-pre.numberSource code > span
-  { position: relative; left: -4em; counter-increment: source-line; }
-pre.numberSource code > span > a:first-child::before
-  { content: counter(source-line);
-    position: relative; left: -1em; text-align: right; vertical-align: baseline;
-    border: none; display: inline-block;
-    -webkit-touch-callout: none; -webkit-user-select: none;
-    -khtml-user-select: none; -moz-user-select: none;
-    -ms-user-select: none; user-select: none;
-    padding: 0 4px; width: 4em;
-    color: #aaaaaa;
-  }
-pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
-div.sourceCode
-  {  background-color: #f8f8f8; }
-@media screen {
-pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
-}
-code span.al { color: #ef2929; } /* Alert */
-code span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
-code span.at { color: #204a87; } /* Attribute */
-code span.bn { color: #0000cf; } /* BaseN */
-code span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
-code span.ch { color: #4e9a06; } /* Char */
-code span.cn { color: #8f5902; } /* Constant */
-code span.co { color: #8f5902; font-style: italic; } /* Comment */
-code span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
-code span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
-code span.dt { color: #204a87; } /* DataType */
-code span.dv { color: #0000cf; } /* DecVal */
-code span.er { color: #a40000; font-weight: bold; } /* Error */
-code span.ex { } /* Extension */
-code span.fl { color: #0000cf; } /* Float */
-code span.fu { color: #204a87; font-weight: bold; } /* Function */
-code span.im { } /* Import */
-code span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
-code span.kw { color: #204a87; font-weight: bold; } /* Keyword */
-code span.op { color: #ce5c00; font-weight: bold; } /* Operator */
-code span.ot { color: #8f5902; } /* Other */
-code span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
-code span.sc { color: #ce5c00; font-weight: bold; } /* SpecialChar */
-code span.ss { color: #4e9a06; } /* SpecialString */
-code span.st { color: #4e9a06; } /* String */
-code span.va { color: #000000; } /* Variable */
-code span.vs { color: #4e9a06; } /* VerbatimString */
-code span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
-</style>
-
-
-<style type="text/css">
-/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
-div.csl-bib-body { }
-div.csl-entry {
-  clear: both;
-}
-.hanging div.csl-entry {
-  margin-left:2em;
-  text-indent:-2em;
-}
-div.csl-left-margin {
-  min-width:2em;
-  float:left;
-}
-div.csl-right-inline {
-  margin-left:2em;
-  padding-left:1em;
-}
-div.csl-indent {
-  margin-left: 2em;
-}
-</style>
-
-<link rel="stylesheet" href="style.css" type="text/css" />
-
-</head>
-
-<body>
-
-
-
-<div class="row">
-<div class="col-sm-12">
-<div id="TOC">
-<ul class="navbar">
-<li class="msmb"><p class="title">R for Mass Spectrometry<p><p class="author">Laurent Gatto, Sebastian Gibb, Johannes Rainer</p>
-<li class="dropdown" style="float:right">
-<a href="javascript:void(0)" class="dropbtn">&#x25BE; Chapters</a>
-<div class="dropdown-content">
-<a href="index.html" id="toc-preamble"><span class="toc-section-number">1</span> Preamble</a>
-<a href="sec-msintro.html" id="toc-sec-msintro"><span class="toc-section-number">2</span> Introduction</a>
-<a href="sec-raw.html" id="toc-sec-raw"><span class="toc-section-number">3</span> Raw MS data</a>
-<a href="sec-id.html" id="toc-sec-id"><span class="toc-section-number">4</span> Identification data</a>
-<a href="sec-quant.html" id="toc-sec-quant"><span class="toc-section-number">5</span> Quantitative data</a>
-<a id="active-page" href="sec-anx.html" id="toc-sec-anx"><span class="toc-section-number">6</span> Annex</a><ul class="toc-sections">
-<li class="toc"><a href="#sec-raw2"> Raw MS data under the hood: the <code>mzR</code> package</a></li>
-<li class="toc"><a href="#sec-id2"> PSM data under the hood</a></li>
-</ul>
-<a href="sec-si.html" id="toc-sec-si"><span class="toc-section-number">7</span> Additional materials and session information</a>
-</div>
-</li>
-</ul>
-</div>
-</div>
-</div>
-<div class="row">
-<div class="col-sm-12">
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body><div id="sec-anx" class="section level1" number="6">
-<h1>
-<span class="header-section-number">Chapter 6</span> Annex</h1>
-<div id="sec-raw2" class="section level2" number="6.1">
-<h2>
-<span class="header-section-number">6.1</span> Raw MS data under the hood: the <code>mzR</code> package<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('sec-raw2')" onmouseout="reset_tooltip('sec-raw2-tooltip')"><span class="tooltiptext" id="sec-raw2-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>The <code>mzR</code> package is a direct interface to the
-<a href="http://proteowizard.sourceforge.net/">proteowizard</a> code base. It
-includes a substantial proportion of <em>pwiz</em>’s C/C++ code for fast and
-efficient parsing of these large raw data files.</p>
-<p>Let’s start by using some raw data files from the <code>msdata</code>
-package. After loading it, we use the <code>proteomics()</code> function to
-return the full file names for two raw data files. We will start by
-focusing on the second one.</p>
-<div class="sourceCode" id="cb422"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb422-1"><a href="sec-anx.html#cb422-1" tabindex="-1"></a>f <span class="ot">&lt;-</span> msdata<span class="sc">::</span><span class="fu">proteomics</span>(<span class="at">full.names =</span> <span class="cn">TRUE</span>)</span>
-<span id="cb422-2"><a href="sec-anx.html#cb422-2" tabindex="-1"></a>f</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/proteomics/MRM-standmix-5.mzML.gz"                                                
-## [2] "/home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/proteomics/MS3TMT10_01022016_32917-33481.mzML.gz"                                 
-## [3] "/home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/proteomics/MS3TMT11.mzML"                                                         
-## [4] "/home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/proteomics/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML.gz"
-## [5] "/home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/proteomics/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML.gz"</code></pre>
-<div class="sourceCode" id="cb424"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb424-1"><a href="sec-anx.html#cb424-1" tabindex="-1"></a>(f2 <span class="ot">&lt;-</span> <span class="fu">grep</span>(<span class="st">"20141210"</span>, f, <span class="at">value =</span> <span class="cn">TRUE</span>))</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/proteomics/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML.gz"</code></pre>
-<p>The three main functions of <code>mzR</code> are</p>
-<ul>
-<li>
-<code>openMSfile</code> to create a file handle to a raw data file</li>
-<li>
-<code>header</code> to extract metadata about the spectra contained in the file</li>
-<li>
-<code>peaks</code> to extract one or multiple spectra of interest.</li>
-</ul>
-<p>Other functions such as <code>instrumentInfo</code>, or <code>runInfo</code> can be used to
-gather general information about a run.</p>
-<div class="sourceCode" id="cb426"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb426-1"><a href="sec-anx.html#cb426-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"mzR"</span>)</span>
-<span id="cb426-2"><a href="sec-anx.html#cb426-2" tabindex="-1"></a>ms <span class="ot">&lt;-</span> <span class="fu">openMSfile</span>(f2)</span>
-<span id="cb426-3"><a href="sec-anx.html#cb426-3" tabindex="-1"></a>ms</span></code></pre></div>
-<pre><code>## Mass Spectrometry file handle.
-## Filename:  TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML.gz 
-## Number of scans:  7534</code></pre>
-<div class="sourceCode" id="cb428"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb428-1"><a href="sec-anx.html#cb428-1" tabindex="-1"></a>hd <span class="ot">&lt;-</span> <span class="fu">header</span>(ms)</span>
-<span id="cb428-2"><a href="sec-anx.html#cb428-2" tabindex="-1"></a><span class="fu">dim</span>(hd)</span></code></pre></div>
-<pre><code>## [1] 7534   31</code></pre>
-<div class="sourceCode" id="cb430"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb430-1"><a href="sec-anx.html#cb430-1" tabindex="-1"></a><span class="fu">names</span>(hd)</span></code></pre></div>
-<pre><code>##  [1] "seqNum"                     "acquisitionNum"            
-##  [3] "msLevel"                    "polarity"                  
-##  [5] "peaksCount"                 "totIonCurrent"             
-##  [7] "retentionTime"              "basePeakMZ"                
-##  [9] "basePeakIntensity"          "collisionEnergy"           
-## [11] "ionisationEnergy"           "lowMZ"                     
-## [13] "highMZ"                     "precursorScanNum"          
-## [15] "precursorMZ"                "precursorCharge"           
-## [17] "precursorIntensity"         "mergedScan"                
-## [19] "mergedResultScanNum"        "mergedResultStartScanNum"  
-## [21] "mergedResultEndScanNum"     "injectionTime"             
-## [23] "filterString"               "spectrumId"                
-## [25] "centroided"                 "ionMobilityDriftTime"      
-## [27] "isolationWindowTargetMZ"    "isolationWindowLowerOffset"
-## [29] "isolationWindowUpperOffset" "scanWindowLowerLimit"      
-## [31] "scanWindowUpperLimit"</code></pre>
-<div class="sourceCode" id="cb432"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb432-1"><a href="sec-anx.html#cb432-1" tabindex="-1"></a><span class="fu">head</span>(<span class="fu">peaks</span>(ms, <span class="dv">117</span>))</span></code></pre></div>
-<pre><code>##            mz intensity
-## [1,] 399.9976         0
-## [2,] 399.9991         0
-## [3,] 400.0006         0
-## [4,] 400.0021         0
-## [5,] 400.2955         0
-## [6,] 400.2970         0</code></pre>
-<div class="sourceCode" id="cb434"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb434-1"><a href="sec-anx.html#cb434-1" tabindex="-1"></a><span class="fu">str</span>(<span class="fu">peaks</span>(ms, <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>))</span></code></pre></div>
-<pre><code>## List of 5
-##  $ : num [1:25800, 1:2] 400 400 400 400 400 ...
-##   ..- attr(*, "dimnames")=List of 2
-##   .. ..$ : NULL
-##   .. ..$ : chr [1:2] "mz" "intensity"
-##  $ : num [1:25934, 1:2] 400 400 400 400 400 ...
-##   ..- attr(*, "dimnames")=List of 2
-##   .. ..$ : NULL
-##   .. ..$ : chr [1:2] "mz" "intensity"
-##  $ : num [1:26148, 1:2] 400 400 400 400 400 ...
-##   ..- attr(*, "dimnames")=List of 2
-##   .. ..$ : NULL
-##   .. ..$ : chr [1:2] "mz" "intensity"
-##  $ : num [1:26330, 1:2] 400 400 400 400 400 ...
-##   ..- attr(*, "dimnames")=List of 2
-##   .. ..$ : NULL
-##   .. ..$ : chr [1:2] "mz" "intensity"
-##  $ : num [1:26463, 1:2] 400 400 400 400 400 ...
-##   ..- attr(*, "dimnames")=List of 2
-##   .. ..$ : NULL
-##   .. ..$ : chr [1:2] "mz" "intensity"</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Let’s extract the index of the MS2 spectrum with the highest base peak
-intensity and plot its spectrum. Is the data centroided or in profile
-mode?</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-39" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-39', 'sol-start-39')"></span>
-</p>
-<div id="sol-body-39" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb436"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb436-1"><a href="sec-anx.html#cb436-1" tabindex="-1"></a>hd2 <span class="ot">&lt;-</span> hd[hd<span class="sc">$</span>msLevel <span class="sc">==</span> <span class="dv">2</span>, ]</span>
-<span id="cb436-2"><a href="sec-anx.html#cb436-2" tabindex="-1"></a>i <span class="ot">&lt;-</span> <span class="fu">which.max</span>(hd2<span class="sc">$</span>basePeakIntensity)</span>
-<span id="cb436-3"><a href="sec-anx.html#cb436-3" tabindex="-1"></a>hd2[i, ]</span></code></pre></div>
-<pre><code>##      seqNum acquisitionNum msLevel polarity peaksCount totIonCurrent
-## 5404   5404           5404       2        1        275    2283283712
-##      retentionTime basePeakMZ basePeakIntensity collisionEnergy
-## 5404      2751.313   859.5032         354288224              45
-##      ionisationEnergy    lowMZ  highMZ precursorScanNum precursorMZ
-## 5404                0 100.5031 1995.63             5403    859.1722
-##      precursorCharge precursorIntensity mergedScan mergedResultScanNum
-## 5404               3          627820480         NA                  NA
-##      mergedResultStartScanNum mergedResultEndScanNum injectionTime
-## 5404                       NA                     NA    0.03474091
-##                                                  filterString
-## 5404 FTMS + p NSI d Full ms2 859.50@hcd45.00 [100.00-2000.00]
-##                                         spectrumId centroided
-## 5404 controllerType=0 controllerNumber=1 scan=5404       TRUE
-##      ionMobilityDriftTime isolationWindowTargetMZ isolationWindowLowerOffset
-## 5404                   NA                   859.5                          1
-##      isolationWindowUpperOffset scanWindowLowerLimit scanWindowUpperLimit
-## 5404                          1                  100                 2000</code></pre>
-<div class="sourceCode" id="cb438"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb438-1"><a href="sec-anx.html#cb438-1" tabindex="-1"></a>pi <span class="ot">&lt;-</span> <span class="fu">peaks</span>(ms, hd2[i, <span class="dv">1</span>])</span>
-<span id="cb438-2"><a href="sec-anx.html#cb438-2" tabindex="-1"></a><span class="fu">plot</span>(pi, <span class="at">type =</span> <span class="st">"h"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/ex_raw-1.png" width="672" style="display: block; margin: auto;"></p>
-<div class="sourceCode" id="cb439"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb439-1"><a href="sec-anx.html#cb439-1" tabindex="-1"></a>mz <span class="ot">&lt;-</span> hd2[i, <span class="st">"basePeakMZ"</span>]</span>
-<span id="cb439-2"><a href="sec-anx.html#cb439-2" tabindex="-1"></a><span class="fu">plot</span>(pi, <span class="at">type =</span> <span class="st">"h"</span>, <span class="at">xlim =</span> <span class="fu">c</span>(mz <span class="sc">-</span> <span class="fl">0.5</span>, mz <span class="sc">+</span> <span class="fl">0.5</span>))</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/ex_raw-2.png" width="672" style="display: block; margin: auto;"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Pick an MS1 spectrum and visually check whether it is centroided or in
-profile mode.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-40" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-40', 'sol-start-40')"></span>
-</p>
-<div id="sol-body-40" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb440"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb440-1"><a href="sec-anx.html#cb440-1" tabindex="-1"></a><span class="do">## Zooming into spectrum 300 (an MS1 spectrum).</span></span>
-<span id="cb440-2"><a href="sec-anx.html#cb440-2" tabindex="-1"></a>j <span class="ot">&lt;-</span> <span class="dv">300</span></span>
-<span id="cb440-3"><a href="sec-anx.html#cb440-3" tabindex="-1"></a>pj <span class="ot">&lt;-</span> <span class="fu">peaks</span>(ms, j)</span>
-<span id="cb440-4"><a href="sec-anx.html#cb440-4" tabindex="-1"></a><span class="fu">plot</span>(pj, <span class="at">type =</span> <span class="st">"l"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/ex_raw2-1.png" width="672"></p>
-<div class="sourceCode" id="cb441"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb441-1"><a href="sec-anx.html#cb441-1" tabindex="-1"></a>mz <span class="ot">&lt;-</span> hd[j, <span class="st">"basePeakMZ"</span>]</span>
-<span id="cb441-2"><a href="sec-anx.html#cb441-2" tabindex="-1"></a><span class="fu">plot</span>(pj, <span class="at">type =</span> <span class="st">"l"</span>, <span class="at">xlim =</span> <span class="fu">c</span>(mz <span class="sc">-</span> <span class="fl">0.5</span>, mz <span class="sc">+</span> <span class="fl">0.5</span>))</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/ex_raw2-2.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="sec-id2" class="section level2" number="6.2">
-<h2>
-<span class="header-section-number">6.2</span> PSM data under the hood<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('sec-id2')" onmouseout="reset_tooltip('sec-id2-tooltip')"><span class="tooltiptext" id="sec-id2-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>There are two packages that can be used to parse <code>mzIdentML</code> files,
-namely <code>mzR</code> (that we have already used for raw data) and <code>mzID</code>. The
-major difference is that the former leverages C++ code from
-<code>proteowizard</code> and is hence faster than the latter (which uses the
-<code>XML</code> R package). They both work in similar ways.</p>
-<pre><code>|Data type      |File format |Data structure |Package |
-|:--------------|:-----------|:--------------|:-------|
-|Identification |mzIdentML   |mzRident       |mzR     |
-|Identification |mzIdentML   |mzID           |mzID    |</code></pre>
-<p>Which of these packages is used by <code>PSM()</code> can be defined by the
-<code>parser</code> argument, as documented in <code>?PSM</code>.</p>
-<div id="mzid" class="section level3 unnumbered">
-<h3>
-<code>mzID</code><div class="tooltip"><button class="internal-link-btn" onclick="copy_link('mzid')" onmouseout="reset_tooltip('mzid-tooltip')"><span class="tooltiptext" id="mzid-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>The main functions are <code>mzID</code> to read the data into a dedicated data
-class and <code>flatten</code> to transform it into a <code>data.frame</code>.</p>
-<div class="sourceCode" id="cb443"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb443-1"><a href="sec-anx.html#cb443-1" tabindex="-1"></a>idf</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/ident/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid"</code></pre>
-<div class="sourceCode" id="cb445"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb445-1"><a href="sec-anx.html#cb445-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"mzID"</span>)</span></code></pre></div>
-<pre><code>## 
-## Attaching package: 'mzID'</code></pre>
-<pre><code>## The following object is masked from 'package:purrr':
-## 
-##     flatten</code></pre>
-<pre><code>## The following object is masked from 'package:dplyr':
-## 
-##     id</code></pre>
-<div class="sourceCode" id="cb449"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb449-1"><a href="sec-anx.html#cb449-1" tabindex="-1"></a>id <span class="ot">&lt;-</span> <span class="fu">mzID</span>(idf)</span></code></pre></div>
-<pre><code>## reading TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid... DONE!</code></pre>
-<div class="sourceCode" id="cb451"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb451-1"><a href="sec-anx.html#cb451-1" tabindex="-1"></a>id</span></code></pre></div>
-<pre><code>## An mzID object
-## 
-## Software used:   MS-GF+ (version: Beta (v10072))
-## 
-## Rawfile:         /home/lg390/dev/01_svn/workflows/proteomics/TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-## 
-## Database:        /home/lg390/dev/01_svn/workflows/proteomics/erwinia_carotovora.fasta
-## 
-## Number of scans: 5343
-## Number of PSM's: 5656</code></pre>
-<p>Various data can be extracted from the <code>mzID</code> object, using one of the
-accessor functions such as <code>database</code>, <code>software</code>, <code>scans</code>, <code>peptides</code>,
-… The object can also be converted into a <code>data.frame</code> using the
-<code>flatten</code> function.</p>
-<div class="sourceCode" id="cb453"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb453-1"><a href="sec-anx.html#cb453-1" tabindex="-1"></a><span class="fu">head</span>(<span class="fu">flatten</span>(id))</span></code></pre></div>
-<pre><code>##                                      spectrumid scan number(s) acquisitionnum
-## 1 controllerType=0 controllerNumber=1 scan=5782           5782           5782
-## 2 controllerType=0 controllerNumber=1 scan=6037           6037           6037
-## 3 controllerType=0 controllerNumber=1 scan=5235           5235           5235
-##   passthreshold rank calculatedmasstocharge experimentalmasstocharge
-## 1          TRUE    1               1080.232                 1080.233
-## 2          TRUE    1               1002.212                 1002.209
-## 3          TRUE    1               1189.280                 1189.284
-##   chargestate ms-gf:denovoscore ms-gf:evalue ms-gf:pepqvalue ms-gf:qvalue
-## 1           3               174 1.086033e-20               0            0
-## 2           3               245 1.988774e-19               0            0
-## 3           3               264 5.129649e-19               0            0
-##   ms-gf:rawscore ms-gf:specevalue assumeddissociationmethod isotopeerror
-## 1            147     3.764831e-27                       HCD            0
-## 2            214     6.902626e-26                       HCD            0
-## 3            211     1.778789e-25                       HCD            0
-##   isdecoy post pre end start accession length
-## 1   FALSE    S   R  84    50   ECA1932    155
-## 2   FALSE    R   K 315   288   ECA1147    434
-## 3   FALSE    A   R 224   192   ECA0013    295
-##                          description                              pepseq
-## 1         outer membrane lipoprotein PVQIQAGEDSNVIGALGGAVLGGFLGNTIGGGSGR
-## 2                     trigger factor        TQVLDGLINANDIEVPVALIDGEIDVLR
-## 3 ribose-binding periplasmic protein   TKGLNVMQNLLTAHPDVQAVFAQNDEMALGALR
-##   modified modification
-## 1    FALSE         &lt;NA&gt;
-## 2    FALSE         &lt;NA&gt;
-## 3    FALSE         &lt;NA&gt;
-##                                                                idFile
-## 1 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid
-## 2 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid
-## 3 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid
-##                                                          spectrumFile
-## 1 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-## 2 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-## 3 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-##               databaseFile
-## 1 erwinia_carotovora.fasta
-## 2 erwinia_carotovora.fasta
-## 3 erwinia_carotovora.fasta
-##  [ reached 'max' / getOption("max.print") -- omitted 3 rows ]</code></pre>
-</div>
-<div id="mzr" class="section level3 unnumbered">
-<h3>
-<code>mzR</code><div class="tooltip"><button class="internal-link-btn" onclick="copy_link('mzr')" onmouseout="reset_tooltip('mzr-tooltip')"><span class="tooltiptext" id="mzr-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>The <code>mzR</code> interface provides a similar interface. It is however much
-faster as it does not read all the data into memory and only extracts
-relevant data on demand. It has also accessor functions such as
-<code>softwareInfo</code>, <code>mzidInfo</code>, … (use <code>showMethods(classes = "mzRident", where = "package:mzR")</code>)
-to see all available methods.</p>
-<div class="sourceCode" id="cb455"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb455-1"><a href="sec-anx.html#cb455-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"mzR"</span>)</span>
-<span id="cb455-2"><a href="sec-anx.html#cb455-2" tabindex="-1"></a>id2 <span class="ot">&lt;-</span> <span class="fu">openIDfile</span>(idf)</span>
-<span id="cb455-3"><a href="sec-anx.html#cb455-3" tabindex="-1"></a>id2</span></code></pre></div>
-<pre><code>## Identification file handle.
-## Filename:  TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid 
-## Number of psms:  5759</code></pre>
-<div class="sourceCode" id="cb457"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb457-1"><a href="sec-anx.html#cb457-1" tabindex="-1"></a><span class="fu">softwareInfo</span>(id2)</span></code></pre></div>
-<pre><code>## [1] "MS-GF+ Beta (v10072) "                        
-## [2] "ProteoWizard MzIdentML 3.0.21263 ProteoWizard"</code></pre>
-<p>The identification data can be accessed as a <code>data.frame</code> with the
-<code>psms</code> accessor.</p>
-<div class="sourceCode" id="cb459"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb459-1"><a href="sec-anx.html#cb459-1" tabindex="-1"></a><span class="fu">head</span>(<span class="fu">psms</span>(id2))</span></code></pre></div>
-<pre><code>##                                      spectrumID chargeState rank passThreshold
-## 1 controllerType=0 controllerNumber=1 scan=5782           3    1          TRUE
-## 2 controllerType=0 controllerNumber=1 scan=6037           3    1          TRUE
-## 3 controllerType=0 controllerNumber=1 scan=5235           3    1          TRUE
-## 4 controllerType=0 controllerNumber=1 scan=5397           3    1          TRUE
-## 5 controllerType=0 controllerNumber=1 scan=6075           3    1          TRUE
-##   experimentalMassToCharge calculatedMassToCharge
-## 1                1080.2325              1080.2321
-## 2                1002.2089              1002.2115
-## 3                1189.2836              1189.2800
-## 4                 960.5365               960.5365
-## 5                1264.3409              1264.3419
-##                              sequence peptideRef modNum isDecoy post pre start
-## 1 PVQIQAGEDSNVIGALGGAVLGGFLGNTIGGGSGR       Pep1      0   FALSE    S   R    50
-## 2        TQVLDGLINANDIEVPVALIDGEIDVLR       Pep2      0   FALSE    R   K   288
-## 3   TKGLNVMQNLLTAHPDVQAVFAQNDEMALGALR       Pep3      0   FALSE    A   R   192
-## 4         SQILQQAGTSVLSQANQVPQTVLSLLR       Pep4      0   FALSE    -   R   264
-## 5 PIIGDNPFVVVLPDVVLDESTADQTQENLALLISR       Pep5      0   FALSE    F   R   119
-##   end DatabaseAccess DBseqLength DatabaseSeq
-## 1  84        ECA1932         155            
-## 2 315        ECA1147         434            
-## 3 224        ECA0013         295            
-## 4 290        ECA1731         290            
-## 5 153        ECA1443         298            
-##                                    DatabaseDescription scan.number.s.
-## 1                   ECA1932 outer membrane lipoprotein           5782
-## 2                               ECA1147 trigger factor           6037
-## 3           ECA0013 ribose-binding periplasmic protein           5235
-## 4                                    ECA1731 flagellin           5397
-## 5 ECA1443 UTP--glucose-1-phosphate uridylyltransferase           6075
-##   acquisitionNum
-## 1           5782
-## 2           6037
-## 3           5235
-## 4           5397
-## 5           6075
-##  [ reached 'max' / getOption("max.print") -- omitted 1 rows ]</code></pre>
-
-</div>
-</div>
-</div></body></html>
-
-<p style="text-align: center;">
-<a href="sec-quant.html"><button class="btn btn-default">Previous</button></a>
-<a href="sec-si.html"><button class="btn btn-default">Next</button></a>
-</p>
-<p class="build-date">Page built: 
-2023-09-06
- using 
-R version 4.3.1 Patched (2023-07-10 r84676)
-</p>
-</div>
-</div>
-
-
-
-</body>
-</html>
diff --git a/docs/sec-id.html b/docs/sec-id.html
deleted file mode 100644
index 7a0d6ae..0000000
--- a/docs/sec-id.html
+++ /dev/null
@@ -1,1735 +0,0 @@
-<!DOCTYPE html>
-<html lang="" xml:lang="">
-<head>
-
-<meta charset="utf-8" />
-<meta name="generator" content="pandoc" />
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<meta property="og:title" content="Chapter 4 Identification data | R for Mass Spectrometry" />
-<meta property="og:type" content="book" />
-
-
-
-
-<meta name="author" content="Laurent Gatto, Sebastian Gibb, Johannes Rainer" />
-
-<meta name="date" content="2023-09-06" />
-
-
-<meta name="description" content="Chapter 4 Identification data | R for Mass Spectrometry">
-
-<title>Chapter 4 Identification data | R for Mass Spectrometry</title>
-
-<link href="libs/tufte-css-2015.12.29/tufte.css" rel="stylesheet" />
-<link href="libs/tufte-css-2015.12.29/envisioned.css" rel="stylesheet" />
-<link href="libs/msmb-css-0/msmb.css" rel="stylesheet" />
-<script>
-function toggle_visibility(id1, id2) {
-var e = document.getElementById(id1);
-var f = document.getElementById(id2);
-
-e.style.display = ((e.style.display!='none') ? 'none' : 'block');
-
-if(f.classList.contains('fa-plus-square')) {
-    f.classList.add('fa-minus-square')
-    f.classList.remove('fa-plus-square')
-} else {
-    f.classList.add('fa-plus-square')
-    f.classList.remove('fa-minus-square')
-}
-
-}
-</script>
-<script>
-function copy_link(id) {
-  var dummy = document.createElement('input'),
-  text = window.location.href.split(/[?#]/)[0] + '#' + id;
-  document.body.appendChild(dummy);
-  dummy.value = text;
-  dummy.select();
-  document.execCommand('copy');
-  document.body.removeChild(dummy);
-  
-  var tooltip = document.getElementById(id + '-tooltip');
-  tooltip.innerHTML = 'Copied!';
-}
-
-function reset_tooltip(id) {
-  var tooltip = document.getElementById(id);
-  tooltip.innerHTML = 'Copy link';
-}
-</script>
-
-
-<style type="text/css">code{white-space: pre;}</style>
-<style type="text/css">
-pre > code.sourceCode { white-space: pre; position: relative; }
-pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
-pre > code.sourceCode > span:empty { height: 1.2em; }
-
-code.sourceCode > span { color: inherit; text-decoration: inherit; }
-div.sourceCode { margin: 1em 0; }
-pre.sourceCode { margin: 0; }
-@media screen {
-div.sourceCode { overflow: auto; }
-}
-@media print {
-pre > code.sourceCode { white-space: pre-wrap; }
-pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
-}
-pre.numberSource code
-  { counter-reset: source-line 0; }
-pre.numberSource code > span
-  { position: relative; left: -4em; counter-increment: source-line; }
-pre.numberSource code > span > a:first-child::before
-  { content: counter(source-line);
-    position: relative; left: -1em; text-align: right; vertical-align: baseline;
-    border: none; display: inline-block;
-    -webkit-touch-callout: none; -webkit-user-select: none;
-    -khtml-user-select: none; -moz-user-select: none;
-    -ms-user-select: none; user-select: none;
-    padding: 0 4px; width: 4em;
-    color: #aaaaaa;
-  }
-pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
-div.sourceCode
-  {  background-color: #f8f8f8; }
-@media screen {
-pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
-}
-code span.al { color: #ef2929; } /* Alert */
-code span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
-code span.at { color: #204a87; } /* Attribute */
-code span.bn { color: #0000cf; } /* BaseN */
-code span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
-code span.ch { color: #4e9a06; } /* Char */
-code span.cn { color: #8f5902; } /* Constant */
-code span.co { color: #8f5902; font-style: italic; } /* Comment */
-code span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
-code span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
-code span.dt { color: #204a87; } /* DataType */
-code span.dv { color: #0000cf; } /* DecVal */
-code span.er { color: #a40000; font-weight: bold; } /* Error */
-code span.ex { } /* Extension */
-code span.fl { color: #0000cf; } /* Float */
-code span.fu { color: #204a87; font-weight: bold; } /* Function */
-code span.im { } /* Import */
-code span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
-code span.kw { color: #204a87; font-weight: bold; } /* Keyword */
-code span.op { color: #ce5c00; font-weight: bold; } /* Operator */
-code span.ot { color: #8f5902; } /* Other */
-code span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
-code span.sc { color: #ce5c00; font-weight: bold; } /* SpecialChar */
-code span.ss { color: #4e9a06; } /* SpecialString */
-code span.st { color: #4e9a06; } /* String */
-code span.va { color: #000000; } /* Variable */
-code span.vs { color: #4e9a06; } /* VerbatimString */
-code span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
-</style>
-
-
-<style type="text/css">
-/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
-div.csl-bib-body { }
-div.csl-entry {
-  clear: both;
-}
-.hanging div.csl-entry {
-  margin-left:2em;
-  text-indent:-2em;
-}
-div.csl-left-margin {
-  min-width:2em;
-  float:left;
-}
-div.csl-right-inline {
-  margin-left:2em;
-  padding-left:1em;
-}
-div.csl-indent {
-  margin-left: 2em;
-}
-</style>
-
-<link rel="stylesheet" href="style.css" type="text/css" />
-
-</head>
-
-<body>
-
-
-
-<div class="row">
-<div class="col-sm-12">
-<div id="TOC">
-<ul class="navbar">
-<li class="msmb"><p class="title">R for Mass Spectrometry</p><p class="author">Laurent Gatto, Sebastian Gibb, Johannes Rainer</p></li>
-<li class="dropdown" style="float:right">
-<a href="javascript:void(0)" class="dropbtn">&#x25BE; Chapters</a>
-<div class="dropdown-content">
-<a href="index.html" id="toc-preamble"><span class="toc-section-number">1</span> Preamble</a>
-<a href="sec-msintro.html" id="toc-sec-msintro"><span class="toc-section-number">2</span> Introduction</a>
-<a href="sec-raw.html" id="toc-sec-raw"><span class="toc-section-number">3</span> Raw MS data</a>
-<a id="active-page" href="sec-id.html" id="toc-sec-id"><span class="toc-section-number">4</span> Identification data</a><ul class="toc-sections">
-<li class="toc"><a href="#identification-data.frame"> Identification data.frame</a></li>
-<li class="toc"><a href="#keeping-all-matches"> Keeping all matches</a></li>
-<li class="toc"><a href="#filtering-data"> Filtering data</a></li>
-<li class="toc"><a href="#adding-identification-data-to-raw-data"> Adding identification data to raw data</a></li>
-<li class="toc"><a href="#an-identification-annotated-chromatogram"> An identification-annotated chromatogram</a></li>
-<li class="toc"><a href="#visualising-peptide-spectrum-matches"> Visualising peptide-spectrum matches</a></li>
-<li class="toc"><a href="#comparing-spectra"> Comparing spectra</a></li>
-<li class="toc"><a href="#summary-exercise"> Summary exercise</a></li>
-<li class="toc"><a href="#exploration-and-assessment-of-identifications-using-msnid"> Exploration and Assessment of Identifications using <code>MSnID</code></a></li>
-</ul>
-<a href="sec-quant.html" id="toc-sec-quant"><span class="toc-section-number">5</span> Quantitative data</a>
-<a href="sec-anx.html" id="toc-sec-anx"><span class="toc-section-number">6</span> Annex</a>
-<a href="sec-si.html" id="toc-sec-si"><span class="toc-section-number">7</span> Additional materials and session information</a>
-</div>
-</li>
-</ul>
-</div>
-</div>
-</div>
-<div class="row">
-<div class="col-sm-12">
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body>
-<div id="sec-id" class="section level1" number="4">
-<h1>
-<span class="header-section-number">Chapter 4</span> Identification data</h1>
-<p>Peptide identification is performed using third-party software - there
-is no package to run these searches directly in R. When using command-line
-search engines, it is possible to hard-code or automatically generate the
-search command lines and run them from R using a <code>system()</code> call. This
-allows these to be generated reproducibly (especially useful if many
-command lines need to be run) and to keep a record in the R script of
-the exact command.</p>
-<p>The example below illustrates this for 3 mzML files to be searched
-using <code>MSGFplus</code>:</p>
-<div class="sourceCode" id="cb88"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb88-1"><a href="sec-id.html#cb88-1" tabindex="-1"></a>(mzmls <span class="ot">&lt;-</span> <span class="fu">paste0</span>(<span class="st">"file_"</span>, <span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>, <span class="st">".mzML"</span>))</span></code></pre></div>
-<pre><code>## [1] "file_1.mzML" "file_2.mzML" "file_3.mzML"</code></pre>
-<div class="sourceCode" id="cb90"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb90-1"><a href="sec-id.html#cb90-1" tabindex="-1"></a>(mzids <span class="ot">&lt;-</span> <span class="fu">sub</span>(<span class="st">"mzML"</span>, <span class="st">"mzid"</span>, mzmls))</span></code></pre></div>
-<pre><code>## [1] "file_1.mzid" "file_2.mzid" "file_3.mzid"</code></pre>
-<div class="sourceCode" id="cb92"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb92-1"><a href="sec-id.html#cb92-1" tabindex="-1"></a><span class="fu">paste0</span>(<span class="st">"java -jar /path/to/MSGFPlus.jar"</span>,</span>
-<span id="cb92-2"><a href="sec-id.html#cb92-2" tabindex="-1"></a>       <span class="st">" -s "</span>, mzmls,</span>
-<span id="cb92-3"><a href="sec-id.html#cb92-3" tabindex="-1"></a>       <span class="st">" -o "</span>, mzids,</span>
-<span id="cb92-4"><a href="sec-id.html#cb92-4" tabindex="-1"></a>       <span class="st">" -d uniprot.fas"</span>,</span>
-<span id="cb92-5"><a href="sec-id.html#cb92-5" tabindex="-1"></a>       <span class="st">" -t 20ppm"</span>,</span>
-<span id="cb92-6"><a href="sec-id.html#cb92-6" tabindex="-1"></a>       <span class="st">" -m 0"</span>,</span>
-<span id="cb92-7"><a href="sec-id.html#cb92-7" tabindex="-1"></a>       <span class="st">" int 1"</span>)</span></code></pre></div>
-<pre><code>## [1] "java -jar /path/to/MSGFPlus.jar -s file_1.mzML -o file_1.mzid -d uniprot.fas -t 20ppm -m 0 int 1"
-## [2] "java -jar /path/to/MSGFPlus.jar -s file_2.mzML -o file_2.mzid -d uniprot.fas -t 20ppm -m 0 int 1"
-## [3] "java -jar /path/to/MSGFPlus.jar -s file_3.mzML -o file_3.mzid -d uniprot.fas -t 20ppm -m 0 int 1"</code></pre>
-<div id="identification-data.frame" class="section level2" number="4.1">
-<h2>
-<span class="header-section-number">4.1</span> Identification data.frame<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('identification-data.frame')" onmouseout="reset_tooltip('identification-data.frame-tooltip')"><span class="tooltiptext" id="identification-data.frame-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Let’s use the identification from <code>msdata</code>:</p>
-<div class="sourceCode" id="cb94"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb94-1"><a href="sec-id.html#cb94-1" tabindex="-1"></a>idf <span class="ot">&lt;-</span> msdata<span class="sc">::</span><span class="fu">ident</span>(<span class="at">full.names =</span> <span class="cn">TRUE</span>)</span>
-<span id="cb94-2"><a href="sec-id.html#cb94-2" tabindex="-1"></a><span class="fu">basename</span>(idf)</span></code></pre></div>
-<pre><code>## [1] "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid"</code></pre>
-<p>The easiest way to read identification data in <code>mzIdentML</code> (often
-abbreviated with <code>mzid</code>) into R is to read it with the <code>readPSMs()</code>
-function from the
-<a href="https://rformassspectrometry.github.io/PSMatch/"><code>PSMatch</code></a>
-package<a href="#fn5" class="footnote-ref" id="fnref5"><sup>5</sup></a>. The function will parse the file and return a
-<code>DataFrame</code>.</p>
-<div class="sourceCode" id="cb96"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb96-1"><a href="sec-id.html#cb96-1" tabindex="-1"></a><span class="fu">library</span>(PSMatch)</span>
-<span id="cb96-2"><a href="sec-id.html#cb96-2" tabindex="-1"></a>id <span class="ot">&lt;-</span> <span class="fu">PSM</span>(idf)</span>
-<span id="cb96-3"><a href="sec-id.html#cb96-3" tabindex="-1"></a><span class="fu">dim</span>(id)</span></code></pre></div>
-<pre><code>## [1] 5802   35</code></pre>
-<div class="sourceCode" id="cb98"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb98-1"><a href="sec-id.html#cb98-1" tabindex="-1"></a><span class="fu">names</span>(id)</span></code></pre></div>
-<pre><code>##  [1] "sequence"                 "spectrumID"              
-##  [3] "chargeState"              "rank"                    
-##  [5] "passThreshold"            "experimentalMassToCharge"
-##  [7] "calculatedMassToCharge"   "peptideRef"              
-##  [9] "modNum"                   "isDecoy"                 
-## [11] "post"                     "pre"                     
-## [13] "start"                    "end"                     
-## [15] "DatabaseAccess"           "DBseqLength"             
-## [17] "DatabaseSeq"              "DatabaseDescription"     
-## [19] "scan.number.s."           "acquisitionNum"          
-## [21] "spectrumFile"             "idFile"                  
-## [23] "MS.GF.RawScore"           "MS.GF.DeNovoScore"       
-## [25] "MS.GF.SpecEValue"         "MS.GF.EValue"            
-## [27] "MS.GF.QValue"             "MS.GF.PepQValue"         
-## [29] "modPeptideRef"            "modName"                 
-## [31] "modMass"                  "modLocation"             
-## [33] "subOriginalResidue"       "subReplacementResidue"   
-## [35] "subLocation"</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Verify that this table contains 5802 matches for 5343
-scans and 4938 peptide sequences.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-9" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-9', 'sol-start-9')"></span>
-</p>
-<div id="sol-body-9" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb100"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb100-1"><a href="sec-id.html#cb100-1" tabindex="-1"></a><span class="fu">nrow</span>(id) <span class="do">## number of matches</span></span></code></pre></div>
-<pre><code>## [1] 5802</code></pre>
-<div class="sourceCode" id="cb102"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb102-1"><a href="sec-id.html#cb102-1" tabindex="-1"></a><span class="fu">length</span>(<span class="fu">unique</span>(id<span class="sc">$</span>spectrumID)) <span class="do">## number of scans</span></span></code></pre></div>
-<pre><code>## [1] 5343</code></pre>
-<div class="sourceCode" id="cb104"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb104-1"><a href="sec-id.html#cb104-1" tabindex="-1"></a><span class="fu">length</span>(<span class="fu">unique</span>(id<span class="sc">$</span>sequence))   <span class="do">## number of peptide sequences</span></span></code></pre></div>
-<pre><code>## [1] 4938</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>The PSM data are read as is, without any filtering. As we can see
-below, we still have all the hits from the forward and reverse (decoy)
-databases.</p>
-<div class="sourceCode" id="cb106"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb106-1"><a href="sec-id.html#cb106-1" tabindex="-1"></a><span class="fu">table</span>(id<span class="sc">$</span>isDecoy)</span></code></pre></div>
-<pre><code>## 
-## FALSE  TRUE 
-##  2906  2896</code></pre>
-</div>
-<div id="keeping-all-matches" class="section level2" number="4.2">
-<h2>
-<span class="header-section-number">4.2</span> Keeping all matches<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('keeping-all-matches')" onmouseout="reset_tooltip('keeping-all-matches-tooltip')"><span class="tooltiptext" id="keeping-all-matches-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>The data also contains multiple matches for several
-spectra. The table below shows the number of spectra that
-have 1, 2, … up to 5 matches.</p>
-<div class="sourceCode" id="cb108"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb108-1"><a href="sec-id.html#cb108-1" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">table</span>(id<span class="sc">$</span>spectrumID))</span></code></pre></div>
-<pre><code>## 
-##    1    2    3    4    5 
-## 4936  369   26   10    2</code></pre>
-<p>Below, we can see how scan 1774 has 4 matches, all to sequence
-<code>RTRYQAEVR</code>, which itself matches to 4 different proteins:</p>
-<div class="sourceCode" id="cb110"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb110-1"><a href="sec-id.html#cb110-1" tabindex="-1"></a>i <span class="ot">&lt;-</span> <span class="fu">which</span>(id<span class="sc">$</span>spectrumID <span class="sc">==</span> <span class="st">"controllerType=0 controllerNumber=1 scan=1774"</span>)</span>
-<span id="cb110-2"><a href="sec-id.html#cb110-2" tabindex="-1"></a><span class="fu">data.frame</span>(id[i, ])[<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>]</span></code></pre></div>
-<pre><code>##    sequence                                    spectrumID chargeState rank
-## 1 RTRYQAEVR controllerType=0 controllerNumber=1 scan=1774           2    1
-## 2 RTRYQAEVR controllerType=0 controllerNumber=1 scan=1774           2    1
-## 3 RTRYQAEVR controllerType=0 controllerNumber=1 scan=1774           2    1
-## 4 RTRYQAEVR controllerType=0 controllerNumber=1 scan=1774           2    1
-##   passThreshold
-## 1          TRUE
-## 2          TRUE
-## 3          TRUE
-## 4          TRUE</code></pre>
-<p>If the goal is to keep all the matches, but arranged by scan/spectrum,
-one can <em>reduce</em> the <code>PSM</code> object by the <code>spectrumID</code> variable, so
-that each scan corresponds to a single row that still stores all
-values<a href="#fn6" class="footnote-ref" id="fnref6"><sup>6</sup></a>:</p>
-<div class="sourceCode" id="cb112"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb112-1"><a href="sec-id.html#cb112-1" tabindex="-1"></a>id2 <span class="ot">&lt;-</span> <span class="fu">reducePSMs</span>(id, id<span class="sc">$</span>spectrumID)</span>
-<span id="cb112-2"><a href="sec-id.html#cb112-2" tabindex="-1"></a>id2</span></code></pre></div>
-<pre><code>## Reduced PSM with 5343 rows and 35 columns.
-## names(35): sequence spectrumID ... subReplacementResidue subLocation</code></pre>
-<p>The resulting object contains a single entry for scan 1774 with
-information for the multiple matches stored as lists within the cells.</p>
-<div class="sourceCode" id="cb114"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb114-1"><a href="sec-id.html#cb114-1" tabindex="-1"></a>j <span class="ot">&lt;-</span> <span class="fu">which</span>(id2<span class="sc">$</span>spectrumID <span class="sc">==</span> <span class="st">"controllerType=0 controllerNumber=1 scan=1774"</span>)</span>
-<span id="cb114-2"><a href="sec-id.html#cb114-2" tabindex="-1"></a>id2[j, ]</span></code></pre></div>
-<pre><code>## Reduced PSM with 1 rows and 35 columns.
-## names(35): sequence spectrumID ... subReplacementResidue subLocation</code></pre>
-<div class="sourceCode" id="cb116"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb116-1"><a href="sec-id.html#cb116-1" tabindex="-1"></a>id2[j, <span class="st">"DatabaseAccess"</span>]</span></code></pre></div>
-<pre><code>## CharacterList of length 1
-## [["controllerType=0 controllerNumber=1 scan=1774"]] ECA2104 ECA2867 ECA3427 ECA4142</code></pre>
-<p>This is the type of complete identification table that could be used to
-annotate a raw mass spectrometry <code>Spectra</code> object, as shown below.</p>
-</div>
-<div id="filtering-data" class="section level2" number="4.3">
-<h2>
-<span class="header-section-number">4.3</span> Filtering data<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('filtering-data')" onmouseout="reset_tooltip('filtering-data-tooltip')"><span class="tooltiptext" id="filtering-data-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Often, the PSM data is filtered to only retain reliable matches. The
-<code>MSnID</code> package can be used to set thresholds to attain user-defined
-PSM, peptide or protein-level FDRs. Here, we will simply filter out
-wrong identifications manually.</p>
-<p>Here, the <code>filter()</code> function from the <code>dplyr</code> package comes in very handy. We
-will thus start by converting the <code>DataFrame</code> to a <code>tibble</code>.</p>
-<div class="sourceCode" id="cb118"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb118-1"><a href="sec-id.html#cb118-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"dplyr"</span>)</span>
-<span id="cb118-2"><a href="sec-id.html#cb118-2" tabindex="-1"></a>id_tbl <span class="ot">&lt;-</span> tidyr<span class="sc">::</span><span class="fu">as_tibble</span>(id)</span>
-<span id="cb118-3"><a href="sec-id.html#cb118-3" tabindex="-1"></a>id_tbl</span></code></pre></div>
-<pre><code>## # A tibble: 5,802 × 35
-##    sequence    spectrumID chargeState  rank passThreshold experimentalMassToCh…¹
-##    &lt;chr&gt;       &lt;chr&gt;            &lt;int&gt; &lt;int&gt; &lt;lgl&gt;                          &lt;dbl&gt;
-##  1 RQCRTDFLNY… controlle…           3     1 TRUE                            548.
-##  2 ESVALADQVT… controlle…           2     1 TRUE                           1288.
-##  3 KELLCLAMQI… controlle…           2     1 TRUE                            744.
-##  4 QRMARTSDKQ… controlle…           3     1 TRUE                            913.
-##  5 KDEGSTEPLK… controlle…           3     1 TRUE                            927.
-##  6 DGGPAIYGHE… controlle…           3     1 TRUE                            969.
-##  7 QRMARTSDKQ… controlle…           2     1 TRUE                           1369.
-##  8 CIDRARHVEV… controlle…           3     1 TRUE                           1285.
-##  9 CIDRARHVEV… controlle…           3     1 TRUE                           1285.
-## 10 VGRCRPIINY… controlle…           2     1 TRUE                           1102.
-## # ℹ 5,792 more rows
-## # ℹ abbreviated name: ¹​experimentalMassToCharge
-## # ℹ 29 more variables: calculatedMassToCharge &lt;dbl&gt;, peptideRef &lt;chr&gt;,
-## #   modNum &lt;int&gt;, isDecoy &lt;lgl&gt;, post &lt;chr&gt;, pre &lt;chr&gt;, start &lt;int&gt;, end &lt;int&gt;,
-## #   DatabaseAccess &lt;chr&gt;, DBseqLength &lt;int&gt;, DatabaseSeq &lt;chr&gt;,
-## #   DatabaseDescription &lt;chr&gt;, scan.number.s. &lt;dbl&gt;, acquisitionNum &lt;dbl&gt;,
-## #   spectrumFile &lt;chr&gt;, idFile &lt;chr&gt;, MS.GF.RawScore &lt;dbl&gt;, …</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ul>
-<li>Remove decoy hits</li>
-</ul>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-10" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-10', 'sol-start-10')"></span>
-</p>
-<div id="sol-body-10" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb120"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb120-1"><a href="sec-id.html#cb120-1" tabindex="-1"></a>id_tbl <span class="ot">&lt;-</span> id_tbl <span class="sc">%&gt;%</span></span>
-<span id="cb120-2"><a href="sec-id.html#cb120-2" tabindex="-1"></a>    <span class="fu">filter</span>(<span class="sc">!</span>isDecoy)</span>
-<span id="cb120-3"><a href="sec-id.html#cb120-3" tabindex="-1"></a>id_tbl</span></code></pre></div>
-<pre><code>## # A tibble: 2,906 × 35
-##    sequence    spectrumID chargeState  rank passThreshold experimentalMassToCh…¹
-##    &lt;chr&gt;       &lt;chr&gt;            &lt;int&gt; &lt;int&gt; &lt;lgl&gt;                          &lt;dbl&gt;
-##  1 RQCRTDFLNY… controlle…           3     1 TRUE                            548.
-##  2 ESVALADQVT… controlle…           2     1 TRUE                           1288.
-##  3 QRMARTSDKQ… controlle…           3     1 TRUE                            913.
-##  4 DGGPAIYGHE… controlle…           3     1 TRUE                            969.
-##  5 QRMARTSDKQ… controlle…           2     1 TRUE                           1369.
-##  6 CIDRARHVEV… controlle…           3     1 TRUE                           1285.
-##  7 CIDRARHVEV… controlle…           3     1 TRUE                           1285.
-##  8 VGRCRPIINY… controlle…           2     1 TRUE                           1102.
-##  9 QRLDEHCVGV… controlle…           3     1 TRUE                            713.
-## 10 VDYQGKKVVI… controlle…           4     1 TRUE                            870.
-## # ℹ 2,896 more rows
-## # ℹ abbreviated name: ¹​experimentalMassToCharge
-## # ℹ 29 more variables: calculatedMassToCharge &lt;dbl&gt;, peptideRef &lt;chr&gt;,
-## #   modNum &lt;int&gt;, isDecoy &lt;lgl&gt;, post &lt;chr&gt;, pre &lt;chr&gt;, start &lt;int&gt;, end &lt;int&gt;,
-## #   DatabaseAccess &lt;chr&gt;, DBseqLength &lt;int&gt;, DatabaseSeq &lt;chr&gt;,
-## #   DatabaseDescription &lt;chr&gt;, scan.number.s. &lt;dbl&gt;, acquisitionNum &lt;dbl&gt;,
-## #   spectrumFile &lt;chr&gt;, idFile &lt;chr&gt;, MS.GF.RawScore &lt;dbl&gt;, …</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ul>
-<li>Keep first rank matches</li>
-</ul>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-11" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-11', 'sol-start-11')"></span>
-</p>
-<div id="sol-body-11" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb122"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb122-1"><a href="sec-id.html#cb122-1" tabindex="-1"></a>id_tbl <span class="ot">&lt;-</span> id_tbl <span class="sc">%&gt;%</span></span>
-<span id="cb122-2"><a href="sec-id.html#cb122-2" tabindex="-1"></a>    <span class="fu">filter</span>(rank <span class="sc">==</span> <span class="dv">1</span>)</span>
-<span id="cb122-3"><a href="sec-id.html#cb122-3" tabindex="-1"></a>id_tbl</span></code></pre></div>
-<pre><code>## # A tibble: 2,751 × 35
-##    sequence    spectrumID chargeState  rank passThreshold experimentalMassToCh…¹
-##    &lt;chr&gt;       &lt;chr&gt;            &lt;int&gt; &lt;int&gt; &lt;lgl&gt;                          &lt;dbl&gt;
-##  1 RQCRTDFLNY… controlle…           3     1 TRUE                            548.
-##  2 ESVALADQVT… controlle…           2     1 TRUE                           1288.
-##  3 QRMARTSDKQ… controlle…           3     1 TRUE                            913.
-##  4 DGGPAIYGHE… controlle…           3     1 TRUE                            969.
-##  5 QRMARTSDKQ… controlle…           2     1 TRUE                           1369.
-##  6 CIDRARHVEV… controlle…           3     1 TRUE                           1285.
-##  7 CIDRARHVEV… controlle…           3     1 TRUE                           1285.
-##  8 VGRCRPIINY… controlle…           2     1 TRUE                           1102.
-##  9 QRLDEHCVGV… controlle…           3     1 TRUE                            713.
-## 10 VDYQGKKVVI… controlle…           4     1 TRUE                            870.
-## # ℹ 2,741 more rows
-## # ℹ abbreviated name: ¹​experimentalMassToCharge
-## # ℹ 29 more variables: calculatedMassToCharge &lt;dbl&gt;, peptideRef &lt;chr&gt;,
-## #   modNum &lt;int&gt;, isDecoy &lt;lgl&gt;, post &lt;chr&gt;, pre &lt;chr&gt;, start &lt;int&gt;, end &lt;int&gt;,
-## #   DatabaseAccess &lt;chr&gt;, DBseqLength &lt;int&gt;, DatabaseSeq &lt;chr&gt;,
-## #   DatabaseDescription &lt;chr&gt;, scan.number.s. &lt;dbl&gt;, acquisitionNum &lt;dbl&gt;,
-## #   spectrumFile &lt;chr&gt;, idFile &lt;chr&gt;, MS.GF.RawScore &lt;dbl&gt;, …</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ul>
-<li>Remove shared peptides. Start by identifying scans that match
-different proteins. For example scan 4884 matches proteins
-<code>XXX_ECA3406</code> and <code>ECA3415</code>. Scan 4099 matches <code>XXX_ECA4416_1</code>,
-<code>XXX_ECA4416_2</code> and <code>XXX_ECA4416_3</code>. Then remove the scans that
-match any of these proteins.</li>
-</ul>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-12" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-12', 'sol-start-12')"></span>
-</p>
-<div id="sol-body-12" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb124"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb124-1"><a href="sec-id.html#cb124-1" tabindex="-1"></a>mltm <span class="ot">&lt;-</span></span>
-<span id="cb124-2"><a href="sec-id.html#cb124-2" tabindex="-1"></a>    id_tbl <span class="sc">%&gt;%</span></span>
-<span id="cb124-3"><a href="sec-id.html#cb124-3" tabindex="-1"></a>    <span class="fu">group_by</span>(spectrumID) <span class="sc">%&gt;%</span></span>
-<span id="cb124-4"><a href="sec-id.html#cb124-4" tabindex="-1"></a>    <span class="fu">mutate</span>(<span class="at">nProts =</span> <span class="fu">length</span>(<span class="fu">unique</span>(DatabaseAccess))) <span class="sc">%&gt;%</span></span>
-<span id="cb124-5"><a href="sec-id.html#cb124-5" tabindex="-1"></a>    <span class="fu">filter</span>(nProts <span class="sc">&gt;</span> <span class="dv">1</span>) <span class="sc">%&gt;%</span></span>
-<span id="cb124-6"><a href="sec-id.html#cb124-6" tabindex="-1"></a>    <span class="fu">select</span>(spectrumID, nProts)</span>
-<span id="cb124-7"><a href="sec-id.html#cb124-7" tabindex="-1"></a>mltm</span></code></pre></div>
-<pre><code>## # A tibble: 85 × 2
-## # Groups:   spectrumID [39]
-##    spectrumID                                    nProts
-##    &lt;chr&gt;                                          &lt;int&gt;
-##  1 controllerType=0 controllerNumber=1 scan=1073      2
-##  2 controllerType=0 controllerNumber=1 scan=1073      2
-##  3 controllerType=0 controllerNumber=1 scan=6578      2
-##  4 controllerType=0 controllerNumber=1 scan=6578      2
-##  5 controllerType=0 controllerNumber=1 scan=5617      2
-##  6 controllerType=0 controllerNumber=1 scan=5617      2
-##  7 controllerType=0 controllerNumber=1 scan=3926      2
-##  8 controllerType=0 controllerNumber=1 scan=3926      2
-##  9 controllerType=0 controllerNumber=1 scan=4784      2
-## 10 controllerType=0 controllerNumber=1 scan=4784      2
-## # ℹ 75 more rows</code></pre>
-<div class="sourceCode" id="cb126"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb126-1"><a href="sec-id.html#cb126-1" tabindex="-1"></a>id_tbl <span class="ot">&lt;-</span></span>
-<span id="cb126-2"><a href="sec-id.html#cb126-2" tabindex="-1"></a>    id_tbl <span class="sc">%&gt;%</span></span>
-<span id="cb126-3"><a href="sec-id.html#cb126-3" tabindex="-1"></a>    <span class="fu">filter</span>(<span class="sc">!</span>spectrumID <span class="sc">%in%</span> mltm<span class="sc">$</span>spectrumID)</span>
-<span id="cb126-4"><a href="sec-id.html#cb126-4" tabindex="-1"></a>id_tbl</span></code></pre></div>
-<pre><code>## # A tibble: 2,666 × 35
-##    sequence    spectrumID chargeState  rank passThreshold experimentalMassToCh…¹
-##    &lt;chr&gt;       &lt;chr&gt;            &lt;int&gt; &lt;int&gt; &lt;lgl&gt;                          &lt;dbl&gt;
-##  1 RQCRTDFLNY… controlle…           3     1 TRUE                            548.
-##  2 ESVALADQVT… controlle…           2     1 TRUE                           1288.
-##  3 QRMARTSDKQ… controlle…           3     1 TRUE                            913.
-##  4 DGGPAIYGHE… controlle…           3     1 TRUE                            969.
-##  5 QRMARTSDKQ… controlle…           2     1 TRUE                           1369.
-##  6 CIDRARHVEV… controlle…           3     1 TRUE                           1285.
-##  7 CIDRARHVEV… controlle…           3     1 TRUE                           1285.
-##  8 VGRCRPIINY… controlle…           2     1 TRUE                           1102.
-##  9 QRLDEHCVGV… controlle…           3     1 TRUE                            713.
-## 10 VDYQGKKVVI… controlle…           4     1 TRUE                            870.
-## # ℹ 2,656 more rows
-## # ℹ abbreviated name: ¹​experimentalMassToCharge
-## # ℹ 29 more variables: calculatedMassToCharge &lt;dbl&gt;, peptideRef &lt;chr&gt;,
-## #   modNum &lt;int&gt;, isDecoy &lt;lgl&gt;, post &lt;chr&gt;, pre &lt;chr&gt;, start &lt;int&gt;, end &lt;int&gt;,
-## #   DatabaseAccess &lt;chr&gt;, DBseqLength &lt;int&gt;, DatabaseSeq &lt;chr&gt;,
-## #   DatabaseDescription &lt;chr&gt;, scan.number.s. &lt;dbl&gt;, acquisitionNum &lt;dbl&gt;,
-## #   spectrumFile &lt;chr&gt;, idFile &lt;chr&gt;, MS.GF.RawScore &lt;dbl&gt;, …</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>Which leaves us with 2666 PSMs.</p>
-<p>This can also be achieved with the <code>filterPSMs()</code> function, or the
-individual <code>filterPsmRank()</code>, <code>filterPsmDecoy()</code> and <code>filterPsmShared()</code>
-functions:</p>
-<div class="sourceCode" id="cb128"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb128-1"><a href="sec-id.html#cb128-1" tabindex="-1"></a>id_filtered <span class="ot">&lt;-</span> <span class="fu">filterPSMs</span>(id)</span></code></pre></div>
-<pre><code>## Starting with 5802 PSMs:</code></pre>
-<pre><code>## Removed 2896 decoy hits.</code></pre>
-<pre><code>## Removed 155 PSMs with rank &gt; 1.</code></pre>
-<pre><code>## Removed 85 shared peptides.</code></pre>
-<pre><code>## 2666 PSMs left.</code></pre>
-<p>The <code>describePeptides()</code> and <code>describeProteins()</code> functions from the
-<code>PSMatch</code> package provide useful summaries of peptides and proteins
-in a PSM search result.</p>
-<ul>
-<li>
-<code>describePeptides()</code> gives the number of unique and shared peptides
-and for the latter, the size of their protein groups:</li>
-</ul>
-<div class="sourceCode" id="cb134"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb134-1"><a href="sec-id.html#cb134-1" tabindex="-1"></a><span class="fu">describePeptides</span>(id_filtered)</span></code></pre></div>
-<pre><code>## 2324 peptides composed of</code></pre>
-<pre><code>##  unique peptides: 2324</code></pre>
-<pre><code>##  shared peptides (among protein):</code></pre>
-<pre><code>##   ()</code></pre>
-<ul>
-<li>
-<code>describeProteins()</code> gives the number of proteins defined by only
-unique, only shared, or a mixture of unique/shared peptides:</li>
-</ul>
-<div class="sourceCode" id="cb139"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb139-1"><a href="sec-id.html#cb139-1" tabindex="-1"></a><span class="fu">describeProteins</span>(id_filtered)</span></code></pre></div>
-<pre><code>## 1466 proteins composed of</code></pre>
-<pre><code>##  only unique peptides: 1466</code></pre>
-<pre><code>##  only shared peptides: 0</code></pre>
-<pre><code>##  unique and shared peptides: 0</code></pre>
-<p>The <a href="https://rformassspectrometry.github.io/PSMatch/articles/AdjacencyMatrix.html">Understanding protein groups with adjacency
-matrices</a>
-<code>PSMatch</code> vignette provides additional tools to explore how proteins
-were inferred from peptides.</p>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Compare the distribution of raw identification scores of the decoy and
-non-decoy hits. Interpret the figure.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-13" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-13', 'sol-start-13')"></span>
-</p>
-<div id="sol-body-13" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb144"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb144-1"><a href="sec-id.html#cb144-1" tabindex="-1"></a><span class="fu">library</span>(ggplot2)</span>
-<span id="cb144-2"><a href="sec-id.html#cb144-2" tabindex="-1"></a><span class="fu">as_tibble</span>(id) <span class="sc">%&gt;%</span></span>
-<span id="cb144-3"><a href="sec-id.html#cb144-3" tabindex="-1"></a>    <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> MS.GF.RawScore,</span>
-<span id="cb144-4"><a href="sec-id.html#cb144-4" tabindex="-1"></a>               <span class="at">colour =</span> isDecoy)) <span class="sc">+</span></span>
-<span id="cb144-5"><a href="sec-id.html#cb144-5" tabindex="-1"></a>    <span class="fu">geom_density</span>()</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-42-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>The <em><a href="https://CRAN.R-project.org/package=tidyverse">tidyverse</a></em>
-tools are fit for data wrangling with identification data. Using the
-above identification dataframe, calculate the length of each peptide
-(you can use <code>nchar</code> with the peptide sequence <code>sequence</code>) and the
-number of peptides for each protein (defined as
-<code>DatabaseDescription</code>). Plot the length of the proteins against their
-respective number of peptides.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-14" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-14', 'sol-start-14')"></span>
-</p>
-<div id="sol-body-14" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb145"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb145-1"><a href="sec-id.html#cb145-1" tabindex="-1"></a><span class="fu">suppressPackageStartupMessages</span>(<span class="fu">library</span>(<span class="st">"dplyr"</span>))</span>
-<span id="cb145-2"><a href="sec-id.html#cb145-2" tabindex="-1"></a>iddf <span class="ot">&lt;-</span> <span class="fu">as_tibble</span>(id_filtered) <span class="sc">%&gt;%</span></span>
-<span id="cb145-3"><a href="sec-id.html#cb145-3" tabindex="-1"></a>    <span class="fu">mutate</span>(<span class="at">peplen =</span> <span class="fu">nchar</span>(sequence))</span>
-<span id="cb145-4"><a href="sec-id.html#cb145-4" tabindex="-1"></a>npeps <span class="ot">&lt;-</span> iddf <span class="sc">%&gt;%</span></span>
-<span id="cb145-5"><a href="sec-id.html#cb145-5" tabindex="-1"></a>    <span class="fu">group_by</span>(DatabaseAccess) <span class="sc">%&gt;%</span></span>
-<span id="cb145-6"><a href="sec-id.html#cb145-6" tabindex="-1"></a>    tally</span>
-<span id="cb145-7"><a href="sec-id.html#cb145-7" tabindex="-1"></a>iddf <span class="ot">&lt;-</span> <span class="fu">full_join</span>(iddf, npeps)</span></code></pre></div>
-<pre><code>## Joining with `by = join_by(DatabaseAccess)`</code></pre>
-<div class="sourceCode" id="cb147"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb147-1"><a href="sec-id.html#cb147-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"ggplot2"</span>)</span>
-<span id="cb147-2"><a href="sec-id.html#cb147-2" tabindex="-1"></a><span class="fu">ggplot</span>(iddf, <span class="fu">aes</span>(<span class="at">x =</span> n, <span class="at">y =</span> DBseqLength)) <span class="sc">+</span> <span class="fu">geom_point</span>()</span></code></pre></div>
-<div class="figure">
-<span style="display:block;" id="fig:answid1"></span>
-<p class="caption marginnote shownote">
-Figure 4.1: Identification data wrangling.
-</p>
-<img src="R4MS_files/figure-html/answid1-1.png" alt="Identification data wrangling." width="672">
-</div>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>If you would like to learn more about how the mzid data are handled by
-<code>PSMatch</code> via the <em><a href="https://bioconductor.org/packages/3.17/mzR">mzR</a></em> and <em><a href="https://bioconductor.org/packages/3.17/mzID">mzID</a></em>
-packages, check out the <a href="sec-anx.html#sec-id2">6.2</a> section in the annex.</p>
-</div>
-<div id="adding-identification-data-to-raw-data" class="section level2" number="4.4">
-<h2>
-<span class="header-section-number">4.4</span> Adding identification data to raw data<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('adding-identification-data-to-raw-data')" onmouseout="reset_tooltip('adding-identification-data-to-raw-data-tooltip')"><span class="tooltiptext" id="adding-identification-data-to-raw-data-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>We are going to use the <code>sp</code> object created in the previous chapter
-and the <code>id_filtered</code> variable generated above.</p>
-<p>Identification data (as a <code>DataFrame</code>) can be merged into raw data (as
-a <code>Spectra</code> object) by adding new spectra variables to the appropriate
-MS2 spectra. Scans and peptide-spectrum matches can be matched by
-their spectrum identifiers.</p>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Identify the spectrum identifier columns in the <code>sp</code> and <code>id_filtered</code>
-variables.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-15" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-15', 'sol-start-15')"></span>
-</p>
-<div id="sol-body-15" class="solution-body" style="display: none;">
-<p>In the raw data, it is encoded as <code>spectrumId</code>, while in the
-identification data, we have <code>spectrumID</code>.</p>
-<div class="sourceCode" id="cb148"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb148-1"><a href="sec-id.html#cb148-1" tabindex="-1"></a><span class="fu">spectraVariables</span>(sp)</span></code></pre></div>
-<pre><code>##  [1] "msLevel"                  "rtime"                   
-##  [3] "acquisitionNum"           "scanIndex"               
-##  [5] "dataStorage"              "dataOrigin"              
-##  [7] "centroided"               "smoothed"                
-##  [9] "polarity"                 "precScanNum"             
-## [11] "precursorMz"              "precursorIntensity"      
-## [13] "precursorCharge"          "collisionEnergy"         
-## [15] "isolationWindowLowerMz"   "isolationWindowTargetMz" 
-## [17] "isolationWindowUpperMz"   "peaksCount"              
-## [19] "totIonCurrent"            "basePeakMZ"              
-## [21] "basePeakIntensity"        "ionisationEnergy"        
-## [23] "lowMZ"                    "highMZ"                  
-## [25] "mergedScan"               "mergedResultScanNum"     
-## [27] "mergedResultStartScanNum" "mergedResultEndScanNum"  
-## [29] "injectionTime"            "filterString"            
-## [31] "spectrumId"               "ionMobilityDriftTime"    
-## [33] "scanWindowLowerLimit"     "scanWindowUpperLimit"    
-## [35] "rtime_minute"</code></pre>
-<div class="sourceCode" id="cb150"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb150-1"><a href="sec-id.html#cb150-1" tabindex="-1"></a><span class="fu">names</span>(id_filtered)</span></code></pre></div>
-<pre><code>##  [1] "sequence"                 "spectrumID"              
-##  [3] "chargeState"              "rank"                    
-##  [5] "passThreshold"            "experimentalMassToCharge"
-##  [7] "calculatedMassToCharge"   "peptideRef"              
-##  [9] "modNum"                   "isDecoy"                 
-## [11] "post"                     "pre"                     
-## [13] "start"                    "end"                     
-## [15] "DatabaseAccess"           "DBseqLength"             
-## [17] "DatabaseSeq"              "DatabaseDescription"     
-## [19] "scan.number.s."           "acquisitionNum"          
-## [21] "spectrumFile"             "idFile"                  
-## [23] "MS.GF.RawScore"           "MS.GF.DeNovoScore"       
-## [25] "MS.GF.SpecEValue"         "MS.GF.EValue"            
-## [27] "MS.GF.QValue"             "MS.GF.PepQValue"         
-## [29] "modPeptideRef"            "modName"                 
-## [31] "modMass"                  "modLocation"             
-## [33] "subOriginalResidue"       "subReplacementResidue"   
-## [35] "subLocation"</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>We still have several PSMs that are matched to a single spectrum
-identifier:</p>
-<div class="sourceCode" id="cb152"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb152-1"><a href="sec-id.html#cb152-1" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">table</span>(id_filtered<span class="sc">$</span>spectrumID))</span></code></pre></div>
-<pre><code>## 
-##    1    2    3    4 
-## 2630   13    2    1</code></pre>
-<p>Let’s look at <code>"controllerType=0 controllerNumber=1 scan=5490"</code>, which
-has 4 matching PSMs, in detail.</p>
-<div class="sourceCode" id="cb154"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb154-1"><a href="sec-id.html#cb154-1" tabindex="-1"></a><span class="fu">which</span>(<span class="fu">table</span>(id_filtered<span class="sc">$</span>spectrumID) <span class="sc">==</span> <span class="dv">4</span>)</span></code></pre></div>
-<pre><code>## controllerType=0 controllerNumber=1 scan=5490 
-##                                          1903</code></pre>
-<div class="sourceCode" id="cb156"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb156-1"><a href="sec-id.html#cb156-1" tabindex="-1"></a>id_4 <span class="ot">&lt;-</span> id_filtered[id_filtered<span class="sc">$</span>spectrumID <span class="sc">==</span> <span class="st">"controllerType=0 controllerNumber=1 scan=5490"</span>, ] <span class="sc">%&gt;%</span></span>
-<span id="cb156-2"><a href="sec-id.html#cb156-2" tabindex="-1"></a>    <span class="fu">as.data.frame</span>()</span>
-<span id="cb156-3"><a href="sec-id.html#cb156-3" tabindex="-1"></a>id_4</span></code></pre></div>
-<pre><code>##           sequence                                    spectrumID chargeState
-## 1 KCNQCLKVACTLFYCK controllerType=0 controllerNumber=1 scan=5490           3
-## 2 KCNQCLKVACTLFYCK controllerType=0 controllerNumber=1 scan=5490           3
-##   rank passThreshold experimentalMassToCharge calculatedMassToCharge peptideRef
-## 1    1          TRUE                 698.6633               698.3315     Pep453
-## 2    1          TRUE                 698.6633               698.3315     Pep453
-##   modNum isDecoy post pre start end DatabaseAccess DBseqLength DatabaseSeq
-## 1      4   FALSE    C   K   127 142        ECA0668         302            
-## 2      4   FALSE    C   K   127 142        ECA0668         302            
-##            DatabaseDescription scan.number.s. acquisitionNum
-## 1 ECA0668 hypothetical protein           5490           5490
-## 2 ECA0668 hypothetical protein           5490           5490
-##                                                          spectrumFile
-## 1 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-## 2 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-##                                                                idFile
-## 1 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid
-## 2 TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid
-##   MS.GF.RawScore MS.GF.DeNovoScore MS.GF.SpecEValue MS.GF.EValue MS.GF.QValue
-## 1            -22                79     4.555588e-07     1.307689    0.9006211
-## 2            -22                79     4.555588e-07     1.307689    0.9006211
-##   MS.GF.PepQValue modPeptideRef         modName  modMass modLocation
-## 1       0.8901099        Pep453 Carbamidomethyl 57.02146           2
-## 2       0.8901099        Pep453 Carbamidomethyl 57.02146           5
-##   subOriginalResidue subReplacementResidue subLocation
-## 1               &lt;NA&gt;                  &lt;NA&gt;          NA
-## 2               &lt;NA&gt;                  &lt;NA&gt;          NA
-##  [ reached 'max' / getOption("max.print") -- omitted 2 rows ]</code></pre>
-<p>We can see that these 4 PSMs differ by the location of the
-Carbamidomethyl modification.</p>
-<div class="sourceCode" id="cb158"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb158-1"><a href="sec-id.html#cb158-1" tabindex="-1"></a>id_4[, <span class="fu">c</span>(<span class="st">"modName"</span>, <span class="st">"modLocation"</span>)]</span></code></pre></div>
-<pre><code>##           modName modLocation
-## 1 Carbamidomethyl           2
-## 2 Carbamidomethyl           5
-## 3 Carbamidomethyl          10
-## 4 Carbamidomethyl          15</code></pre>
-<p>Let’s reduce that PSM table before joining it to the <code>Spectra</code> object,
-to make sure we have unique one-to-one matches between the raw spectra
-and the PSMs.</p>
-<div class="sourceCode" id="cb160"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb160-1"><a href="sec-id.html#cb160-1" tabindex="-1"></a>id_filtered <span class="ot">&lt;-</span> <span class="fu">reducePSMs</span>(id_filtered, id_filtered<span class="sc">$</span>spectrumID)</span>
-<span id="cb160-2"><a href="sec-id.html#cb160-2" tabindex="-1"></a>id_filtered</span></code></pre></div>
-<pre><code>## Reduced PSM with 2646 rows and 35 columns.
-## names(35): sequence spectrumID ... subReplacementResidue subLocation</code></pre>
-<p>These two data can thus simply be joined using:</p>
-<div class="sourceCode" id="cb162"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb162-1"><a href="sec-id.html#cb162-1" tabindex="-1"></a>sp <span class="ot">&lt;-</span> <span class="fu">joinSpectraData</span>(sp, id_filtered,</span>
-<span id="cb162-2"><a href="sec-id.html#cb162-2" tabindex="-1"></a>                      <span class="at">by.x =</span> <span class="st">"spectrumId"</span>,</span>
-<span id="cb162-3"><a href="sec-id.html#cb162-3" tabindex="-1"></a>                      <span class="at">by.y =</span> <span class="st">"spectrumID"</span>)</span>
-<span id="cb162-4"><a href="sec-id.html#cb162-4" tabindex="-1"></a><span class="fu">spectraVariables</span>(sp)</span></code></pre></div>
-<pre><code>##  [1] "msLevel"                  "rtime"                   
-##  [3] "acquisitionNum"           "scanIndex"               
-##  [5] "dataStorage"              "dataOrigin"              
-##  [7] "centroided"               "smoothed"                
-##  [9] "polarity"                 "precScanNum"             
-## [11] "precursorMz"              "precursorIntensity"      
-## [13] "precursorCharge"          "collisionEnergy"         
-## [15] "isolationWindowLowerMz"   "isolationWindowTargetMz" 
-## [17] "isolationWindowUpperMz"   "peaksCount"              
-## [19] "totIonCurrent"            "basePeakMZ"              
-## [21] "basePeakIntensity"        "ionisationEnergy"        
-## [23] "lowMZ"                    "highMZ"                  
-## [25] "mergedScan"               "mergedResultScanNum"     
-## [27] "mergedResultStartScanNum" "mergedResultEndScanNum"  
-## [29] "injectionTime"            "filterString"            
-## [31] "spectrumId"               "ionMobilityDriftTime"    
-## [33] "scanWindowLowerLimit"     "scanWindowUpperLimit"    
-## [35] "rtime_minute"             "sequence"                
-## [37] "chargeState"              "rank"                    
-## [39] "passThreshold"            "experimentalMassToCharge"
-## [41] "calculatedMassToCharge"   "peptideRef"              
-## [43] "modNum"                   "isDecoy"                 
-## [45] "post"                     "pre"                     
-## [47] "start"                    "end"                     
-## [49] "DatabaseAccess"           "DBseqLength"             
-## [51] "DatabaseSeq"              "DatabaseDescription"     
-## [53] "scan.number.s."           "acquisitionNum.y"        
-## [55] "spectrumFile"             "idFile"                  
-## [57] "MS.GF.RawScore"           "MS.GF.DeNovoScore"       
-## [59] "MS.GF.SpecEValue"         "MS.GF.EValue"            
-## [61] "MS.GF.QValue"             "MS.GF.PepQValue"         
-## [63] "modPeptideRef"            "modName"                 
-## [65] "modMass"                  "modLocation"             
-## [67] "subOriginalResidue"       "subReplacementResidue"   
-## [69] "subLocation"</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Verify that the identification data has been added to the correct
-spectra.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-16" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-16', 'sol-start-16')"></span>
-</p>
-<div id="sol-body-16" class="solution-body" style="display: none;">
-<p>Let’s first verify that no identification data has been added to the
-MS1 scans.</p>
-<div class="sourceCode" id="cb164"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb164-1"><a href="sec-id.html#cb164-1" tabindex="-1"></a><span class="fu">all</span>(<span class="fu">is.na</span>(<span class="fu">filterMsLevel</span>(sp, <span class="dv">1</span>)<span class="sc">$</span>sequence))</span></code></pre></div>
-<pre><code>## [1] TRUE</code></pre>
-<p>They have indeed been added to 43% of the MS2 spectra.</p>
-<div class="sourceCode" id="cb166"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb166-1"><a href="sec-id.html#cb166-1" tabindex="-1"></a>sp_2 <span class="ot">&lt;-</span> <span class="fu">filterMsLevel</span>(sp, <span class="dv">2</span>)</span>
-<span id="cb166-2"><a href="sec-id.html#cb166-2" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">is.na</span>(sp_2<span class="sc">$</span>sequence))</span></code></pre></div>
-<pre><code>## 
-## FALSE  TRUE 
-##  2646  3457</code></pre>
-<p>Let’s compare the precursor/peptide mass to charges</p>
-<div class="sourceCode" id="cb168"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb168-1"><a href="sec-id.html#cb168-1" tabindex="-1"></a>sp_2 <span class="ot">&lt;-</span> sp_2[<span class="sc">!</span><span class="fu">is.na</span>(sp_2<span class="sc">$</span>sequence)]</span>
-<span id="cb168-2"><a href="sec-id.html#cb168-2" tabindex="-1"></a><span class="fu">summary</span>(sp_2<span class="sc">$</span>precursorMz <span class="sc">-</span> sp_2<span class="sc">$</span>experimentalMassToCharge)</span></code></pre></div>
-<pre><code>##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-##  0.0000  0.0000  0.0000  0.0053  0.0000  2.0297</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="an-identification-annotated-chromatogram" class="section level2" number="4.5">
-<h2>
-<span class="header-section-number">4.5</span> An identification-annotated chromatogram<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('an-identification-annotated-chromatogram')" onmouseout="reset_tooltip('an-identification-annotated-chromatogram-tooltip')"><span class="tooltiptext" id="an-identification-annotated-chromatogram-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Now that we have combined raw data and their associated
-peptide-spectrum matches, we can produce an improved total ion
-chromatogram, identifying MS1 scans that lead to successful
-identifications.</p>
-<p>The <code>countIdentifications()</code> function is going to tally the number of
-identifications (i.e. non-missing characters in the <code>sequence</code> spectra
-variable) for each scan. In the case of MS2 scans, these will be
-either 1 or 0, depending on the presence of a sequence. For MS1 scans,
-the function will count the number of sequences for the descendant MS2
-scans, i.e. those produced from precursor ions from each MS1 scan.</p>
-<div class="sourceCode" id="cb170"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb170-1"><a href="sec-id.html#cb170-1" tabindex="-1"></a>sp <span class="ot">&lt;-</span> <span class="fu">countIdentifications</span>(sp)</span></code></pre></div>
-<p>Below, we see on the second line that 3457 MS2 scans led to no PSM,
-while 2646 led to an identification. Among all MS1 scans, 833 led to
-no MS2 scans with PSMs. 30 MS1 scans generated one MS2 scan that led
-to a PSM, 45 led to two PSMs, …</p>
-<div class="sourceCode" id="cb171"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb171-1"><a href="sec-id.html#cb171-1" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">msLevel</span>(sp), sp<span class="sc">$</span>countIdentifications)</span></code></pre></div>
-<pre><code>##    
-##        0    1    2    3    4    5    6    7    8    9   10
-##   1  833   30   45   97  139  132   92   42   17    3    1
-##   2 3457 2646    0    0    0    0    0    0    0    0    0</code></pre>
-<p>These data can also be visualised on the total ion chromatogram:</p>
-<div class="sourceCode" id="cb173"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb173-1"><a href="sec-id.html#cb173-1" tabindex="-1"></a>sp <span class="sc">|&gt;</span></span>
-<span id="cb173-2"><a href="sec-id.html#cb173-2" tabindex="-1"></a><span class="fu">filterMsLevel</span>(<span class="dv">1</span>) <span class="sc">|&gt;</span></span>
-<span id="cb173-3"><a href="sec-id.html#cb173-3" tabindex="-1"></a><span class="fu">spectraData</span>() <span class="sc">|&gt;</span></span>
-<span id="cb173-4"><a href="sec-id.html#cb173-4" tabindex="-1"></a><span class="fu">as_tibble</span>() <span class="sc">|&gt;</span></span>
-<span id="cb173-5"><a href="sec-id.html#cb173-5" tabindex="-1"></a><span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> rtime,</span>
-<span id="cb173-6"><a href="sec-id.html#cb173-6" tabindex="-1"></a>           <span class="at">y =</span> totIonCurrent)) <span class="sc">+</span></span>
-<span id="cb173-7"><a href="sec-id.html#cb173-7" tabindex="-1"></a>    <span class="fu">geom_line</span>(<span class="at">alpha =</span> <span class="fl">0.25</span>) <span class="sc">+</span></span>
-<span id="cb173-8"><a href="sec-id.html#cb173-8" tabindex="-1"></a>    <span class="fu">geom_point</span>(<span class="fu">aes</span>(<span class="at">colour =</span> <span class="fu">ifelse</span>(countIdentifications <span class="sc">==</span> <span class="dv">0</span>,</span>
-<span id="cb173-9"><a href="sec-id.html#cb173-9" tabindex="-1"></a>                                   <span class="cn">NA</span>, countIdentifications)),</span>
-<span id="cb173-10"><a href="sec-id.html#cb173-10" tabindex="-1"></a>               <span class="at">size =</span> <span class="fl">0.75</span>,</span>
-<span id="cb173-11"><a href="sec-id.html#cb173-11" tabindex="-1"></a>               <span class="at">alpha =</span> <span class="fl">0.5</span>) <span class="sc">+</span></span>
-<span id="cb173-12"><a href="sec-id.html#cb173-12" tabindex="-1"></a>    <span class="fu">labs</span>(<span class="at">colour =</span> <span class="st">"Number of ids"</span>)</span></code></pre></div>
-<div class="figure fullwidth">
-<img src="R4MS_files/figure-html/nSequencePlot-1.png" alt=" " width="768"><p class="caption marginnote shownote">
-</p>
-</div>
-</div>
-<div id="visualising-peptide-spectrum-matches" class="section level2" number="4.6">
-<h2>
-<span class="header-section-number">4.6</span> Visualising peptide-spectrum matches<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('visualising-peptide-spectrum-matches')" onmouseout="reset_tooltip('visualising-peptide-spectrum-matches-tooltip')"><span class="tooltiptext" id="visualising-peptide-spectrum-matches-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Let’s choose an MS2 spectrum with a high identification score and plot
-it.</p>
-<div class="sourceCode" id="cb174"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb174-1"><a href="sec-id.html#cb174-1" tabindex="-1"></a>i <span class="ot">&lt;-</span> <span class="fu">which</span>(sp<span class="sc">$</span>MS.GF.RawScore <span class="sc">&gt;</span> <span class="dv">100</span>)[<span class="dv">1</span>]</span>
-<span id="cb174-2"><a href="sec-id.html#cb174-2" tabindex="-1"></a><span class="fu">plotSpectra</span>(sp[i])</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-52-1.png" width="672"></p>
-<p>We have seen above that we can add labels to each peak using the
-<code>labels</code> argument in <code>plotSpectra()</code>. The <code>addFragments()</code> function
-takes a spectrum as input (that is a <code>Spectra</code> object of length 1) and
-annotates its peaks.</p>
-<div class="sourceCode" id="cb175"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb175-1"><a href="sec-id.html#cb175-1" tabindex="-1"></a><span class="fu">addFragments</span>(sp[i])</span></code></pre></div>
-<pre><code>##   [1] NA    NA    NA    "b1"  NA    NA    NA    NA    NA    NA    NA    NA   
-##  [13] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
-##  [25] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
-##  [37] NA    NA    NA    NA    NA    NA    NA    "y1_" NA    NA    NA    NA   
-##  [49] NA    "y1"  NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
-##  [61] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
-##  [73] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
-##  [85] NA    NA    "b2"  NA    NA    NA    NA    NA    NA    NA    NA    NA   
-##  [97] NA    NA    NA    NA   
-##  [ reached getOption("max.print") -- omitted 227 entries ]</code></pre>
-<p>It can be directly used with <code>plotSpectra()</code>:</p>
-<div class="sourceCode" id="cb177"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb177-1"><a href="sec-id.html#cb177-1" tabindex="-1"></a><span class="fu">plotSpectra</span>(sp[i], <span class="at">labels =</span> addFragments,</span>
-<span id="cb177-2"><a href="sec-id.html#cb177-2" tabindex="-1"></a>            <span class="at">labelPos =</span> <span class="dv">3</span>, <span class="at">labelCol =</span> <span class="st">"steelblue"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-54-1.png" width="672"></p>
-<p>When a precursor peptide ion is fragmented in a CID cell, it breaks at
-specific bonds, producing sets of peaks (<em>a</em>, <em>b</em>, <em>c</em> and <em>x</em>, <em>y</em>,
-<em>z</em>) that can be predicted.</p>
-<div class="figure">
-<p class="caption marginnote shownote">
-Peptide fragmentation.
-</p>
-<img src="img/frag.png" alt="Peptide fragmentation." width="80%">
-</div>
-<p>The annotation of spectra is obtained by simulating fragmentation of a
-peptide and matching observed peaks to fragments:</p>
-<div class="sourceCode" id="cb178"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb178-1"><a href="sec-id.html#cb178-1" tabindex="-1"></a>sp[i]<span class="sc">$</span>sequence</span></code></pre></div>
-<pre><code>## [1] "THSQEEMQHMQR"</code></pre>
-<div class="sourceCode" id="cb180"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb180-1"><a href="sec-id.html#cb180-1" tabindex="-1"></a><span class="fu">calculateFragments</span>(sp[i]<span class="sc">$</span>sequence)</span></code></pre></div>
-<pre><code>## Modifications used: C=57.02146</code></pre>
-<pre><code>##           mz ion type pos z         seq
-## 1   102.0550  b1    b   1 1           T
-## 2   239.1139  b2    b   2 1          TH
-## 3   326.1459  b3    b   3 1         THS
-## 4   454.2045  b4    b   4 1        THSQ
-## 5   583.2471  b5    b   5 1       THSQE
-## 6   712.2897  b6    b   6 1      THSQEE
-## 7   843.3301  b7    b   7 1     THSQEEM
-## 8   971.3887  b8    b   8 1    THSQEEMQ
-## 9  1108.4476  b9    b   9 1   THSQEEMQH
-## 10 1239.4881 b10    b  10 1  THSQEEMQHM
-## 11 1367.5467 b11    b  11 1 THSQEEMQHMQ
-## 12  175.1190  y1    y   1 1           R
-## 13  303.1775  y2    y   2 1          QR
-## 14  434.2180  y3    y   3 1         MQR
-## 15  571.2769  y4    y   4 1        HMQR
-## 16  699.3355  y5    y   5 1       QHMQR
-##  [ reached 'max' / getOption("max.print") -- omitted 42 rows ]</code></pre>
-</div>
-<div id="comparing-spectra" class="section level2" number="4.7">
-<h2>
-<span class="header-section-number">4.7</span> Comparing spectra<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('comparing-spectra')" onmouseout="reset_tooltip('comparing-spectra-tooltip')"><span class="tooltiptext" id="comparing-spectra-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>The <code>compareSpectra()</code> function can be used to compare spectra (by default,
-computing the normalised dot product).</p>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol style="list-style-type: decimal">
-<li>Create a new <code>Spectra</code> object containing the MS2 spectra with
-sequences <code>"SQILQQAGTSVLSQANQVPQTVLSLLR"</code> and
-<code>"TKGLNVMQNLLTAHPDVQAVFAQNDEMALGALR"</code>.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-17" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-17', 'sol-start-17')"></span>
-</p>
-<div id="sol-body-17" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb183"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb183-1"><a href="sec-id.html#cb183-1" tabindex="-1"></a>k <span class="ot">&lt;-</span> <span class="fu">which</span>(sp<span class="sc">$</span>sequence <span class="sc">%in%</span> <span class="fu">c</span>(<span class="st">"SQILQQAGTSVLSQANQVPQTVLSLLR"</span>, <span class="st">"TKGLNVMQNLLTAHPDVQAVFAQNDEMALGALR"</span>))</span>
-<span id="cb183-2"><a href="sec-id.html#cb183-2" tabindex="-1"></a>sp_k <span class="ot">&lt;-</span> sp[k]</span>
-<span id="cb183-3"><a href="sec-id.html#cb183-3" tabindex="-1"></a>sp_k</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 5 spectra in a MsBackendMzR backend:
-##     msLevel     rtime scanIndex
-##   &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1         2   2687.42      5230
-## 2         2   2688.88      5235
-## 3         2   2748.75      5397
-## 4         2   2765.26      5442
-## 5         2   2768.17      5449
-##  ... 69 more variables/columns.
-## 
-## file(s):
-## 8ee512042c5ff_TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol start="2" style="list-style-type: decimal">
-<li>Calculate the 5 by 5 similarity
-matrix between all spectra using <code>compareSpectra</code>. See the
-<code>?Spectra</code> man page for details. Draw a heatmap of that matrix.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-18" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-18', 'sol-start-18')"></span>
-</p>
-<div id="sol-body-18" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb185"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb185-1"><a href="sec-id.html#cb185-1" tabindex="-1"></a>mat <span class="ot">&lt;-</span> <span class="fu">compareSpectra</span>(sp_k)</span>
-<span id="cb185-2"><a href="sec-id.html#cb185-2" tabindex="-1"></a><span class="fu">rownames</span>(mat) <span class="ot">&lt;-</span> <span class="fu">colnames</span>(mat) <span class="ot">&lt;-</span> <span class="fu">strtrim</span>(sp_k<span class="sc">$</span>sequence, <span class="dv">2</span>)</span>
-<span id="cb185-3"><a href="sec-id.html#cb185-3" tabindex="-1"></a>mat</span></code></pre></div>
-<pre><code>##              TK          TK           SQ          SQ           SQ
-## TK 1.0000000000 0.109126094 0.0009373465 0.001261338 0.0008256185
-## TK 0.1091260942 1.000000000 0.0025314670 0.001459654 0.0017613212
-## SQ 0.0009373465 0.002531467 1.0000000000 0.432133016 0.6879331218
-## SQ 0.0012613380 0.001459654 0.4321330158 1.000000000 0.4467153012
-## SQ 0.0008256185 0.001761321 0.6879331218 0.446715301 1.0000000000</code></pre>
-<div class="sourceCode" id="cb187"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb187-1"><a href="sec-id.html#cb187-1" tabindex="-1"></a>pheatmap<span class="sc">::</span><span class="fu">pheatmap</span>(mat)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-56-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol start="3" style="list-style-type: decimal">
-<li>Compare the spectra with the plotting function seen previously.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-19" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-19', 'sol-start-19')"></span>
-</p>
-<div id="sol-body-19" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb188"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb188-1"><a href="sec-id.html#cb188-1" tabindex="-1"></a><span class="fu">filterIntensity</span>(sp_k, <span class="fl">1e3</span>) <span class="sc">%&gt;%</span> <span class="fu">plotSpectra</span>(<span class="at">main =</span> sp_k<span class="sc">$</span>sequence)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-57-1.png" width="672"></p>
-<div class="sourceCode" id="cb189"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb189-1"><a href="sec-id.html#cb189-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">3</span>, <span class="dv">1</span>))</span>
-<span id="cb189-2"><a href="sec-id.html#cb189-2" tabindex="-1"></a><span class="fu">plotSpectraMirror</span>(sp_k[<span class="dv">1</span>], sp_k[<span class="dv">2</span>], <span class="at">main =</span> <span class="st">"TK..."</span>)</span>
-<span id="cb189-3"><a href="sec-id.html#cb189-3" tabindex="-1"></a><span class="fu">plotSpectraMirror</span>(sp_k[<span class="dv">3</span>], sp_k[<span class="dv">4</span>], <span class="at">main =</span> <span class="st">"SQ..."</span>)</span>
-<span id="cb189-4"><a href="sec-id.html#cb189-4" tabindex="-1"></a><span class="fu">plotSpectraMirror</span>(sp_k[<span class="dv">3</span>], sp_k[<span class="dv">4</span>], <span class="at">main =</span> <span class="st">"SQ..."</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-58-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="summary-exercise" class="section level2" number="4.8">
-<h2>
-<span class="header-section-number">4.8</span> Summary exercise<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('summary-exercise')" onmouseout="reset_tooltip('summary-exercise-tooltip')"><span class="tooltiptext" id="summary-exercise-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Download the first 3 mzML and mzID files from the
-<a href="https://www.ebi.ac.uk/pride/archive/projects/PXD022816">PXD022816</a>
-project <span class="citation">(<label for="tufte-mn-7" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-7" class="margin-toggle">Morgenstern, Barzilay, and Levin 2021<span class="marginnote">Morgenstern, David, Rotem Barzilay, and Yishai Levin. 2021. <span>“<span>RawBeans</span>: A Simple, Vendor-Independent, Raw-Data Quality-Control Tool.”</span> <em>Journal of Proteome Research</em>. <a href="https://doi.org/10.1021/acs.jproteome.0c00956">https://doi.org/10.1021/acs.jproteome.0c00956</a>.</span>)</span>.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-20" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-20', 'sol-start-20')"></span>
-</p>
-<div id="sol-body-20" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb190"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb190-1"><a href="sec-id.html#cb190-1" tabindex="-1"></a><span class="do">## Getting data from PX/PRIDE</span></span>
-<span id="cb190-2"><a href="sec-id.html#cb190-2" tabindex="-1"></a><span class="fu">library</span>(rpx)</span>
-<span id="cb190-3"><a href="sec-id.html#cb190-3" tabindex="-1"></a></span>
-<span id="cb190-4"><a href="sec-id.html#cb190-4" tabindex="-1"></a><span class="do">## https://www.ebi.ac.uk/pride/archive/projects/PXD022816</span></span>
-<span id="cb190-5"><a href="sec-id.html#cb190-5" tabindex="-1"></a><span class="do">## RawBeans: A Simple, Vendor-Independent, Raw-Data Quality-Control</span></span>
-<span id="cb190-6"><a href="sec-id.html#cb190-6" tabindex="-1"></a><span class="do">## Tool ()</span></span>
-<span id="cb190-7"><a href="sec-id.html#cb190-7" tabindex="-1"></a></span>
-<span id="cb190-8"><a href="sec-id.html#cb190-8" tabindex="-1"></a>PXD022816 <span class="ot">&lt;-</span> <span class="fu">PXDataset</span>(<span class="st">"PXD022816"</span>)</span></code></pre></div>
-<pre><code>## Loading PXD022816 from cache.</code></pre>
-<div class="sourceCode" id="cb192"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb192-1"><a href="sec-id.html#cb192-1" tabindex="-1"></a>PXD022816</span></code></pre></div>
-<pre><code>## Project PXD022816 with 32 files
-## </code></pre>
-<pre><code>## Resource ID BFC126 in cache in /home/lgatto/.cache/R/rpx.</code></pre>
-<pre><code>##  [1] 'QEP2LC6_HeLa_50ng_251120_01-calib.mzID.gz' ... [32] 'checksum.txt'
-##  Use 'pxfiles(.)' to see all files.</code></pre>
-<div class="sourceCode" id="cb196"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb196-1"><a href="sec-id.html#cb196-1" tabindex="-1"></a><span class="fu">pxfiles</span>(PXD022816)</span></code></pre></div>
-<pre><code>## Project PXD022816 files (32):
-##  [local]  QEP2LC6_HeLa_50ng_251120_01-calib.mzID.gz
-##  [local]  QEP2LC6_HeLa_50ng_251120_01-calib.mzML
-##  [remote] QEP2LC6_HeLa_50ng_251120_01.raw
-##  [local]  QEP2LC6_HeLa_50ng_251120_02-calib.mzID.gz
-##  [local]  QEP2LC6_HeLa_50ng_251120_02-calib.mzML
-##  [remote] QEP2LC6_HeLa_50ng_251120_02.raw
-##  [local]  QEP2LC6_HeLa_50ng_251120_03-calib.mzID.gz
-##  [local]  QEP2LC6_HeLa_50ng_251120_03-calib.mzML
-##  [remote] QEP2LC6_HeLa_50ng_251120_03.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_04-calib.mzID.gz
-##  ...</code></pre>
-<div class="sourceCode" id="cb198"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb198-1"><a href="sec-id.html#cb198-1" tabindex="-1"></a>(mzids <span class="ot">&lt;-</span> <span class="fu">pxget</span>(PXD022816, <span class="fu">grep</span>(<span class="st">"mzID"</span>, <span class="fu">pxfiles</span>(PXD022816))[<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>]))</span></code></pre></div>
-<pre><code>## Project PXD022816 files (32):
-##  [local]  QEP2LC6_HeLa_50ng_251120_01-calib.mzID.gz
-##  [local]  QEP2LC6_HeLa_50ng_251120_01-calib.mzML
-##  [remote] QEP2LC6_HeLa_50ng_251120_01.raw
-##  [local]  QEP2LC6_HeLa_50ng_251120_02-calib.mzID.gz
-##  [local]  QEP2LC6_HeLa_50ng_251120_02-calib.mzML
-##  [remote] QEP2LC6_HeLa_50ng_251120_02.raw
-##  [local]  QEP2LC6_HeLa_50ng_251120_03-calib.mzID.gz
-##  [local]  QEP2LC6_HeLa_50ng_251120_03-calib.mzML
-##  [remote] QEP2LC6_HeLa_50ng_251120_03.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_04-calib.mzID.gz
-##  ...</code></pre>
-<pre><code>## Loading QEP2LC6_HeLa_50ng_251120_01-calib.mzID.gz from cache.</code></pre>
-<pre><code>## Loading QEP2LC6_HeLa_50ng_251120_02-calib.mzID.gz from cache.</code></pre>
-<pre><code>## Loading QEP2LC6_HeLa_50ng_251120_03-calib.mzID.gz from cache.</code></pre>
-<pre><code>## [1] "/home/lgatto/.cache/R/rpx/3e55cc770e6b8e_QEP2LC6_HeLa_50ng_251120_01-calib.mzID.gz"
-## [2] "/home/lgatto/.cache/R/rpx/3e55cc152c0b80_QEP2LC6_HeLa_50ng_251120_02-calib.mzID.gz"
-## [3] "/home/lgatto/.cache/R/rpx/3e55cc4a362d2c_QEP2LC6_HeLa_50ng_251120_03-calib.mzID.gz"</code></pre>
-<div class="sourceCode" id="cb204"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb204-1"><a href="sec-id.html#cb204-1" tabindex="-1"></a>(mzmls <span class="ot">&lt;-</span> <span class="fu">pxget</span>(PXD022816, <span class="fu">grep</span>(<span class="st">"mzML"</span>, <span class="fu">pxfiles</span>(PXD022816))[<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>]))</span></code></pre></div>
-<pre><code>## Project PXD022816 files (32):
-##  [local]  QEP2LC6_HeLa_50ng_251120_01-calib.mzID.gz
-##  [local]  QEP2LC6_HeLa_50ng_251120_01-calib.mzML
-##  [remote] QEP2LC6_HeLa_50ng_251120_01.raw
-##  [local]  QEP2LC6_HeLa_50ng_251120_02-calib.mzID.gz
-##  [local]  QEP2LC6_HeLa_50ng_251120_02-calib.mzML
-##  [remote] QEP2LC6_HeLa_50ng_251120_02.raw
-##  [local]  QEP2LC6_HeLa_50ng_251120_03-calib.mzID.gz
-##  [local]  QEP2LC6_HeLa_50ng_251120_03-calib.mzML
-##  [remote] QEP2LC6_HeLa_50ng_251120_03.raw
-##  [remote] QEP2LC6_HeLa_50ng_251120_04-calib.mzID.gz
-##  ...</code></pre>
-<pre><code>## Loading QEP2LC6_HeLa_50ng_251120_01-calib.mzML from cache.</code></pre>
-<pre><code>## Loading QEP2LC6_HeLa_50ng_251120_02-calib.mzML from cache.</code></pre>
-<pre><code>## Loading QEP2LC6_HeLa_50ng_251120_03-calib.mzML from cache.</code></pre>
-<pre><code>## [1] "/home/lgatto/.cache/R/rpx/3e55cc6b3df7f9_QEP2LC6_HeLa_50ng_251120_01-calib.mzML"
-## [2] "/home/lgatto/.cache/R/rpx/3e55cc7a53b04f_QEP2LC6_HeLa_50ng_251120_02-calib.mzML"
-## [3] "/home/lgatto/.cache/R/rpx/3e55ccf41c0c_QEP2LC6_HeLa_50ng_251120_03-calib.mzML"</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Generate a <code>Spectra</code> object and a table of filtered PSMs. Visualise
-the total ion chromatograms and check the quality of the
-identification data by comparing the score densities of the decoy and
-target PSMs for each file.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-21" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-21', 'sol-start-21')"></span>
-</p>
-<div id="sol-body-21" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb210"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb210-1"><a href="sec-id.html#cb210-1" tabindex="-1"></a><span class="do">## Loading raw data</span></span>
-<span id="cb210-2"><a href="sec-id.html#cb210-2" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"Spectra"</span>)</span>
-<span id="cb210-3"><a href="sec-id.html#cb210-3" tabindex="-1"></a>sp <span class="ot">&lt;-</span> <span class="fu">Spectra</span>(mzmls)</span>
-<span id="cb210-4"><a href="sec-id.html#cb210-4" tabindex="-1"></a>sp</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 87647 spectra in a MsBackendMzR backend:
-##         msLevel     rtime scanIndex
-##       &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1             1  0.177987         1
-## 2             1  0.599870         2
-## 3             1  0.978849         3
-## 4             1  1.363217         4
-## 5             1  1.742965         5
-## ...         ...       ...       ...
-## 87643         1   4198.64     28736
-## 87644         1   4199.02     28737
-## 87645         2   4199.28     28738
-## 87646         1   4199.44     28739
-## 87647         1   4199.82     28740
-##  ... 33 more variables/columns.
-## 
-## file(s):
-## 3e55cc6b3df7f9_QEP2LC6_HeLa_50ng_251120_01-calib.mzML
-## 3e55cc7a53b04f_QEP2LC6_HeLa_50ng_251120_02-calib.mzML
-## 3e55ccf41c0c_QEP2LC6_HeLa_50ng_251120_03-calib.mzML</code></pre>
-<div class="sourceCode" id="cb212"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb212-1"><a href="sec-id.html#cb212-1" tabindex="-1"></a><span class="do">## number of spectra per file</span></span>
-<span id="cb212-2"><a href="sec-id.html#cb212-2" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">basename</span>(sp<span class="sc">$</span>dataOrigin))</span></code></pre></div>
-<pre><code>## 
-## 3e55cc6b3df7f9_QEP2LC6_HeLa_50ng_251120_01-calib.mzML 
-##                                                 29575 
-## 3e55cc7a53b04f_QEP2LC6_HeLa_50ng_251120_02-calib.mzML 
-##                                                 29332 
-##   3e55ccf41c0c_QEP2LC6_HeLa_50ng_251120_03-calib.mzML 
-##                                                 28740</code></pre>
-<div class="sourceCode" id="cb214"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb214-1"><a href="sec-id.html#cb214-1" tabindex="-1"></a><span class="do">## all levels are centroided</span></span>
-<span id="cb214-2"><a href="sec-id.html#cb214-2" tabindex="-1"></a><span class="fu">table</span>(sp<span class="sc">$</span>centroided, sp<span class="sc">$</span>msLevel)</span></code></pre></div>
-<pre><code>##       
-##            1     2
-##   TRUE 19607 68040</code></pre>
-<div class="sourceCode" id="cb216"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb216-1"><a href="sec-id.html#cb216-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"ggplot2"</span>)</span>
-<span id="cb216-2"><a href="sec-id.html#cb216-2" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"tidyr"</span>)</span>
-<span id="cb216-3"><a href="sec-id.html#cb216-3" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"magrittr"</span>)</span>
-<span id="cb216-4"><a href="sec-id.html#cb216-4" tabindex="-1"></a></span>
-<span id="cb216-5"><a href="sec-id.html#cb216-5" tabindex="-1"></a><span class="do">## Chromatograms</span></span>
-<span id="cb216-6"><a href="sec-id.html#cb216-6" tabindex="-1"></a><span class="fu">filterMsLevel</span>(sp, <span class="dv">1</span>) <span class="sc">%&gt;%</span></span>
-<span id="cb216-7"><a href="sec-id.html#cb216-7" tabindex="-1"></a>    <span class="fu">spectraData</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb216-8"><a href="sec-id.html#cb216-8" tabindex="-1"></a>    <span class="fu">as_tibble</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb216-9"><a href="sec-id.html#cb216-9" tabindex="-1"></a>    <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> rtime,</span>
-<span id="cb216-10"><a href="sec-id.html#cb216-10" tabindex="-1"></a>               <span class="at">y =</span> totIonCurrent,</span>
-<span id="cb216-11"><a href="sec-id.html#cb216-11" tabindex="-1"></a>               <span class="at">colour =</span> <span class="fu">basename</span>(dataOrigin))) <span class="sc">+</span></span>
-<span id="cb216-12"><a href="sec-id.html#cb216-12" tabindex="-1"></a>    <span class="fu">geom_line</span>()</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-61-1.png" width="1152"></p>
-<div class="sourceCode" id="cb217"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb217-1"><a href="sec-id.html#cb217-1" tabindex="-1"></a><span class="do">## Identification data</span></span>
-<span id="cb217-2"><a href="sec-id.html#cb217-2" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"PSMatch"</span>)</span>
-<span id="cb217-3"><a href="sec-id.html#cb217-3" tabindex="-1"></a>id <span class="ot">&lt;-</span> <span class="fu">PSM</span>(mzids)</span>
-<span id="cb217-4"><a href="sec-id.html#cb217-4" tabindex="-1"></a></span>
-<span id="cb217-5"><a href="sec-id.html#cb217-5" tabindex="-1"></a><span class="do">## Number of PSMs per acquisition</span></span>
-<span id="cb217-6"><a href="sec-id.html#cb217-6" tabindex="-1"></a><span class="fu">table</span>(id<span class="sc">$</span>idFile)</span></code></pre></div>
-<pre><code>## 
-## 3e55cc152c0b80_QEP2LC6_HeLa_50ng_251120_02-calib.mzID.gz 
-##                                                    25083 
-## 3e55cc4a362d2c_QEP2LC6_HeLa_50ng_251120_03-calib.mzID.gz 
-##                                                    24436 
-## 3e55cc770e6b8e_QEP2LC6_HeLa_50ng_251120_01-calib.mzID.gz 
-##                                                    25231</code></pre>
-<div class="sourceCode" id="cb219"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb219-1"><a href="sec-id.html#cb219-1" tabindex="-1"></a>tidyr<span class="sc">::</span><span class="fu">as_tibble</span>(id) <span class="sc">%&gt;%</span></span>
-<span id="cb219-2"><a href="sec-id.html#cb219-2" tabindex="-1"></a>    <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> MetaMorpheus.score,</span>
-<span id="cb219-3"><a href="sec-id.html#cb219-3" tabindex="-1"></a>               <span class="at">colour =</span> isDecoy)) <span class="sc">+</span></span>
-<span id="cb219-4"><a href="sec-id.html#cb219-4" tabindex="-1"></a>    <span class="fu">geom_density</span>() <span class="sc">+</span></span>
-<span id="cb219-5"><a href="sec-id.html#cb219-5" tabindex="-1"></a>    <span class="fu">facet_wrap</span>(<span class="sc">~</span> spectrumFile)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-62-1.png" width="1152"></p>
-<div class="sourceCode" id="cb220"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb220-1"><a href="sec-id.html#cb220-1" tabindex="-1"></a>id_filtered <span class="ot">&lt;-</span> <span class="fu">filterPSMs</span>(id)</span></code></pre></div>
-<pre><code>## Starting with 74750 PSMs:</code></pre>
-<pre><code>## Removed 543 decoy hits.</code></pre>
-<pre><code>## Removed 0 PSMs with rank &gt; 1.</code></pre>
-<pre><code>## Removed 11310 shared peptides.</code></pre>
-<pre><code>## 62897 PSMs left.</code></pre>
-<div class="sourceCode" id="cb226"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb226-1"><a href="sec-id.html#cb226-1" tabindex="-1"></a><span class="fu">max</span>(id_filtered<span class="sc">$</span>PSM.level.q.value)</span></code></pre></div>
-<pre><code>## [1] 0.009994817</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Join the raw and identification data. Beware though that the joining
-must now be performed by spectrum ids and by files.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-22" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-22', 'sol-start-22')"></span>
-</p>
-<div id="sol-body-22" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb228"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb228-1"><a href="sec-id.html#cb228-1" tabindex="-1"></a><span class="do">## primary key for spectra</span></span>
-<span id="cb228-2"><a href="sec-id.html#cb228-2" tabindex="-1"></a>sp<span class="sc">$</span>pkey <span class="ot">&lt;-</span></span>
-<span id="cb228-3"><a href="sec-id.html#cb228-3" tabindex="-1"></a>    <span class="fu">paste0</span>(<span class="fu">sub</span>(<span class="st">"^.+_QEP"</span>, <span class="st">"QEP"</span>, <span class="fu">basename</span>(<span class="fu">dataOrigin</span>(sp))),</span>
-<span id="cb228-4"><a href="sec-id.html#cb228-4" tabindex="-1"></a>           <span class="fu">gsub</span>(<span class="st">"^.+="</span>, <span class="st">"::"</span>, sp<span class="sc">$</span>spectrumId))</span>
-<span id="cb228-5"><a href="sec-id.html#cb228-5" tabindex="-1"></a><span class="fu">head</span>(sp<span class="sc">$</span>pkey)</span></code></pre></div>
-<pre><code>## [1] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::1"
-## [2] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::2"
-## [3] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::3"
-## [4] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::4"
-## [5] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::5"
-## [6] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::6"</code></pre>
-<div class="sourceCode" id="cb230"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb230-1"><a href="sec-id.html#cb230-1" tabindex="-1"></a><span class="do">## primary key for PSMs</span></span>
-<span id="cb230-2"><a href="sec-id.html#cb230-2" tabindex="-1"></a>id_filtered<span class="sc">$</span>pkey <span class="ot">&lt;-</span></span>
-<span id="cb230-3"><a href="sec-id.html#cb230-3" tabindex="-1"></a>    <span class="fu">paste0</span>(<span class="fu">gsub</span>(<span class="st">"^.+</span><span class="sc">\\</span><span class="st">QEP"</span>, <span class="st">"QEP"</span>, id_filtered<span class="sc">$</span>spectrumFile),</span>
-<span id="cb230-4"><a href="sec-id.html#cb230-4" tabindex="-1"></a>           <span class="fu">sub</span>(<span class="st">"^.+="</span>, <span class="st">"::"</span>, id_filtered<span class="sc">$</span>spectrumID))</span>
-<span id="cb230-5"><a href="sec-id.html#cb230-5" tabindex="-1"></a><span class="fu">head</span>(id_filtered<span class="sc">$</span>pkey)</span></code></pre></div>
-<pre><code>## [1] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::3426" 
-## [2] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::20165"
-## [3] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::20180"
-## [4] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::15180"
-## [5] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::10327"
-## [6] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::12894"</code></pre>
-<div class="sourceCode" id="cb232"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb232-1"><a href="sec-id.html#cb232-1" tabindex="-1"></a><span class="do">## For simplicity, let's keep single hits per spectrum id.</span></span>
-<span id="cb232-2"><a href="sec-id.html#cb232-2" tabindex="-1"></a><span class="do">## Alternatively, explore duplicates and use QFeatures::reduceDataFrame</span></span>
-<span id="cb232-3"><a href="sec-id.html#cb232-3" tabindex="-1"></a>id_filtered <span class="ot">&lt;-</span> id_filtered[<span class="sc">!</span><span class="fu">duplicated</span>(id_filtered<span class="sc">$</span>pkey), ]</span>
-<span id="cb232-4"><a href="sec-id.html#cb232-4" tabindex="-1"></a></span>
-<span id="cb232-5"><a href="sec-id.html#cb232-5" tabindex="-1"></a><span class="fu">head</span>(id_filtered<span class="sc">$</span>pkey)</span></code></pre></div>
-<pre><code>## [1] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::3426" 
-## [2] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::20165"
-## [3] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::20180"
-## [4] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::15180"
-## [5] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::10327"
-## [6] "QEP2LC6_HeLa_50ng_251120_01-calib.mzML::12894"</code></pre>
-<div class="sourceCode" id="cb234"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb234-1"><a href="sec-id.html#cb234-1" tabindex="-1"></a>sp <span class="ot">&lt;-</span> <span class="fu">joinSpectraData</span>(sp, id_filtered, <span class="at">by.x =</span> <span class="st">"pkey"</span>)</span>
-<span id="cb234-2"><a href="sec-id.html#cb234-2" tabindex="-1"></a></span>
-<span id="cb234-3"><a href="sec-id.html#cb234-3" tabindex="-1"></a><span class="do">## Number of MS2 scans with a PSM</span></span>
-<span id="cb234-4"><a href="sec-id.html#cb234-4" tabindex="-1"></a><span class="fu">table</span>(<span class="sc">!</span><span class="fu">is.na</span>(<span class="fu">filterMsLevel</span>(sp, <span class="dv">2</span>)<span class="sc">$</span>sequence))</span></code></pre></div>
-<pre><code>## 
-## FALSE  TRUE 
-## 22006 46034</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Extract the PSMs that have been matched to peptides from protein
-<code>O43175</code> and compare and cluster the scans. Hint: once you have
-created the smaller <code>Spectra</code> object with the scans of interest,
-switch to an in-memory backend to speed up the calculations.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-23" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-23', 'sol-start-23')"></span>
-</p>
-<div id="sol-body-23" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb236"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb236-1"><a href="sec-id.html#cb236-1" tabindex="-1"></a>sp_O43175 <span class="ot">&lt;-</span> sp[<span class="fu">which</span>(sp<span class="sc">$</span>DatabaseAccess <span class="sc">==</span> <span class="st">"O43175"</span>)]</span>
-<span id="cb236-2"><a href="sec-id.html#cb236-2" tabindex="-1"></a>sp_O43175 <span class="ot">&lt;-</span> <span class="fu">setBackend</span>(sp_O43175, <span class="fu">MsBackendDataFrame</span>())</span>
-<span id="cb236-3"><a href="sec-id.html#cb236-3" tabindex="-1"></a>sp_O43175</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 77 spectra in a MsBackendDataFrame backend:
-##       msLevel     rtime scanIndex
-##     &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1           2   1496.36      8143
-## 2           2   1518.19      8362
-## 3           2   1582.59      9009
-## 4           2   1587.08      9056
-## 5           2   1708.69     10288
-## ...       ...       ...       ...
-## 73          2   3148.28     23503
-## 74          2   3242.61     24333
-## 75          2   3436.78     25513
-## 76          2   3547.90     26170
-## 77          2   3563.78     26314
-##  ... 65 more variables/columns.
-## Processing:
-##  Switch backend from MsBackendMzR to MsBackendDataFrame [Wed Sep  6 11:50:52 2023]</code></pre>
-<div class="sourceCode" id="cb238"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb238-1"><a href="sec-id.html#cb238-1" tabindex="-1"></a>cmat <span class="ot">&lt;-</span> <span class="fu">compareSpectra</span>(sp_O43175)</span>
-<span id="cb238-2"><a href="sec-id.html#cb238-2" tabindex="-1"></a><span class="fu">rownames</span>(cmat) <span class="ot">&lt;-</span></span>
-<span id="cb238-3"><a href="sec-id.html#cb238-3" tabindex="-1"></a>    <span class="fu">colnames</span>(cmat) <span class="ot">&lt;-</span> <span class="fu">strtrim</span>(sp_O43175<span class="sc">$</span>sequence, <span class="dv">3</span>)</span>
-<span id="cb238-4"><a href="sec-id.html#cb238-4" tabindex="-1"></a></span>
-<span id="cb238-5"><a href="sec-id.html#cb238-5" tabindex="-1"></a>pheatmap<span class="sc">::</span><span class="fu">pheatmap</span>(cmat)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-65-1.png" width="672"></p>
-<div class="sourceCode" id="cb239"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb239-1"><a href="sec-id.html#cb239-1" tabindex="-1"></a>(i <span class="ot">&lt;-</span> <span class="fu">which</span>(<span class="fu">rownames</span>(cmat) <span class="sc">==</span> <span class="st">"DLP"</span>))</span></code></pre></div>
-<pre><code>## [1] 21 23 45 46 49 73 74 77</code></pre>
-<div class="sourceCode" id="cb241"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb241-1"><a href="sec-id.html#cb241-1" tabindex="-1"></a><span class="fu">plotSpectra</span>(sp_O43175[i], <span class="at">labels =</span> addFragments,</span>
-<span id="cb241-2"><a href="sec-id.html#cb241-2" tabindex="-1"></a>            <span class="at">labelPos =</span> <span class="dv">3</span>, <span class="at">labelCol =</span> <span class="st">"steelblue"</span>,</span>
-<span id="cb241-3"><a href="sec-id.html#cb241-3" tabindex="-1"></a>            <span class="at">main =</span> sp_O43175<span class="sc">$</span>sequence[i])</span>
-<span id="cb241-4"><a href="sec-id.html#cb241-4" tabindex="-1"></a></span>
-<span id="cb241-5"><a href="sec-id.html#cb241-5" tabindex="-1"></a><span class="fu">spectraData</span>(sp_O43175[i])<span class="sc">$</span>precursorCharge</span></code></pre></div>
-<pre><code>## [1] 2 2 2 2 2 2 2 2</code></pre>
-<div class="sourceCode" id="cb243"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb243-1"><a href="sec-id.html#cb243-1" tabindex="-1"></a><span class="fu">spectraData</span>(sp_O43175[i])<span class="sc">$</span>precursorMz</span></code></pre></div>
-<pre><code>## [1] 493.8055 515.3086 493.8383 493.8380 515.3084 493.8057 493.8390 515.3087</code></pre>
-<div class="sourceCode" id="cb245"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb245-1"><a href="sec-id.html#cb245-1" tabindex="-1"></a><span class="fu">spectraData</span>(sp_O43175[i])<span class="sc">$</span>modName</span></code></pre></div>
-<pre><code>## [1] NA         "Carbamyl" NA         NA         "Carbamyl" NA         NA        
-## [8] "Carbamyl"</code></pre>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-65-2.png" width="672"></p>
-<div class="sourceCode" id="cb247"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb247-1"><a href="sec-id.html#cb247-1" tabindex="-1"></a><span class="do">## Directly compare spectra with/without Carbamyl</span></span>
-<span id="cb247-2"><a href="sec-id.html#cb247-2" tabindex="-1"></a><span class="fu">plotSpectraMirror</span>(sp_O43175[<span class="dv">4</span>], sp_O43175[<span class="dv">9</span>])</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-66-1.png" width="672"></p>
-<div class="sourceCode" id="cb248"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb248-1"><a href="sec-id.html#cb248-1" tabindex="-1"></a><span class="fu">plotSpectraMirror</span>(sp_O43175[<span class="dv">2</span>], sp_O43175[<span class="dv">10</span>])</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-66-2.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Generate total ion chromatograms for each acquisition and annotate the
-MS1 scans with the number of PSMs using the <code>countIdentifications()</code>
-function, as shown above. The function will automatically perform the
-counts in parallel for each acquisition.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-24" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-24', 'sol-start-24')"></span>
-</p>
-<div id="sol-body-24" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb249"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb249-1"><a href="sec-id.html#cb249-1" tabindex="-1"></a>sp <span class="ot">&lt;-</span> <span class="fu">countIdentifications</span>(sp)</span>
-<span id="cb249-2"><a href="sec-id.html#cb249-2" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">msLevel</span>(sp), sp<span class="sc">$</span>countIdentifications)</span></code></pre></div>
-<pre><code>##    
-##         0     1     2     3     4     5     6     7     8     9    10
-##   1 12937   488   254   213   237   345   588   991  1439  1401   714
-##   2 22006 46034     0     0     0     0     0     0     0     0     0</code></pre>
-<div class="sourceCode" id="cb251"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb251-1"><a href="sec-id.html#cb251-1" tabindex="-1"></a>sp <span class="sc">|&gt;</span></span>
-<span id="cb251-2"><a href="sec-id.html#cb251-2" tabindex="-1"></a> <span class="fu">filterMsLevel</span>(<span class="dv">1</span>) <span class="sc">|&gt;</span></span>
-<span id="cb251-3"><a href="sec-id.html#cb251-3" tabindex="-1"></a> <span class="fu">spectraData</span>() <span class="sc">|&gt;</span></span>
-<span id="cb251-4"><a href="sec-id.html#cb251-4" tabindex="-1"></a> <span class="fu">as_tibble</span>() <span class="sc">|&gt;</span></span>
-<span id="cb251-5"><a href="sec-id.html#cb251-5" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> rtime,</span>
-<span id="cb251-6"><a href="sec-id.html#cb251-6" tabindex="-1"></a>            <span class="at">y =</span> totIonCurrent)) <span class="sc">+</span></span>
-<span id="cb251-7"><a href="sec-id.html#cb251-7" tabindex="-1"></a>     <span class="fu">geom_line</span>(<span class="at">alpha =</span> <span class="fl">0.25</span>) <span class="sc">+</span></span>
-<span id="cb251-8"><a href="sec-id.html#cb251-8" tabindex="-1"></a>     <span class="fu">geom_point</span>(<span class="fu">aes</span>(<span class="at">colour =</span> <span class="fu">ifelse</span>(countIdentifications <span class="sc">==</span> <span class="dv">0</span>,</span>
-<span id="cb251-9"><a href="sec-id.html#cb251-9" tabindex="-1"></a>                                    <span class="cn">NA</span>, countIdentifications)),</span>
-<span id="cb251-10"><a href="sec-id.html#cb251-10" tabindex="-1"></a>                <span class="at">size =</span> <span class="fl">0.75</span>,</span>
-<span id="cb251-11"><a href="sec-id.html#cb251-11" tabindex="-1"></a>                <span class="at">alpha =</span> <span class="fl">0.5</span>) <span class="sc">+</span></span>
-<span id="cb251-12"><a href="sec-id.html#cb251-12" tabindex="-1"></a>     <span class="fu">scale_colour_gradient</span>(<span class="at">low =</span> <span class="st">"orange"</span>, <span class="at">high =</span> <span class="st">"red"</span>) <span class="sc">+</span></span>
-<span id="cb251-13"><a href="sec-id.html#cb251-13" tabindex="-1"></a>     <span class="fu">facet_grid</span>(<span class="fu">sub</span>(<span class="st">"^.+_"</span>, <span class="st">""</span>, <span class="fu">basename</span>(dataOrigin)) <span class="sc">~</span> .) <span class="sc">+</span></span>
-<span id="cb251-14"><a href="sec-id.html#cb251-14" tabindex="-1"></a>     <span class="fu">labs</span>(<span class="at">colour =</span> <span class="st">"Number of ids"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-68-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="exploration-and-assessment-of-identifications-using-msnid" class="section level2" number="4.9">
-<h2>
-<span class="header-section-number">4.9</span> Exploration and Assessment of Identifications using <code>MSnID</code>
-<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('exploration-and-assessment-of-identifications-using-msnid')" onmouseout="reset_tooltip('exploration-and-assessment-of-identifications-using-msnid-tooltip')"><span class="tooltiptext" id="exploration-and-assessment-of-identifications-using-msnid-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>The <code>MSnID</code> package extracts MS/MS ID data from mzIdentML (leveraging
-the <code>mzID</code> package) or text files. After collating the search results
-from multiple datasets it assesses their identification quality and
-optimises filtering criteria to achieve the maximum number of
-identifications while not exceeding a specified false discovery
-rate. It also contains a number of utilities to explore the MS/MS
-results and assess missed and irregular enzymatic cleavages, mass
-measurement accuracy, etc.</p>
-<div id="step-by-step-work-flow" class="section level3" number="4.9.1">
-<h3>
-<span class="header-section-number">4.9.1</span> Step-by-step work-flow<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('step-by-step-work-flow')" onmouseout="reset_tooltip('step-by-step-work-flow-tooltip')"><span class="tooltiptext" id="step-by-step-work-flow-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Let’s reproduce parts of the analysis described the <code>MSnID</code>
-vignette. You can explore more with</p>
-<div class="sourceCode" id="cb252"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb252-1"><a href="sec-id.html#cb252-1" tabindex="-1"></a><span class="fu">vignette</span>(<span class="st">"msnid_vignette"</span>, <span class="at">package =</span> <span class="st">"MSnID"</span>)</span></code></pre></div>
-<p>The <em><a href="https://bioconductor.org/packages/3.17/MSnID">MSnID</a></em> package can be used for post-search filtering
-of MS/MS identifications. One starts with the construction of an
-<code>MSnID</code> object that is populated with identification results that can
-be imported from a <code>data.frame</code> or from <code>mzIdenML</code> files. Here, we
-will use the example identification data provided with the package.</p>
-<div class="sourceCode" id="cb253"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb253-1"><a href="sec-id.html#cb253-1" tabindex="-1"></a>mzids <span class="ot">&lt;-</span> <span class="fu">system.file</span>(<span class="st">"extdata"</span>, <span class="st">"c_elegans.mzid.gz"</span>, <span class="at">package=</span><span class="st">"MSnID"</span>)</span>
-<span id="cb253-2"><a href="sec-id.html#cb253-2" tabindex="-1"></a><span class="fu">basename</span>(mzids)</span></code></pre></div>
-<pre><code>## [1] "c_elegans.mzid.gz"</code></pre>
-<p>We start by loading the package, initialising the <code>MSnID</code> object, and
-add the identification result from our <code>mzid</code> file (there could of
-course be more than one).</p>
-<div class="sourceCode" id="cb255"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb255-1"><a href="sec-id.html#cb255-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"MSnID"</span>)</span></code></pre></div>
-<pre><code>## 
-## Attaching package: 'MSnID'</code></pre>
-<pre><code>## The following object is masked from 'package:ProtGenerics':
-## 
-##     peptides</code></pre>
-<div class="sourceCode" id="cb258"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb258-1"><a href="sec-id.html#cb258-1" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">MSnID</span>(<span class="st">"."</span>)</span></code></pre></div>
-<pre><code>## Note, the anticipated/suggested columns in the
-## peptide-to-spectrum matching results are:
-## -----------------------------------------------
-## accession
-## calculatedMassToCharge
-## chargeState
-## experimentalMassToCharge
-## isDecoy
-## peptide
-## spectrumFile
-## spectrumID</code></pre>
-<div class="sourceCode" id="cb260"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb260-1"><a href="sec-id.html#cb260-1" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">read_mzIDs</span>(msnid, mzids)</span></code></pre></div>
-<pre><code>## Loaded cached data</code></pre>
-<div class="sourceCode" id="cb262"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb262-1"><a href="sec-id.html#cb262-1" tabindex="-1"></a><span class="fu">show</span>(msnid)</span></code></pre></div>
-<pre><code>## MSnID object
-## Working directory: "."
-## #Spectrum Files:  1 
-## #PSMs: 12263 at 36 % FDR
-## #peptides: 9489 at 44 % FDR
-## #accessions: 7414 at 76 % FDR</code></pre>
-<p>Printing the <code>MSnID</code> object returns some basic information such as</p>
-<ul>
-<li>Working directory.</li>
-<li>Number of spectrum files used to generate data.</li>
-<li>Number of peptide-to-spectrum matches and corresponding FDR.</li>
-<li>Number of unique peptide sequences and corresponding FDR.</li>
-<li>Number of unique proteins or amino acid sequence accessions and corresponding FDR.</li>
-</ul>
-<p>The package then enables to define, optimise and apply filtering based
-for example on missed cleavages, identification scores, precursor mass
-errors, etc. and assess PSM, peptide and protein FDR levels. To
-properly function, it expects to have access to the following data</p>
-<pre><code>## [1] "accession"                "calculatedMassToCharge"  
-## [3] "chargeState"              "experimentalMassToCharge"
-## [5] "isDecoy"                  "peptide"                 
-## [7] "spectrumFile"             "spectrumID"</code></pre>
-<p>which are indeed present in our data:</p>
-<div class="sourceCode" id="cb265"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb265-1"><a href="sec-id.html#cb265-1" tabindex="-1"></a><span class="fu">names</span>(msnid)</span></code></pre></div>
-<pre><code>##  [1] "spectrumID"                "scan number(s)"           
-##  [3] "acquisitionNum"            "passThreshold"            
-##  [5] "rank"                      "calculatedMassToCharge"   
-##  [7] "experimentalMassToCharge"  "chargeState"              
-##  [9] "MS-GF:DeNovoScore"         "MS-GF:EValue"             
-## [11] "MS-GF:PepQValue"           "MS-GF:QValue"             
-## [13] "MS-GF:RawScore"            "MS-GF:SpecEValue"         
-## [15] "AssumedDissociationMethod" "IsotopeError"             
-## [17] "isDecoy"                   "post"                     
-## [19] "pre"                       "end"                      
-## [21] "start"                     "accession"                
-## [23] "length"                    "description"              
-## [25] "pepSeq"                    "modified"                 
-## [27] "modification"              "idFile"                   
-## [29] "spectrumFile"              "databaseFile"             
-## [31] "peptide"</code></pre>
-<p>Here, we summarise a few steps and redirect the reader to the
-package’s vignette for more details:</p>
-</div>
-<div id="analysis-of-peptide-sequences" class="section level3" number="4.9.2">
-<h3>
-<span class="header-section-number">4.9.2</span> Analysis of peptide sequences<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('analysis-of-peptide-sequences')" onmouseout="reset_tooltip('analysis-of-peptide-sequences-tooltip')"><span class="tooltiptext" id="analysis-of-peptide-sequences-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Cleaning irregular cleavages at the termini of the peptides and
-missing cleavage site within the peptide sequences. The following two
-function calls create the new <code>numMisCleavages</code> and <code>numIrregCleavages</code>
-columns in the <code>MSnID</code> object</p>
-<div class="sourceCode" id="cb267"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb267-1"><a href="sec-id.html#cb267-1" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">assess_termini</span>(msnid, <span class="at">validCleavagePattern=</span><span class="st">"[KR]</span><span class="sc">\\</span><span class="st">.[^P]"</span>)</span>
-<span id="cb267-2"><a href="sec-id.html#cb267-2" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">assess_missed_cleavages</span>(msnid, <span class="at">missedCleavagePattern=</span><span class="st">"[KR](?=[^P$])"</span>)</span></code></pre></div>
-</div>
-<div id="trimming-the-data" class="section level3" number="4.9.3">
-<h3>
-<span class="header-section-number">4.9.3</span> Trimming the data<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('trimming-the-data')" onmouseout="reset_tooltip('trimming-the-data-tooltip')"><span class="tooltiptext" id="trimming-the-data-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Now, we can use the <code>apply_filter</code> function to effectively apply
-filters. The strings passed to the function represent expressions that
-will be evaluated, thus keeping only PSMs that have 0 irregular
-cleavages and 2 or less missed cleavages.</p>
-<div class="sourceCode" id="cb268"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb268-1"><a href="sec-id.html#cb268-1" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">apply_filter</span>(msnid, <span class="st">"numIrregCleavages == 0"</span>)</span>
-<span id="cb268-2"><a href="sec-id.html#cb268-2" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">apply_filter</span>(msnid, <span class="st">"numMissCleavages &lt;= 2"</span>)</span>
-<span id="cb268-3"><a href="sec-id.html#cb268-3" tabindex="-1"></a><span class="fu">show</span>(msnid)</span></code></pre></div>
-<pre><code>## MSnID object
-## Working directory: "."
-## #Spectrum Files:  1 
-## #PSMs: 7838 at 17 % FDR
-## #peptides: 5598 at 23 % FDR
-## #accessions: 3759 at 53 % FDR</code></pre>
-</div>
-<div id="parent-ion-mass-errors" class="section level3" number="4.9.4">
-<h3>
-<span class="header-section-number">4.9.4</span> Parent ion mass errors<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('parent-ion-mass-errors')" onmouseout="reset_tooltip('parent-ion-mass-errors-tooltip')"><span class="tooltiptext" id="parent-ion-mass-errors-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Using <code>"calculatedMassToCharge"</code> and <code>"experimentalMassToCharge"</code>, the
-<code>mass_measurement_error</code> function calculates the parent ion mass
-measurement error in parts per million.</p>
-<div class="sourceCode" id="cb270"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb270-1"><a href="sec-id.html#cb270-1" tabindex="-1"></a><span class="fu">summary</span>(<span class="fu">mass_measurement_error</span>(msnid))</span></code></pre></div>
-<pre><code>##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-## -2184.0640    -0.6992     0.0000    17.6146     0.7512  2012.5178</code></pre>
-<p>We then filter any matches that do not fit the +/- 20 ppm tolerance</p>
-<div class="sourceCode" id="cb272"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb272-1"><a href="sec-id.html#cb272-1" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">apply_filter</span>(msnid, <span class="st">"abs(mass_measurement_error(msnid)) &lt; 20"</span>)</span>
-<span id="cb272-2"><a href="sec-id.html#cb272-2" tabindex="-1"></a><span class="fu">summary</span>(<span class="fu">mass_measurement_error</span>(msnid))</span></code></pre></div>
-<pre><code>##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-## -19.7797  -0.5866   0.0000  -0.2970   0.5713  19.6758</code></pre>
-</div>
-<div id="filtering-criteria" class="section level3" number="4.9.5">
-<h3>
-<span class="header-section-number">4.9.5</span> Filtering criteria<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('filtering-criteria')" onmouseout="reset_tooltip('filtering-criteria-tooltip')"><span class="tooltiptext" id="filtering-criteria-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Filtering of the identification data will rely on</p>
-<ul>
-<li>-log10 transformed MS-GF+ Spectrum E-value, reflecting the goodness
-of match between experimental and theoretical fragmentation patterns</li>
-</ul>
-<div class="sourceCode" id="cb274"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb274-1"><a href="sec-id.html#cb274-1" tabindex="-1"></a>msnid<span class="sc">$</span>msmsScore <span class="ot">&lt;-</span> <span class="sc">-</span><span class="fu">log10</span>(msnid<span class="sc">$</span><span class="st">`</span><span class="at">MS-GF:SpecEValue</span><span class="st">`</span>)</span></code></pre></div>
-<ul>
-<li>the absolute mass measurement error (in ppm units) of the parent ion</li>
-</ul>
-<div class="sourceCode" id="cb275"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb275-1"><a href="sec-id.html#cb275-1" tabindex="-1"></a>msnid<span class="sc">$</span>absParentMassErrorPPM <span class="ot">&lt;-</span> <span class="fu">abs</span>(<span class="fu">mass_measurement_error</span>(msnid))</span></code></pre></div>
-</div>
-<div id="setting-filters" class="section level3" number="4.9.6">
-<h3>
-<span class="header-section-number">4.9.6</span> Setting filters<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('setting-filters')" onmouseout="reset_tooltip('setting-filters-tooltip')"><span class="tooltiptext" id="setting-filters-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>MS2 filters are handled by a special <code>MSnIDFilter</code> class objects, where
-individual filters are set by name (that is present in <code>names(msnid)</code>)
-and comparison operator (&gt;, &lt;, = , …) defining if we should retain
-hits with higher or lower given the threshold and finally the
-threshold value itself.</p>
-<div class="sourceCode" id="cb276"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb276-1"><a href="sec-id.html#cb276-1" tabindex="-1"></a>filtObj <span class="ot">&lt;-</span> <span class="fu">MSnIDFilter</span>(msnid)</span>
-<span id="cb276-2"><a href="sec-id.html#cb276-2" tabindex="-1"></a>filtObj<span class="sc">$</span>absParentMassErrorPPM <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="at">comparison=</span><span class="st">"&lt;"</span>, <span class="at">threshold=</span><span class="fl">10.0</span>)</span>
-<span id="cb276-3"><a href="sec-id.html#cb276-3" tabindex="-1"></a>filtObj<span class="sc">$</span>msmsScore <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="at">comparison=</span><span class="st">"&gt;"</span>, <span class="at">threshold=</span><span class="fl">10.0</span>)</span>
-<span id="cb276-4"><a href="sec-id.html#cb276-4" tabindex="-1"></a><span class="fu">show</span>(filtObj)</span></code></pre></div>
-<pre><code>## MSnIDFilter object
-## (absParentMassErrorPPM &lt; 10) &amp; (msmsScore &gt; 10)</code></pre>
-<p>We can then evaluate the filter on the identification data object,
-which returns the false discovery rate and number of retained
-identifications for the filtering criteria at hand.</p>
-<div class="sourceCode" id="cb278"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb278-1"><a href="sec-id.html#cb278-1" tabindex="-1"></a><span class="fu">evaluate_filter</span>(msnid, filtObj)</span></code></pre></div>
-<pre><code>##           fdr    n
-## PSM         0 3807
-## peptide     0 2455
-## accession   0 1009</code></pre>
-</div>
-<div id="filter-optimisation" class="section level3" number="4.9.7">
-<h3>
-<span class="header-section-number">4.9.7</span> Filter optimisation<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('filter-optimisation')" onmouseout="reset_tooltip('filter-optimisation-tooltip')"><span class="tooltiptext" id="filter-optimisation-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Rather than setting filtering values by hand, as shown above, these
-can be set automatically to meet a specific false discovery rate.</p>
-<div class="sourceCode" id="cb280"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb280-1"><a href="sec-id.html#cb280-1" tabindex="-1"></a>filtObj.grid <span class="ot">&lt;-</span> <span class="fu">optimize_filter</span>(filtObj, msnid, <span class="at">fdr.max=</span><span class="fl">0.01</span>,</span>
-<span id="cb280-2"><a href="sec-id.html#cb280-2" tabindex="-1"></a>                                <span class="at">method=</span><span class="st">"Grid"</span>, <span class="at">level=</span><span class="st">"peptide"</span>,</span>
-<span id="cb280-3"><a href="sec-id.html#cb280-3" tabindex="-1"></a>                                <span class="at">n.iter=</span><span class="dv">500</span>)</span>
-<span id="cb280-4"><a href="sec-id.html#cb280-4" tabindex="-1"></a><span class="fu">show</span>(filtObj.grid)</span></code></pre></div>
-<pre><code>## MSnIDFilter object
-## (absParentMassErrorPPM &lt; 3) &amp; (msmsScore &gt; 7.4)</code></pre>
-<div class="sourceCode" id="cb282"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb282-1"><a href="sec-id.html#cb282-1" tabindex="-1"></a><span class="fu">evaluate_filter</span>(msnid, filtObj.grid)</span></code></pre></div>
-<pre><code>##                   fdr    n
-## PSM       0.004097561 5146
-## peptide   0.006447651 3278
-## accession 0.021996616 1208</code></pre>
-<p>Filters can eventually be applied (rather than just evaluated) using
-the <code>apply_filter</code> function.</p>
-<div class="sourceCode" id="cb284"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb284-1"><a href="sec-id.html#cb284-1" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">apply_filter</span>(msnid, filtObj.grid)</span>
-<span id="cb284-2"><a href="sec-id.html#cb284-2" tabindex="-1"></a><span class="fu">show</span>(msnid)</span></code></pre></div>
-<pre><code>## MSnID object
-## Working directory: "."
-## #Spectrum Files:  1 
-## #PSMs: 5146 at 0.41 % FDR
-## #peptides: 3278 at 0.64 % FDR
-## #accessions: 1208 at 2.2 % FDR</code></pre>
-<p>And finally, identifications that matched decoy and contaminant
-protein sequences are removed</p>
-<div class="sourceCode" id="cb286"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb286-1"><a href="sec-id.html#cb286-1" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">apply_filter</span>(msnid, <span class="st">"isDecoy == FALSE"</span>)</span>
-<span id="cb286-2"><a href="sec-id.html#cb286-2" tabindex="-1"></a>msnid <span class="ot">&lt;-</span> <span class="fu">apply_filter</span>(msnid, <span class="st">"!grepl('Contaminant',accession)"</span>)</span>
-<span id="cb286-3"><a href="sec-id.html#cb286-3" tabindex="-1"></a><span class="fu">show</span>(msnid)</span></code></pre></div>
-<pre><code>## MSnID object
-## Working directory: "."
-## #Spectrum Files:  1 
-## #PSMs: 5117 at 0 % FDR
-## #peptides: 3251 at 0 % FDR
-## #accessions: 1179 at 0 % FDR</code></pre>
-</div>
-<div id="export-msnid-data" class="section level3" number="4.9.8">
-<h3>
-<span class="header-section-number">4.9.8</span> Export <code>MSnID</code> data<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('export-msnid-data')" onmouseout="reset_tooltip('export-msnid-data-tooltip')"><span class="tooltiptext" id="export-msnid-data-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>The resulting filtered identification data can be exported to a
-<code>data.frame</code> (or to a dedicated <code>MSnSet</code> data structure from the
-<code>MSnbase</code> package) for quantitative MS data, described below, and
-further processed and analysed using appropriate statistical tests.</p>
-<div class="sourceCode" id="cb288"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb288-1"><a href="sec-id.html#cb288-1" tabindex="-1"></a><span class="fu">head</span>(<span class="fu">psms</span>(msnid))</span></code></pre></div>
-<pre><code>##   spectrumID scan number(s) acquisitionNum passThreshold rank
-## 1 index=7151           8819           7151          TRUE    1
-## 2 index=8520          10419           8520          TRUE    1
-##   calculatedMassToCharge experimentalMassToCharge chargeState MS-GF:DeNovoScore
-## 1               1270.318                 1270.318           3               287
-## 2               1426.737                 1426.739           3               270
-##   MS-GF:EValue MS-GF:PepQValue MS-GF:QValue MS-GF:RawScore MS-GF:SpecEValue
-## 1 1.709082e-24               0            0            239     1.007452e-31
-## 2 3.780745e-24               0            0            230     2.217275e-31
-##   AssumedDissociationMethod IsotopeError isDecoy post pre end start accession
-## 1                       CID            0   FALSE    A   K 283   249   CE02347
-## 2                       CID            0   FALSE    A   K 182   142   CE07055
-##   length
-## 1    393
-## 2    206
-##                                                                                                                           description
-## 1 WBGene00001993; locus:hpd-1; 4-hydroxyphenylpyruvate dioxygenase; status:Confirmed; UniProt:Q22633; protein_id:CAA90315.1; T21C12.2
-## 2           WBGene00001755; locus:gst-7; glutathione S-transferase; status:Confirmed; UniProt:P91253; protein_id:AAB37846.1; F11G11.2
-##                                      pepSeq modified modification
-## 1       AISQIQEYVDYYGGSGVQHIALNTSDIITAIEALR    FALSE         &lt;NA&gt;
-## 2 SAGSGYLVGDSLTFVDLLVAQHTADLLAANAALLDEFPQFK    FALSE         &lt;NA&gt;
-##              idFile                                   spectrumFile
-## 1 c_elegans.mzid.gz c_elegans_A_3_1_21Apr10_Draco_10-03-04_dta.txt
-## 2 c_elegans.mzid.gz c_elegans_A_3_1_21Apr10_Draco_10-03-04_dta.txt
-##               databaseFile                                       peptide
-## 1 ID_004174_E48C5B52.fasta       K.AISQIQEYVDYYGGSGVQHIALNTSDIITAIEALR.A
-## 2 ID_004174_E48C5B52.fasta K.SAGSGYLVGDSLTFVDLLVAQHTADLLAANAALLDEFPQFK.A
-##   numIrregCleavages numMissCleavages msmsScore absParentMassErrorPPM
-## 1                 0                0  30.99678             0.3843772
-## 2                 0                0  30.65418             1.3689451
-##  [ reached 'max' / getOption("max.print") -- omitted 4 rows ]</code></pre>
-
-</div>
-</div>
-</div>
-<div class="footnotes">
-<hr>
-<ol start="5">
-<li id="fn5"><p>Previously named <code>PSM</code>.<a href="sec-id.html#fnref5" class="footnote-back">↩︎</a></p></li>
-<li id="fn6"><p>The rownames aren’t needed here are are removed to reduce
-to output in the the next code chunk display parts of <code>id2</code>.<a href="sec-id.html#fnref6" class="footnote-back">↩︎</a></p></li>
-</ol>
-</div>
-</body></html>
-
-<p style="text-align: center;">
-<a href="sec-raw.html"><button class="btn btn-default">Previous</button></a>
-<a href="sec-quant.html"><button class="btn btn-default">Next</button></a>
-</p>
-<p class="build-date">Page built: 
-2023-09-06
- using 
-R version 4.3.1 Patched (2023-07-10 r84676)
-</p>
-</div>
-</div>
-
-
-
-</body>
-</html>
diff --git a/docs/sec-msintro.html b/docs/sec-msintro.html
deleted file mode 100644
index 84ee364..0000000
--- a/docs/sec-msintro.html
+++ /dev/null
@@ -1,410 +0,0 @@
-<!DOCTYPE html>
-<html lang="" xml:lang="">
-<head>
-
-<meta charset="utf-8" />
-<meta name="generator" content="pandoc" />
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<meta property="og:title" content="Chapter 2 Introduction | R for Mass Spectrometry" />
-<meta property="og:type" content="book" />
-
-
-
-
-<meta name="author" content="Laurent Gatto, Sebastian Gibb, Johannes Rainer" />
-
-<meta name="date" content="2023-09-06" />
-
-
-<meta name="description" content="Chapter 2 Introduction | R for Mass Spectrometry">
-
-<title>Chapter 2 Introduction | R for Mass Spectrometry</title>
-
-<link href="libs/tufte-css-2015.12.29/tufte.css" rel="stylesheet" />
-<link href="libs/tufte-css-2015.12.29/envisioned.css" rel="stylesheet" />
-<link href="libs/msmb-css-0/msmb.css" rel="stylesheet" />
-<script>
-function toggle_visibility(id1, id2) {
-var e = document.getElementById(id1);
-var f = document.getElementById(id2);
-
-e.style.display = ((e.style.display!='none') ? 'none' : 'block');
-
-if(f.classList.contains('fa-plus-square')) {
-    f.classList.add('fa-minus-square')
-    f.classList.remove('fa-plus-square')
-} else {
-    f.classList.add('fa-plus-square')
-    f.classList.remove('fa-minus-square')
-}
-
-}
-</script>
-<script>
-function copy_link(id) {
-  var dummy = document.createElement('input'),
-  text = window.location.href.split(/[?#]/)[0] + '#' + id;
-  document.body.appendChild(dummy);
-  dummy.value = text;
-  dummy.select();
-  document.execCommand('copy');
-  document.body.removeChild(dummy);
-  
-  var tooltip = document.getElementById(id + '-tooltip');
-  tooltip.innerHTML = 'Copied!';
-}
-
-function reset_tooltip(id) {
-  var tooltip = document.getElementById(id);
-  tooltip.innerHTML = 'Copy link';
-}
-</script>
-
-
-<style type="text/css">code{white-space: pre;}</style>
-<style type="text/css">
-pre > code.sourceCode { white-space: pre; position: relative; }
-pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
-pre > code.sourceCode > span:empty { height: 1.2em; }
-
-code.sourceCode > span { color: inherit; text-decoration: inherit; }
-div.sourceCode { margin: 1em 0; }
-pre.sourceCode { margin: 0; }
-@media screen {
-div.sourceCode { overflow: auto; }
-}
-@media print {
-pre > code.sourceCode { white-space: pre-wrap; }
-pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
-}
-pre.numberSource code
-  { counter-reset: source-line 0; }
-pre.numberSource code > span
-  { position: relative; left: -4em; counter-increment: source-line; }
-pre.numberSource code > span > a:first-child::before
-  { content: counter(source-line);
-    position: relative; left: -1em; text-align: right; vertical-align: baseline;
-    border: none; display: inline-block;
-    -webkit-touch-callout: none; -webkit-user-select: none;
-    -khtml-user-select: none; -moz-user-select: none;
-    -ms-user-select: none; user-select: none;
-    padding: 0 4px; width: 4em;
-    color: #aaaaaa;
-  }
-pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
-div.sourceCode
-  {  background-color: #f8f8f8; }
-@media screen {
-pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
-}
-code span.al { color: #ef2929; } /* Alert */
-code span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
-code span.at { color: #204a87; } /* Attribute */
-code span.bn { color: #0000cf; } /* BaseN */
-code span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
-code span.ch { color: #4e9a06; } /* Char */
-code span.cn { color: #8f5902; } /* Constant */
-code span.co { color: #8f5902; font-style: italic; } /* Comment */
-code span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
-code span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
-code span.dt { color: #204a87; } /* DataType */
-code span.dv { color: #0000cf; } /* DecVal */
-code span.er { color: #a40000; font-weight: bold; } /* Error */
-code span.ex { } /* Extension */
-code span.fl { color: #0000cf; } /* Float */
-code span.fu { color: #204a87; font-weight: bold; } /* Function */
-code span.im { } /* Import */
-code span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
-code span.kw { color: #204a87; font-weight: bold; } /* Keyword */
-code span.op { color: #ce5c00; font-weight: bold; } /* Operator */
-code span.ot { color: #8f5902; } /* Other */
-code span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
-code span.sc { color: #ce5c00; font-weight: bold; } /* SpecialChar */
-code span.ss { color: #4e9a06; } /* SpecialString */
-code span.st { color: #4e9a06; } /* String */
-code span.va { color: #000000; } /* Variable */
-code span.vs { color: #4e9a06; } /* VerbatimString */
-code span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
-</style>
-
-
-<style type="text/css">
-/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
-div.csl-bib-body { }
-div.csl-entry {
-  clear: both;
-}
-.hanging div.csl-entry {
-  margin-left:2em;
-  text-indent:-2em;
-}
-div.csl-left-margin {
-  min-width:2em;
-  float:left;
-}
-div.csl-right-inline {
-  margin-left:2em;
-  padding-left:1em;
-}
-div.csl-indent {
-  margin-left: 2em;
-}
-</style>
-
-<link rel="stylesheet" href="style.css" type="text/css" />
-
-</head>
-
-<body>
-
-
-
-<div class="row">
-<div class="col-sm-12">
-<div id="TOC">
-<ul class="navbar">
-<li class="msmb"><p class="title">R for Mass Spectrometry<p><p class="author">Laurent Gatto, Sebastian Gibb, Johannes Rainer</p>
-<li class="dropdown" style="float:right">
-<a href="javascript:void(0)" class="dropbtn">&#x25BE; Chapters</a>
-<div class="dropdown-content">
-<a href="index.html" id="toc-preamble"><span class="toc-section-number">1</span> Preamble</a>
-<a id="active-page" href="sec-msintro.html" id="toc-sec-msintro"><span class="toc-section-number">2</span> Introduction</a><ul class="toc-sections">
-<li class="toc"><a href="#how-does-mass-spectrometry-work"> How does mass spectrometry work?</a></li>
-<li class="toc"><a href="#accessing-data"> Accessing data</a></li>
-</ul>
-<a href="sec-raw.html" id="toc-sec-raw"><span class="toc-section-number">3</span> Raw MS data</a>
-<a href="sec-id.html" id="toc-sec-id"><span class="toc-section-number">4</span> Identification data</a>
-<a href="sec-quant.html" id="toc-sec-quant"><span class="toc-section-number">5</span> Quantitative data</a>
-<a href="sec-anx.html" id="toc-sec-anx"><span class="toc-section-number">6</span> Annex</a>
-<a href="sec-si.html" id="toc-sec-si"><span class="toc-section-number">7</span> Additional materials and session information</a>
-</div>
-</li>
-</ul>
-</div>
-</div>
-</div>
-<div class="row">
-<div class="col-sm-12">
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body>
-<div id="sec-msintro" class="section level1" number="2">
-<h1>
-<span class="header-section-number">Chapter 2</span> Introduction</h1>
-<div id="how-does-mass-spectrometry-work" class="section level2" number="2.1">
-<h2>
-<span class="header-section-number">2.1</span> How does mass spectrometry work?<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('how-does-mass-spectrometry-work')" onmouseout="reset_tooltip('how-does-mass-spectrometry-work-tooltip')"><span class="tooltiptext" id="how-does-mass-spectrometry-work-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Mass spectrometry (MS) is a technology that <em>separates</em> charged
-molecules (ions) based on their mass to charge ratio (M/Z). It is
-often coupled to chromatography (liquid LC, but can also be gas-based
-GC). The time an analyte takes to elute from the chromatography
-column is the <em>retention time</em>.</p>
-<div class="figure" style="text-align: center">
-<span style="display:block;" id="fig:unnamed-chunk-2"></span>
-<p class="caption marginnote shownote">
-Figure 2.1: A chromatogram, illustrating the total amount of analytes over the retention time.
-</p>
-<img src="img/chromatogram.png" alt="A chromatogram, illustrating the total amount of analytes over the retention time." width="100%">
-</div>
-<p>A mass spectrometer is composed of three components:</p>
-<ol style="list-style-type: decimal">
-<li>The <em>source</em>, that ionises the molecules: examples are Matrix-assisted
-laser desorption/ionisation (MALDI) or electrospray ionisation
-(ESI).</li>
-<li>The <em>analyser</em>, that separates the ions: Time of flight (TOF) or Orbitrap.</li>
-<li>The <em>detector</em> that quantifies the ions.</li>
-</ol>
-<p>When using mass spectrometry for proteomics, the proteins are first
-digested with a protease such as trypsin. In mass shotgun proteomics,
-the analytes assayed in the mass spectrometer are peptides.</p>
-<p>Often, ions are subjected to more than a single MS round. After a
-first round of separation, the peaks in the spectra, called MS1
-spectra, represent peptides. At this stage, the only information we
-possess about these peptides are their retention time and their
-mass-to-charge (we can also infer their charge by inspecting their
-isotopic envelope, i.e. the peaks of the individual isotopes, see
-below), which is not enough to infer their identity (i.e. their
-sequence).</p>
-<p>In MSMS (or MS2), the settings of the mass spectrometer are set
-automatically to select a certain number of MS1 peaks (for example
-20)<a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a>. Once a narrow M/Z range has been
-selected (corresponding to one high-intensity peak, a peptide, and
-some background noise), it is fragmented (using for example
-collision-induced dissociation (CID), higher energy collisional
-dissociation (HCD) or electron-transfer dissociation (ETD)). The
-fragment ions are then themselves separated in the analyser to produce
-a MS2 spectrum. The unique fragment ion pattern can then be used to
-infer the peptide sequence using de novo sequencing (when the spectrum
-is of high enough quality) or using a search engine such as, for
-example Mascot, MSGF+, …, that will match the observed, experimental
-spectrum to theoretical spectra (see details below).</p>
-<div class="figure" style="text-align: center">
-<span style="display:block;" id="fig:unnamed-chunk-3"></span>
-<p class="caption marginnote shownote">
-Figure 2.2: Schematics of a mass spectrometer and two rounds of MS.
-</p>
-<img src="img/SchematicMS2.png" alt="Schematics of a mass spectrometer and two rounds of MS." width="100%">
-</div>
-<p>The animation below shows how 25 different ions (i.e. having
-different M/Z values) are separated throughout the MS analysis and are
-eventually detected (i.e. quantified). The final frame shows the
-hypothetical spectrum.</p>
-<div class="figure" style="text-align: center">
-<span style="display:block;" id="fig:unnamed-chunk-4"></span>
-<p class="caption marginnote shownote">
-Figure 2.3: Separation and detection of ions in a mass spectrometer.
-</p>
-<img src="img/mstut.gif" alt="Separation and detection of ions in a mass spectrometer." width="100%">
-</div>
-<p>The figures below illustrate the two rounds of MS. The spectrum on the
-left is an MS1 spectrum acquired after 21 minutes and 3 seconds of
-elution. 10 peaks, highlighted by dotted vertical lines, were selected
-for MS2 analysis. The peak at M/Z 460.79 (488.8) is highlighted by a
-red (orange) vertical line on the MS1 spectrum and the fragment
-spectra are shown on the MS2 spectrum on the top (bottom) right
-figure.</p>
-<div class="figure" style="text-align: center">
-<span style="display:block;" id="fig:unnamed-chunk-5"></span>
-<p class="caption marginnote shownote">
-Figure 2.4: Parent ions in the MS1 spectrum (left) and two selected fragment ions MS2 spectra (right)
-</p>
-<img src="img/MS1-MS2-spectra.png" alt="Parent ions in the MS1 spectrum (left) and two selected fragment ions MS2 spectra (right)" width="100%">
-</div>
-<p>The figures below represent the 3 dimensions of MS data: a set of
-spectra (M/Z and intensity) over retention time, as well as the
-interleaved nature of MS1 and MS2 (and there could be more levels)
-data.</p>
-<div class="figure" style="text-align: center">
-<span style="display:block;" id="fig:unnamed-chunk-6"></span>
-<p class="caption marginnote shownote">
-Figure 2.5: MS1 spectra over retention time.
-</p>
-<img src="img/F02-3D-MS1-scans-400-1200-lattice.png" alt="MS1 spectra over retention time." width="100%">
-</div>
-<div class="figure" style="text-align: center">
-<span style="display:block;" id="fig:unnamed-chunk-7"></span>
-<p class="caption marginnote shownote">
-Figure 2.6: MS2 spectra interleaved between two MS1 spectra.
-</p>
-<img src="img/F02-3D-MS1-MS2-scans-100-1200-lattice.png" alt="MS2 spectra interleaved between two MS1 spectra." width="100%">
-</div>
-</div>
-<div id="accessing-data" class="section level2" number="2.2">
-<h2>
-<span class="header-section-number">2.2</span> Accessing data<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('accessing-data')" onmouseout="reset_tooltip('accessing-data-tooltip')"><span class="tooltiptext" id="accessing-data-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<div id="from-the-proteomexchange-database" class="section level3 unnumbered">
-<h3>From the ProteomeXchange database<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('from-the-proteomexchange-database')" onmouseout="reset_tooltip('from-the-proteomexchange-database-tooltip')"><span class="tooltiptext" id="from-the-proteomexchange-database-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>MS-based proteomics data is disseminated through the
-<a href="http://www.proteomexchange.org/">ProteomeXchange</a> infrastructure,
-which centrally coordinates submission, storage and dissemination
-through multiple data repositories, such as the
-<a href="https://www.ebi.ac.uk/pride/archive/">PRoteomics IDEntifications (PRIDE)</a>
-database at the EBI for mass spectrometry-based experiments (including
-quantitative data, contrary to what the name suggests),
-<a href="http://www.peptideatlas.org/passel/">PASSEL</a> at the ISB for Selected
-Reaction Monitoring (SRM, i.e. targeted) data and the
-<a href="http://massive.ucsd.edu/ProteoSAFe/static/massive.jsp">MassIVE</a>
-resource. These data can be downloaded within R using the
-<em><a href="https://bioconductor.org/packages/3.17/rpx">rpx</a></em> package.</p>
-<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="sec-msintro.html#cb4-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"rpx"</span>)</span></code></pre></div>
-<p>Using the unique <code>PXD000001</code> identifier, we can retrieve the relevant
-metadata that will be stored in a <code>PXDataset</code> object. The names of the
-files available in this data can be retrieved with the <code>pxfiles</code>
-accessor function.</p>
-<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="sec-msintro.html#cb5-1" tabindex="-1"></a>px <span class="ot">&lt;-</span> <span class="fu">PXDataset</span>(<span class="st">"PXD000001"</span>)</span></code></pre></div>
-<pre><code>## Loading PXD000001 from cache.</code></pre>
-<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="sec-msintro.html#cb7-1" tabindex="-1"></a>px</span></code></pre></div>
-<pre><code>## Project PXD000001 with 11 files
-## </code></pre>
-<pre><code>## Resource ID BFC225 in cache in /home/lgatto/.cache/R/rpx.</code></pre>
-<pre><code>##  [1] 'F063721.dat' ... [11] 'erwinia_carotovora.fasta'
-##  Use 'pxfiles(.)' to see all files.</code></pre>
-<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="sec-msintro.html#cb11-1" tabindex="-1"></a><span class="fu">pxfiles</span>(px)</span></code></pre></div>
-<pre><code>## Project PXD000001 files (11):
-##  [remote] F063721.dat
-##  [local]  F063721.dat-mztab.txt
-##  [remote] PRIDE_Exp_Complete_Ac_22134.xml.gz
-##  [remote] PRIDE_Exp_mzData_Ac_22134.xml.gz
-##  [remote] PXD000001_mztab.txt
-##  [remote] README.txt
-##  [local]  TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-##  [remote] TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzXML
-##  [local]  TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzXML
-##  [remote] TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.raw
-##  ...</code></pre>
-<p>Other metadata for the <code>px</code> data set:</p>
-<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="sec-msintro.html#cb13-1" tabindex="-1"></a><span class="fu">pxtax</span>(px)</span></code></pre></div>
-<pre><code>## [1] "Erwinia carotovora"</code></pre>
-<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="sec-msintro.html#cb15-1" tabindex="-1"></a><span class="fu">pxurl</span>(px)</span></code></pre></div>
-<pre><code>## [1] "ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2012/03/PXD000001"</code></pre>
-<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="sec-msintro.html#cb17-1" tabindex="-1"></a><span class="fu">pxref</span>(px)</span></code></pre></div>
-<pre><code>## [1] "Gatto L, Christoforou A; Using R and Bioconductor for proteomics data analysis., Biochim Biophys Acta, 2013 May 18, doi:10.1016/j.bbapap.2013.04.032 PMID:23692960"</code></pre>
-<p>Data files can then be downloaded with the <code>pxget</code> function. Below, we
-retrieve the raw data file. The file is
-downloaded<a href="#fn2" class="footnote-ref" id="fnref2"><sup>2</sup></a>
-in the working directory and the name of the file is returned by the
-function and stored in the <code>mzf</code> variable for later use <a href="#fn3" class="footnote-ref" id="fnref3"><sup>3</sup></a>.</p>
-<div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="sec-msintro.html#cb19-1" tabindex="-1"></a>fn <span class="ot">&lt;-</span> <span class="st">"TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML"</span></span>
-<span id="cb19-2"><a href="sec-msintro.html#cb19-2" tabindex="-1"></a>mzf <span class="ot">&lt;-</span> <span class="fu">pxget</span>(px, fn)</span></code></pre></div>
-<pre><code>## Loading TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML from cache.</code></pre>
-<div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb21-1"><a href="sec-msintro.html#cb21-1" tabindex="-1"></a>mzf</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/.cache/R/rpx/8ee512042c5ff_TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML"</code></pre>
-</div>
-<div id="data-packages" class="section level3 unnumbered">
-<h3>Data packages<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('data-packages')" onmouseout="reset_tooltip('data-packages-tooltip')"><span class="tooltiptext" id="data-packages-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Some data are also distributed through dedicated packages. The
-<em><a href="https://bioconductor.org/packages/3.17/msdata">msdata</a></em>, for example, provides some
-general raw data files relevant for both proteomics and
-metabolomics.</p>
-<div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="sec-msintro.html#cb23-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"msdata"</span>)</span>
-<span id="cb23-2"><a href="sec-msintro.html#cb23-2" tabindex="-1"></a><span class="do">## proteomics raw data</span></span>
-<span id="cb23-3"><a href="sec-msintro.html#cb23-3" tabindex="-1"></a><span class="fu">proteomics</span>()</span></code></pre></div>
-<pre><code>## [1] "MRM-standmix-5.mzML.gz"                                                
-## [2] "MS3TMT10_01022016_32917-33481.mzML.gz"                                 
-## [3] "MS3TMT11.mzML"                                                         
-## [4] "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML.gz"
-## [5] "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML.gz"</code></pre>
-<div class="sourceCode" id="cb25"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb25-1"><a href="sec-msintro.html#cb25-1" tabindex="-1"></a><span class="do">## proteomics identification data</span></span>
-<span id="cb25-2"><a href="sec-msintro.html#cb25-2" tabindex="-1"></a><span class="fu">ident</span>()</span></code></pre></div>
-<pre><code>## [1] "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzid"</code></pre>
-<div class="sourceCode" id="cb27"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb27-1"><a href="sec-msintro.html#cb27-1" tabindex="-1"></a><span class="do">## quantitative data</span></span>
-<span id="cb27-2"><a href="sec-msintro.html#cb27-2" tabindex="-1"></a><span class="fu">quant</span>()</span></code></pre></div>
-<pre><code>## [1] "cptac_a_b_peptides.txt"</code></pre>
-<p>More often, such <em>experiment packages</em> distribute processed data; an
-example of such is the <em><a href="https://bioconductor.org/packages/3.17/pRolocdata">pRolocdata</a></em>
-package, that offers quantitative proteomics data.</p>
-
-</div>
-</div>
-</div>
-<div class="footnotes">
-<hr>
-<ol start="1">
-<li id="fn1"><p>Here, we will focus on data dependent acquisition (DDA), where
-MS1 peaks are selected. In data independent acquisition (DIA), all peaks
-in the MS1 spectrum are fragmented.<a href="sec-msintro.html#fnref1" class="footnote-back">↩︎</a></p></li>
-<li id="fn2"><p>If the file is already available, it is not downloaded a second time.<a href="sec-msintro.html#fnref2" class="footnote-back">↩︎</a></p></li>
-<li id="fn3"><p>This and other files are also available in the <code>msdata</code> package, described below.<a href="sec-msintro.html#fnref3" class="footnote-back">↩︎</a></p></li>
-</ol>
-</div>
-</body></html>
-
-<p style="text-align: center;">
-<a href="index.html"><button class="btn btn-default">Previous</button></a>
-<a href="sec-raw.html"><button class="btn btn-default">Next</button></a>
-</p>
-<p class="build-date">Page built: 
-2023-09-06
- using 
-R version 4.3.1 Patched (2023-07-10 r84676)
-</p>
-</div>
-</div>
-
-
-
-</body>
-</html>
diff --git a/docs/sec-quant.html b/docs/sec-quant.html
deleted file mode 100644
index aae7f09..0000000
--- a/docs/sec-quant.html
+++ /dev/null
@@ -1,1695 +0,0 @@
-<!DOCTYPE html>
-<html lang="" xml:lang="">
-<head>
-
-<meta charset="utf-8" />
-<meta name="generator" content="pandoc" />
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<meta property="og:title" content="Chapter 5 Quantitative data | R for Mass Spectrometry" />
-<meta property="og:type" content="book" />
-
-
-
-
-<meta name="author" content="Laurent Gatto, Sebastian Gibb, Johannes Rainer" />
-
-<meta name="date" content="2023-09-06" />
-
-
-<meta name="description" content="Chapter 5 Quantitative data | R for Mass Spectrometry">
-
-<title>Chapter 5 Quantitative data | R for Mass Spectrometry</title>
-
-<link href="libs/tufte-css-2015.12.29/tufte.css" rel="stylesheet" />
-<link href="libs/tufte-css-2015.12.29/envisioned.css" rel="stylesheet" />
-<link href="libs/msmb-css-0/msmb.css" rel="stylesheet" />
-<script>
-function toggle_visibility(id1, id2) {
-var e = document.getElementById(id1);
-var f = document.getElementById(id2);
-
-e.style.display = ((e.style.display!='none') ? 'none' : 'block');
-
-if(f.classList.contains('fa-plus-square')) {
-    f.classList.add('fa-minus-square')
-    f.classList.remove('fa-plus-square')
-} else {
-    f.classList.add('fa-plus-square')
-    f.classList.remove('fa-minus-square')
-}
-
-}
-</script>
-<script>
-function copy_link(id) {
-  var dummy = document.createElement('input'),
-  text = window.location.href.split(/[?#]/)[0] + '#' + id;
-  document.body.appendChild(dummy);
-  dummy.value = text;
-  dummy.select();
-  document.execCommand('copy');
-  document.body.removeChild(dummy);
-  
-  var tooltip = document.getElementById(id + '-tooltip');
-  tooltip.innerHTML = 'Copied!';
-}
-
-function reset_tooltip(id) {
-  var tooltip = document.getElementById(id);
-  tooltip.innerHTML = 'Copy link';
-}
-</script>
-
-
-<style type="text/css">code{white-space: pre;}</style>
-<style type="text/css">
-pre > code.sourceCode { white-space: pre; position: relative; }
-pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
-pre > code.sourceCode > span:empty { height: 1.2em; }
-
-code.sourceCode > span { color: inherit; text-decoration: inherit; }
-div.sourceCode { margin: 1em 0; }
-pre.sourceCode { margin: 0; }
-@media screen {
-div.sourceCode { overflow: auto; }
-}
-@media print {
-pre > code.sourceCode { white-space: pre-wrap; }
-pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
-}
-pre.numberSource code
-  { counter-reset: source-line 0; }
-pre.numberSource code > span
-  { position: relative; left: -4em; counter-increment: source-line; }
-pre.numberSource code > span > a:first-child::before
-  { content: counter(source-line);
-    position: relative; left: -1em; text-align: right; vertical-align: baseline;
-    border: none; display: inline-block;
-    -webkit-touch-callout: none; -webkit-user-select: none;
-    -khtml-user-select: none; -moz-user-select: none;
-    -ms-user-select: none; user-select: none;
-    padding: 0 4px; width: 4em;
-    color: #aaaaaa;
-  }
-pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
-div.sourceCode
-  {  background-color: #f8f8f8; }
-@media screen {
-pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
-}
-code span.al { color: #ef2929; } /* Alert */
-code span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
-code span.at { color: #204a87; } /* Attribute */
-code span.bn { color: #0000cf; } /* BaseN */
-code span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
-code span.ch { color: #4e9a06; } /* Char */
-code span.cn { color: #8f5902; } /* Constant */
-code span.co { color: #8f5902; font-style: italic; } /* Comment */
-code span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
-code span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
-code span.dt { color: #204a87; } /* DataType */
-code span.dv { color: #0000cf; } /* DecVal */
-code span.er { color: #a40000; font-weight: bold; } /* Error */
-code span.ex { } /* Extension */
-code span.fl { color: #0000cf; } /* Float */
-code span.fu { color: #204a87; font-weight: bold; } /* Function */
-code span.im { } /* Import */
-code span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
-code span.kw { color: #204a87; font-weight: bold; } /* Keyword */
-code span.op { color: #ce5c00; font-weight: bold; } /* Operator */
-code span.ot { color: #8f5902; } /* Other */
-code span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
-code span.sc { color: #ce5c00; font-weight: bold; } /* SpecialChar */
-code span.ss { color: #4e9a06; } /* SpecialString */
-code span.st { color: #4e9a06; } /* String */
-code span.va { color: #000000; } /* Variable */
-code span.vs { color: #4e9a06; } /* VerbatimString */
-code span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
-</style>
-
-
-<style type="text/css">
-/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
-div.csl-bib-body { }
-div.csl-entry {
-  clear: both;
-}
-.hanging div.csl-entry {
-  margin-left:2em;
-  text-indent:-2em;
-}
-div.csl-left-margin {
-  min-width:2em;
-  float:left;
-}
-div.csl-right-inline {
-  margin-left:2em;
-  padding-left:1em;
-}
-div.csl-indent {
-  margin-left: 2em;
-}
-</style>
-
-<link rel="stylesheet" href="style.css" type="text/css" />
-
-</head>
-
-<body>
-
-
-
-<div class="row">
-<div class="col-sm-12">
-<div id="TOC">
-<ul class="navbar">
-<li class="msmb"><p class="title">R for Mass Spectrometry<p><p class="author">Laurent Gatto, Sebastian Gibb, Johannes Rainer</p>
-<li class="dropdown" style="float:right">
-<a href="javascript:void(0)" class="dropbtn">&#x25BE; Chapters</a>
-<div class="dropdown-content">
-<a href="index.html" id="toc-preamble"><span class="toc-section-number">1</span> Preamble</a>
-<a href="sec-msintro.html" id="toc-sec-msintro"><span class="toc-section-number">2</span> Introduction</a>
-<a href="sec-raw.html" id="toc-sec-raw"><span class="toc-section-number">3</span> Raw MS data</a>
-<a href="sec-id.html" id="toc-sec-id"><span class="toc-section-number">4</span> Identification data</a>
-<a id="active-page" href="sec-quant.html" id="toc-sec-quant"><span class="toc-section-number">5</span> Quantitative data</a><ul class="toc-sections">
-<li class="toc"><a href="#quantitation-methodologies"> Quantitation methodologies</a></li>
-<li class="toc"><a href="#sec-qf"> QFeatures</a></li>
-<li class="toc"><a href="#creating-qfeatures-object"> Creating <code>QFeatures</code> object</a></li>
-<li class="toc"><a href="#analysis-pipeline"> Analysis pipeline</a></li>
-<li class="toc"><a href="#summary-exercice"> Summary exercice</a></li>
-</ul>
-<a href="sec-anx.html" id="toc-sec-anx"><span class="toc-section-number">6</span> Annex</a>
-<a href="sec-si.html" id="toc-sec-si"><span class="toc-section-number">7</span> Additional materials and session information</a>
-</div>
-</li>
-</ul>
-</div>
-</div>
-</div>
-<div class="row">
-<div class="col-sm-12">
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body>
-<div id="sec-quant" class="section level1" number="5">
-<h1>
-<span class="header-section-number">Chapter 5</span> Quantitative data</h1>
-<div id="quantitation-methodologies" class="section level2" number="5.1">
-<h2>
-<span class="header-section-number">5.1</span> Quantitation methodologies<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('quantitation-methodologies')" onmouseout="reset_tooltip('quantitation-methodologies-tooltip')"><span class="tooltiptext" id="quantitation-methodologies-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>There are a wide range of proteomics quantitation techniques that can
-broadly be classified as labelled vs. label-free, depending on whether
-the features are labelled prior to the MS acquisition and the MS level at
-which quantitation is inferred, namely MS1 or MS2.</p>
-<table>
-<thead><tr class="header">
-<th align="left"></th>
-<th align="left">Label-free</th>
-<th align="left">Labelled</th>
-</tr></thead>
-<tbody>
-<tr class="odd">
-<td align="left">MS1</td>
-<td align="left">XIC</td>
-<td align="left">SILAC, 15N</td>
-</tr>
-<tr class="even">
-<td align="left">MS2</td>
-<td align="left">Counting</td>
-<td align="left">iTRAQ, TMT</td>
-</tr>
-</tbody>
-</table>
-<div id="label-free-ms2-spectral-counting" class="section level3" number="5.1.1">
-<h3>
-<span class="header-section-number">5.1.1</span> Label-free MS2: Spectral counting<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('label-free-ms2-spectral-counting')" onmouseout="reset_tooltip('label-free-ms2-spectral-counting-tooltip')"><span class="tooltiptext" id="label-free-ms2-spectral-counting-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>In spectral counting, one simply counts the number of quantified
-peptides that are assigned to a protein.</p>
-<div class="figure">
-<span style="display:block;" id="fig:sc"></span>
-<p class="caption marginnote shownote">
-Figure 5.1: Spectral counting. Figure from the <code>Pbase</code> package.
-</p>
-<img src="img/pbase.png" alt="Spectral counting. Figure from the `Pbase` package." width="75%">
-</div>
-</div>
-<div id="labelled-ms2-isobaric-tagging" class="section level3" number="5.1.2">
-<h3>
-<span class="header-section-number">5.1.2</span> Labelled MS2: Isobaric tagging<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('labelled-ms2-isobaric-tagging')" onmouseout="reset_tooltip('labelled-ms2-isobaric-tagging-tooltip')"><span class="tooltiptext" id="labelled-ms2-isobaric-tagging-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Isobaric tagging refers to the labelling using isobaric tags,
-i.e. chemical tags that have the same mass and hence can’t be
-distinguished by the spectrometer. The peptides of different samples (4,
-6, 10, 11 or 16) are labelled with different tags and combined prior
-to mass spectrometry acquisition. Given that they are isobaric, all
-identical peptides, irrespective of the tag and thus the sample of
-origin, are co-analysed, up to fragmentation prior to MS2
-analysis. During fragmentation, the isobaric tags fall off, fragment
-themselves, and result in a set of sample specific peaks. These
-specific peaks can be used to infer sample-specific quantitation,
-while the rest of the MS2 spectrum is used for identification.</p>
-<div class="figure">
-<span style="display:block;" id="fig:itraq"></span>
-<p class="caption marginnote shownote">
-Figure 5.2: iTRAQ 4-plex isobaric tagging. Tandem Mass Tags (TMT) offer up to 16 tags.
-</p>
-<img src="img/itraq.png" alt="iTRAQ 4-plex isobaric tagging. Tandem Mass Tags (TMT) offer up to 16 tags." width="75%">
-</div>
-</div>
-<div id="label-free-ms1-extracted-ion-chromatograms" class="section level3" number="5.1.3">
-<h3>
-<span class="header-section-number">5.1.3</span> Label-free MS1: extracted ion chromatograms<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('label-free-ms1-extracted-ion-chromatograms')" onmouseout="reset_tooltip('label-free-ms1-extracted-ion-chromatograms-tooltip')"><span class="tooltiptext" id="label-free-ms1-extracted-ion-chromatograms-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>In label-free quantitation, the precursor peaks that match an
-identified peptide are integrated over retention time and the area under
-that <em>extracted ion chromatogram</em> is used to quantify that peptide in
-that sample.</p>
-<div class="figure">
-<span style="display:block;" id="fig:lf"></span>
-<p class="caption marginnote shownote">
-Figure 5.3: Label-free quantitation. Figure credit <a href="https://github.com/jorainer/">Johannes Rainer</a>.
-</p>
-<img src="img/chrompeaks.png" alt="Label-free quantitation. Figure credit [Johannes Rainer](https://github.com/jorainer/)." width="75%">
-</div>
-</div>
-<div id="labelled-ms1-silac" class="section level3" number="5.1.4">
-<h3>
-<span class="header-section-number">5.1.4</span> Labelled MS1: SILAC<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('labelled-ms1-silac')" onmouseout="reset_tooltip('labelled-ms1-silac-tooltip')"><span class="tooltiptext" id="labelled-ms1-silac-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>In SILAC quantitation, samples are grown in a medium that contains
-heavy amino acids (typically arginine and lysine). All proteins grown
-in this <em>heavy</em> growth medium contain the heavy form of these amino
-acids. Two samples, one grown in heavy medium, and one grown in normal
-(light) medium are then combined and analysed together. The heavy
-peptides’ precursor peaks are systematically shifted compared to the
-light ones, and the ratio between the heights of the heavy and light
-peaks can be used to calculate peptide and protein fold-changes.</p>
-<div class="figure">
-<span style="display:block;" id="fig:silab"></span>
-<p class="caption marginnote shownote">
-Figure 5.4: SILAC quantitation. Figure credit Wikimedia Commons.
-</p>
-<img src="img/Silac.png" alt="SILAC quantitation. Figure credit Wikimedia Commons." width="75%">
-</div>
-<p>These different quantitation techniques come with their respective
-benefits and distinct challenges, such as large quantities of raw data
-processing, data transformation and normalisation, missing values, and
-different underlying statistical models for the quantitative data
-(count data for spectral counting, continuous data for the others).</p>
-<p>In terms of raw data quantitation in R/Bioconductor, most efforts have
-been devoted to MS2-level quantitation. Label-free XIC quantitation
-has been addressed in the frame of metabolomics data processing by the
-<em><a href="https://bioconductor.org/packages/3.17/xcms">xcms</a></em> infrastructure.</p>
-<!-- Below is a list of suggested packages for some common proteomics -->
-<!-- quantitation technologies: -->
-<!-- * Isobaric tagging (iTRAQ and TMT): *[MSnbase](https://bioconductor.org/packages/3.17/MSnbase)* and *[isobar](https://bioconductor.org/packages/3.17/isobar)*. -->
-<!-- * Label-free: *[xcms](https://bioconductor.org/packages/3.17/xcms)* (metabolomics). -->
-<!-- * Counting: *[MSnbase](https://bioconductor.org/packages/3.17/MSnbase)* and *[MSnID](https://bioconductor.org/packages/3.17/MSnID)* for -->
-<!--   peptide-spectrum matching confidence assessment. -->
-<!-- * *[N14N15](https://github.com/vladpetyuk/N14N15)* for heavy Nitrogen-labelled data. -->
-</div>
-</div>
-<div id="sec-qf" class="section level2" number="5.2">
-<h2>
-<span class="header-section-number">5.2</span> QFeatures<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('sec-qf')" onmouseout="reset_tooltip('sec-qf-tooltip')"><span class="tooltiptext" id="sec-qf-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Mass spectrometry-based quantitative proteomics data can be
-represented as a matrix of quantitative values for features (PSMs,
-peptides, proteins) arranged along the rows, measured for a set of
-samples, arranged along the columns. There is a common representation
-for such quantitative data sets, namely the <code>SummarizedExperiment</code>
-<span class="citation">(<label for="tufte-mn-8" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-8" class="margin-toggle">Morgan et al. 2020<span class="marginnote">Morgan, Martin, Valerie Obenchain, Jim Hester, and Hervé Pagès. 2020. <em>SummarizedExperiment: SummarizedExperiment Container</em>. <a href="https://bioconductor.org/packages/SummarizedExperiment">https://bioconductor.org/packages/SummarizedExperiment</a>.</span>)</span> class:</p>
-<div class="figure" style="text-align: center">
-<span style="display:block;" id="fig:sefig"></span>
-<p class="caption marginnote shownote">
-Figure 5.5: Schematic representation of the anatomy of a <code>SummarizedExperiment</code> object. (Figure taken from the <code>SummarizedExperiment</code> package vignette.)
-</p>
-<img src="img/SE.png" alt="Schematic representation of the anatomy of a `SummarizedExperiment` object. (Figure taken from the `SummarizedExperiment` package vignette.)" width="100%">
-</div>
-<ul>
-<li>The sample (columns) metadata can be accessed with the <code>colData()</code>
-function.</li>
-<li>The features (rows) metadata can be accessed with the <code>rowData()</code>
-function.</li>
-<li>If the features represent ranges along genomic coordinates, these
-can be accessed with <code>rowRanges()</code>.
-</li>
-<li>Additional metadata describing the overall experiment can be
-accessed with <code>metadata()</code>.</li>
-<li>The quantitative data can be accessed with <code>assay()</code>.</li>
-<li>
-<code>assays()</code> returns a list of matrix-like assays.</li>
-</ul>
-<div id="the-qfeatures-class" class="section level3" number="5.2.1">
-<h3>
-<span class="header-section-number">5.2.1</span> The QFeatures class<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('the-qfeatures-class')" onmouseout="reset_tooltip('the-qfeatures-class-tooltip')"><span class="tooltiptext" id="the-qfeatures-class-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>While mass spectrometers acquire data for spectra/peptides, the
-biological entities of interest are the proteins. As part of the data
-processing, we are thus required to <strong>aggregate</strong> low-level
-quantitative features into higher level data.</p>
-<div class="figure">
-<span style="display:block;" id="fig:featuresplot"></span>
-<p class="caption marginnote shownote">
-Figure 5.6: Conceptual representation of a <code>QFeatures</code> object and the aggregative relation between different assays.
-</p>
-<img src="R4MS_files/figure-html/featuresplot-1.png" alt="Conceptual representation of a `QFeatures` object and the aggregative relation between different assays." width="672">
-</div>
-<p>We are going to start to familiarise ourselves with the <code>QFeatures</code>
-class implemented in the
-<a href="https://rformassspectrometry.github.io/QFeatures/articles/QFeatures.html"><code>QFeatures</code></a>
-package. The class is derived from the Bioconductor
-<code>MultiAssayExperiment</code> <span class="citation">(<label for="tufte-mn-9" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-9" class="margin-toggle">Ramos et al. 2017<span class="marginnote">Ramos, Marcel, Lucas Schiffer, Angela Re, Rimsha Azhar, Azfar Basunia, Carmen Rodriguez Cabrera, Tiffany Chan, et al. 2017. <span>“Software for the Integration of Multi-Omics Experiments in Bioconductor.”</span> <em>Cancer Research</em> 77(21); e39-42.</span>)</span> (MAE) class. Let’s start by loading the
-<code>QFeatures</code> package.</p>
-<div class="sourceCode" id="cb290"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb290-1"><a href="sec-quant.html#cb290-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"QFeatures"</span>)</span></code></pre></div>
-<p>Next, we load the <code>feat1</code> test data, which is composed of a single
-<em>assay</em> of class <code>SummarizedExperiment</code> composed of 10 rows and 2
-columns.</p>
-<div class="sourceCode" id="cb291"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb291-1"><a href="sec-quant.html#cb291-1" tabindex="-1"></a><span class="fu">data</span>(feat1)</span>
-<span id="cb291-2"><a href="sec-quant.html#cb291-2" tabindex="-1"></a>feat1</span></code></pre></div>
-<pre><code>## An instance of class QFeatures containing 1 assays:
-##  [1] psms: SummarizedExperiment with 10 rows and 2 columns</code></pre>
-<p>Let’s perform some simple operations to familiarise ourselves with the
-<code>QFeatures</code> class:</p>
-<ul>
-<li>Extract the sample metadata using the <code>colData()</code> accessor (like you
-have previously done with <code>SummarizedExperiment</code> objects).</li>
-</ul>
-<div class="sourceCode" id="cb293"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb293-1"><a href="sec-quant.html#cb293-1" tabindex="-1"></a><span class="fu">colData</span>(feat1)</span></code></pre></div>
-<pre><code>## DataFrame with 2 rows and 1 column
-##        Group
-##    &lt;integer&gt;
-## S1         1
-## S2         2</code></pre>
-<p>We can also further annotate the experiment by adding columns to the <code>colData</code> slot:</p>
-<div class="sourceCode" id="cb295"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb295-1"><a href="sec-quant.html#cb295-1" tabindex="-1"></a><span class="fu">colData</span>(feat1)<span class="sc">$</span>X <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">"X1"</span>, <span class="st">"X2"</span>)</span>
-<span id="cb295-2"><a href="sec-quant.html#cb295-2" tabindex="-1"></a>feat1<span class="sc">$</span>Y <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">"Y1"</span>, <span class="st">"Y2"</span>)</span>
-<span id="cb295-3"><a href="sec-quant.html#cb295-3" tabindex="-1"></a><span class="fu">colData</span>(feat1)</span></code></pre></div>
-<pre><code>## DataFrame with 2 rows and 3 columns
-##        Group           X           Y
-##    &lt;integer&gt; &lt;character&gt; &lt;character&gt;
-## S1         1          X1          Y1
-## S2         2          X2          Y2</code></pre>
-<ul>
-<li>Extract the first (and only) assay composing this <code>QFeatures</code> data
-using the <code>[[</code> operator (as you have done to extract elements of a
-list) by using the assay’s index or name.</li>
-</ul>
-<div class="sourceCode" id="cb297"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb297-1"><a href="sec-quant.html#cb297-1" tabindex="-1"></a>feat1[[<span class="dv">1</span>]]</span></code></pre></div>
-<pre><code>## class: SummarizedExperiment 
-## dim: 10 2 
-## metadata(0):
-## assays(1): ''
-## rownames(10): PSM1 PSM2 ... PSM9 PSM10
-## rowData names(5): Sequence Protein Var location pval
-## colnames(2): S1 S2
-## colData names(0):</code></pre>
-<div class="sourceCode" id="cb299"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb299-1"><a href="sec-quant.html#cb299-1" tabindex="-1"></a>feat1[[<span class="st">"psms"</span>]]</span></code></pre></div>
-<pre><code>## class: SummarizedExperiment 
-## dim: 10 2 
-## metadata(0):
-## assays(1): ''
-## rownames(10): PSM1 PSM2 ... PSM9 PSM10
-## rowData names(5): Sequence Protein Var location pval
-## colnames(2): S1 S2
-## colData names(0):</code></pre>
-<ul>
-<li>Extract the <code>psms</code> assay’s row data and quantitative values.</li>
-</ul>
-<div class="sourceCode" id="cb301"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb301-1"><a href="sec-quant.html#cb301-1" tabindex="-1"></a><span class="fu">assay</span>(feat1[[<span class="dv">1</span>]])</span></code></pre></div>
-<pre><code>##       S1 S2
-## PSM1   1 11
-## PSM2   2 12
-## PSM3   3 13
-## PSM4   4 14
-## PSM5   5 15
-## PSM6   6 16
-## PSM7   7 17
-## PSM8   8 18
-## PSM9   9 19
-## PSM10 10 20</code></pre>
-<div class="sourceCode" id="cb303"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb303-1"><a href="sec-quant.html#cb303-1" tabindex="-1"></a><span class="fu">rowData</span>(feat1[[<span class="dv">1</span>]])</span></code></pre></div>
-<pre><code>## DataFrame with 10 rows and 5 columns
-##            Sequence     Protein       Var      location      pval
-##         &lt;character&gt; &lt;character&gt; &lt;integer&gt;   &lt;character&gt; &lt;numeric&gt;
-## PSM1       SYGFNAAR       ProtA         1 Mitochondr...     0.084
-## PSM2       SYGFNAAR       ProtA         2 Mitochondr...     0.077
-## PSM3       SYGFNAAR       ProtA         3 Mitochondr...     0.063
-## PSM4       ELGNDAYK       ProtA         4 Mitochondr...     0.073
-## PSM5       ELGNDAYK       ProtA         5 Mitochondr...     0.012
-## PSM6       ELGNDAYK       ProtA         6 Mitochondr...     0.011
-## PSM7  IAEESNFPFI...       ProtB         7       unknown     0.075
-## PSM8  IAEESNFPFI...       ProtB         8       unknown     0.038
-## PSM9  IAEESNFPFI...       ProtB         9       unknown     0.028
-## PSM10 IAEESNFPFI...       ProtB        10       unknown     0.097</code></pre>
-</div>
-<div id="feature-aggregation" class="section level3" number="5.2.2">
-<h3>
-<span class="header-section-number">5.2.2</span> Feature aggregation<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('feature-aggregation')" onmouseout="reset_tooltip('feature-aggregation-tooltip')"><span class="tooltiptext" id="feature-aggregation-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>The central functionality of the <code>QFeatures</code> infrastructure is the
-aggregation of features into higher-level features while retaining the
-link between the different levels. This can be done with the
-<a href="https://rformassspectrometry.github.io/QFeatures/reference/QFeatures-aggregate.html"><code>aggregateFeatures()</code> function</a>.</p>
-<p>The call below will</p>
-<ul>
-<li>operate on the <code>psms</code> assay of the <code>feat1</code> object;</li>
-<li>aggregate the rows of the assay following the grouping defined in the
-<code>Sequence</code> row data variable;</li>
-<li>perform aggregation using the <code>colMeans()</code> function;</li>
-<li>create a new assay named <code>peptides</code> and add it to the <code>feat1</code>
-object.</li>
-</ul>
-<div class="sourceCode" id="cb305"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb305-1"><a href="sec-quant.html#cb305-1" tabindex="-1"></a>feat1 <span class="ot">&lt;-</span> <span class="fu">aggregateFeatures</span>(feat1, <span class="at">i =</span> <span class="st">"psms"</span>,</span>
-<span id="cb305-2"><a href="sec-quant.html#cb305-2" tabindex="-1"></a>                           <span class="at">fcol =</span> <span class="st">"Sequence"</span>,</span>
-<span id="cb305-3"><a href="sec-quant.html#cb305-3" tabindex="-1"></a>                           <span class="at">name =</span> <span class="st">"peptides"</span>,</span>
-<span id="cb305-4"><a href="sec-quant.html#cb305-4" tabindex="-1"></a>                           <span class="at">fun =</span> colMeans)</span>
-<span id="cb305-5"><a href="sec-quant.html#cb305-5" tabindex="-1"></a>feat1</span></code></pre></div>
-<pre><code>## An instance of class QFeatures containing 2 assays:
-##  [1] psms: SummarizedExperiment with 10 rows and 2 columns 
-##  [2] peptides: SummarizedExperiment with 3 rows and 2 columns</code></pre>
-<ul>
-<li>Let’s convince ourselves that we understand the effect of feature
-aggregation and repeat the calculations manually and check the
-content of the new assay’s row data.</li>
-</ul>
-<div class="sourceCode" id="cb307"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb307-1"><a href="sec-quant.html#cb307-1" tabindex="-1"></a><span class="do">## SYGFNAAR</span></span>
-<span id="cb307-2"><a href="sec-quant.html#cb307-2" tabindex="-1"></a><span class="fu">colMeans</span>(<span class="fu">assay</span>(feat1[[<span class="dv">1</span>]])[<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>, ])</span></code></pre></div>
-<pre><code>## S1 S2 
-##  2 12</code></pre>
-<div class="sourceCode" id="cb309"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb309-1"><a href="sec-quant.html#cb309-1" tabindex="-1"></a><span class="fu">assay</span>(feat1[[<span class="dv">2</span>]])[<span class="st">"SYGFNAAR"</span>, ]</span></code></pre></div>
-<pre><code>## S1 S2 
-##  2 12</code></pre>
-<div class="sourceCode" id="cb311"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb311-1"><a href="sec-quant.html#cb311-1" tabindex="-1"></a><span class="do">## ELGNDAYK</span></span>
-<span id="cb311-2"><a href="sec-quant.html#cb311-2" tabindex="-1"></a><span class="fu">colMeans</span>(<span class="fu">assay</span>(feat1[[<span class="dv">1</span>]])[<span class="dv">4</span><span class="sc">:</span><span class="dv">6</span>, ])</span></code></pre></div>
-<pre><code>## S1 S2 
-##  5 15</code></pre>
-<div class="sourceCode" id="cb313"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb313-1"><a href="sec-quant.html#cb313-1" tabindex="-1"></a><span class="fu">assay</span>(feat1[[<span class="dv">2</span>]])[<span class="st">"ELGNDAYK"</span>, ]</span></code></pre></div>
-<pre><code>## S1 S2 
-##  5 15</code></pre>
-<div class="sourceCode" id="cb315"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb315-1"><a href="sec-quant.html#cb315-1" tabindex="-1"></a><span class="do">## IAEESNFPFIK</span></span>
-<span id="cb315-2"><a href="sec-quant.html#cb315-2" tabindex="-1"></a><span class="fu">colMeans</span>(<span class="fu">assay</span>(feat1[[<span class="dv">1</span>]])[<span class="dv">7</span><span class="sc">:</span><span class="dv">10</span>, ])</span></code></pre></div>
-<pre><code>##   S1   S2 
-##  8.5 18.5</code></pre>
-<div class="sourceCode" id="cb317"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb317-1"><a href="sec-quant.html#cb317-1" tabindex="-1"></a><span class="fu">assay</span>(feat1[[<span class="dv">2</span>]])[<span class="st">"IAEESNFPFIK"</span>, ]</span></code></pre></div>
-<pre><code>##   S1   S2 
-##  8.5 18.5</code></pre>
-<div class="sourceCode" id="cb319"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb319-1"><a href="sec-quant.html#cb319-1" tabindex="-1"></a><span class="fu">rowData</span>(feat1[[<span class="dv">2</span>]])</span></code></pre></div>
-<pre><code>## DataFrame with 3 rows and 4 columns
-##                  Sequence     Protein      location        .n
-##               &lt;character&gt; &lt;character&gt;   &lt;character&gt; &lt;integer&gt;
-## ELGNDAYK         ELGNDAYK       ProtA Mitochondr...         3
-## IAEESNFPFIK IAEESNFPFI...       ProtB       unknown         4
-## SYGFNAAR         SYGFNAAR       ProtA Mitochondr...         3</code></pre>
-<p>We can now aggregate the peptide-level data into a new protein-level
-assay using the <code>colMedians()</code> aggregation function.</p>
-<div class="sourceCode" id="cb321"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb321-1"><a href="sec-quant.html#cb321-1" tabindex="-1"></a>feat1 <span class="ot">&lt;-</span> <span class="fu">aggregateFeatures</span>(feat1, <span class="at">i =</span> <span class="st">"peptides"</span>,</span>
-<span id="cb321-2"><a href="sec-quant.html#cb321-2" tabindex="-1"></a>                           <span class="at">fcol =</span> <span class="st">"Protein"</span>,</span>
-<span id="cb321-3"><a href="sec-quant.html#cb321-3" tabindex="-1"></a>                           <span class="at">name =</span> <span class="st">"proteins"</span>,</span>
-<span id="cb321-4"><a href="sec-quant.html#cb321-4" tabindex="-1"></a>                           <span class="at">fun =</span> colMedians)</span>
-<span id="cb321-5"><a href="sec-quant.html#cb321-5" tabindex="-1"></a>feat1</span></code></pre></div>
-<pre><code>## An instance of class QFeatures containing 3 assays:
-##  [1] psms: SummarizedExperiment with 10 rows and 2 columns 
-##  [2] peptides: SummarizedExperiment with 3 rows and 2 columns 
-##  [3] proteins: SummarizedExperiment with 2 rows and 2 columns</code></pre>
-<div class="sourceCode" id="cb323"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb323-1"><a href="sec-quant.html#cb323-1" tabindex="-1"></a><span class="fu">assay</span>(feat1[[<span class="st">"proteins"</span>]])</span></code></pre></div>
-<pre><code>##        S1   S2
-## ProtA 3.5 13.5
-## ProtB 8.5 18.5</code></pre>
-</div>
-<div id="subsetting-and-filtering" class="section level3" number="5.2.3">
-<h3>
-<span class="header-section-number">5.2.3</span> Subsetting and filtering<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('subsetting-and-filtering')" onmouseout="reset_tooltip('subsetting-and-filtering-tooltip')"><span class="tooltiptext" id="subsetting-and-filtering-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>The link between the assays becomes apparent when we now subset the
-assays for protein A as shown below or using the <code>subsetByFeature()</code>
-function. This creates a new instance of class <code>QFeatures</code> containing
-assays with the expression data for the protein, its peptides and their
-PSMs.</p>
-<div class="sourceCode" id="cb325"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb325-1"><a href="sec-quant.html#cb325-1" tabindex="-1"></a>feat1[<span class="st">"ProtA"</span>, , ]</span></code></pre></div>
-<pre><code>## An instance of class QFeatures containing 3 assays:
-##  [1] psms: SummarizedExperiment with 6 rows and 2 columns 
-##  [2] peptides: SummarizedExperiment with 2 rows and 2 columns 
-##  [3] proteins: SummarizedExperiment with 1 rows and 2 columns</code></pre>
-<p>The <code>filterFeatures()</code> function can be used to filter rows of the assays
-composing a <code>QFeatures</code> object using the row data variables. We can
-for example retain rows that have a <code>pval</code> &lt; 0.05, which would only
-keep rows in the <code>psms</code> assay because the <code>pval</code> is only relevant for
-that assay.</p>
-<div class="sourceCode" id="cb327"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb327-1"><a href="sec-quant.html#cb327-1" tabindex="-1"></a><span class="fu">filterFeatures</span>(feat1, <span class="sc">~</span> pval <span class="sc">&lt;</span> <span class="fl">0.05</span>)</span></code></pre></div>
-<pre><code>## 'pval' found in 1 out of 3 assay(s)
-## No filter applied to the following assay(s) because one or more filtering variables are missing in the rowData: peptides, proteins.
-## You can control whether to remove or keep the features using the 'keep' argument (see '?filterFeature').</code></pre>
-<pre><code>## An instance of class QFeatures containing 3 assays:
-##  [1] psms: SummarizedExperiment with 4 rows and 2 columns 
-##  [2] peptides: SummarizedExperiment with 0 rows and 2 columns 
-##  [3] proteins: SummarizedExperiment with 0 rows and 2 columns</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>As the message above implies, it is also possible to apply a filter to
-only the assays that have the filtering variables by setting the <code>keep</code>
-argument.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-25" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-25', 'sol-start-25')"></span>
-</p>
-<div id="sol-body-25" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb330"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb330-1"><a href="sec-quant.html#cb330-1" tabindex="-1"></a><span class="fu">filterFeatures</span>(feat1, <span class="sc">~</span> pval <span class="sc">&lt;</span> <span class="fl">0.05</span>, <span class="at">keep =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
-<pre><code>## 'pval' found in 1 out of 3 assay(s)
-## No filter applied to the following assay(s) because one or more filtering variables are missing in the rowData: peptides, proteins.
-## You can control whether to remove or keep the features using the 'keep' argument (see '?filterFeature').</code></pre>
-<pre><code>## An instance of class QFeatures containing 3 assays:
-##  [1] psms: SummarizedExperiment with 4 rows and 2 columns 
-##  [2] peptides: SummarizedExperiment with 3 rows and 2 columns 
-##  [3] proteins: SummarizedExperiment with 2 rows and 2 columns</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>On the other hand, if we filter assay rows for those that localise to
-the mitochondrion, we retain the relevant protein, peptides and PSMs.</p>
-<div class="sourceCode" id="cb333"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb333-1"><a href="sec-quant.html#cb333-1" tabindex="-1"></a><span class="fu">filterFeatures</span>(feat1, <span class="sc">~</span> location <span class="sc">==</span> <span class="st">"Mitochondrion"</span>)</span></code></pre></div>
-<pre><code>## 'location' found in 3 out of 3 assay(s)</code></pre>
-<pre><code>## An instance of class QFeatures containing 3 assays:
-##  [1] psms: SummarizedExperiment with 6 rows and 2 columns 
-##  [2] peptides: SummarizedExperiment with 2 rows and 2 columns 
-##  [3] proteins: SummarizedExperiment with 1 rows and 2 columns</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>As an exercise, let’s filter rows that do not localise to the
-mitochondrion.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-26" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-26', 'sol-start-26')"></span>
-</p>
-<div id="sol-body-26" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb336"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb336-1"><a href="sec-quant.html#cb336-1" tabindex="-1"></a><span class="fu">filterFeatures</span>(feat1, <span class="sc">~</span> location <span class="sc">!=</span> <span class="st">"Mitochondrion"</span>)</span></code></pre></div>
-<pre><code>## 'location' found in 3 out of 3 assay(s)</code></pre>
-<pre><code>## An instance of class QFeatures containing 3 assays:
-##  [1] psms: SummarizedExperiment with 4 rows and 2 columns 
-##  [2] peptides: SummarizedExperiment with 1 rows and 2 columns 
-##  [3] proteins: SummarizedExperiment with 1 rows and 2 columns</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>You can refer to the <a href="https://rformassspectrometry.github.io/QFeatures/articles/QFeatures.html"><em>Quantitative features for mass spectrometry
-data</em></a>
-vignette and the <code>QFeatures</code> <a href="https://rformassspectrometry.github.io/QFeatures/reference/QFeatures-class.html">manual
-page</a>
-for more details about the class.</p>
-</div>
-</div>
-<div id="creating-qfeatures-object" class="section level2" number="5.3">
-<h2>
-<span class="header-section-number">5.3</span> Creating <code>QFeatures</code> object<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('creating-qfeatures-object')" onmouseout="reset_tooltip('creating-qfeatures-object-tooltip')"><span class="tooltiptext" id="creating-qfeatures-object-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>While <code>QFeatures</code> objects can be created manually (see <code>?QFeatures</code>
-for details), most users have quantitative data in a spreadsheet or
-a data.frame. In such cases, the easiest is to use the <code>readQFeatures</code>
-function to extract the quantitative data and metadata columns. Below,
-we load the <code>hlpsms</code> dataframe that contains data for 3010
-PSMs from the TMT-10plex <em>hyper</em>LOPIT spatial proteomics experiment
-from <span class="citation">(<label for="tufte-mn-10" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-10" class="margin-toggle">Christoforou et al. 2016<span class="marginnote">Christoforou, Andy, Claire M Mulvey, Lisa M Breckels, Aikaterini Geladaki, Tracey Hurrell, Penelope C Hayward, Thomas Naake, et al. 2016. <span>“A Draft Map of the Mouse Pluripotent Stem Cell Spatial Proteome.”</span> <em>Nat Commun</em> 7: 8992. <a href="https://doi.org/10.1038/ncomms9992">https://doi.org/10.1038/ncomms9992</a>.</span>)</span>. The <code>ecol</code> argument specifies that columns
-1 to 10 contain quantitation data, and the <code>name</code> argument that the assay should be named
-<code>psms</code> in the returned <code>QFeatures</code> object, to reflect the nature of
-the data.</p>
-<div class="sourceCode" id="cb339"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb339-1"><a href="sec-quant.html#cb339-1" tabindex="-1"></a><span class="fu">data</span>(hlpsms)</span>
-<span id="cb339-2"><a href="sec-quant.html#cb339-2" tabindex="-1"></a>hl <span class="ot">&lt;-</span> <span class="fu">readQFeatures</span>(hlpsms, <span class="at">ecol =</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">10</span>, <span class="at">name =</span> <span class="st">"psms"</span>)</span>
-<span id="cb339-3"><a href="sec-quant.html#cb339-3" tabindex="-1"></a>hl</span></code></pre></div>
-<pre><code>## An instance of class QFeatures containing 1 assays:
-##  [1] psms: SummarizedExperiment with 3010 rows and 10 columns</code></pre>
-<p>Below, we see that we can extract an assay using its index or its
-name. The individual assays are stored as <em>SummarizedExperiment</em>
-objects, and we can further access their quantitative data and metadata using
-the <code>assay</code> and <code>rowData</code> functions.</p>
-<div class="sourceCode" id="cb341"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb341-1"><a href="sec-quant.html#cb341-1" tabindex="-1"></a>hl[[<span class="dv">1</span>]]</span></code></pre></div>
-<pre><code>## class: SummarizedExperiment 
-## dim: 3010 10 
-## metadata(0):
-## assays(1): ''
-## rownames(3010): 1 2 ... 3009 3010
-## rowData names(18): Sequence ProteinDescriptions ... RTmin markers
-## colnames(10): X126 X127C ... X130N X131
-## colData names(0):</code></pre>
-<div class="sourceCode" id="cb343"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb343-1"><a href="sec-quant.html#cb343-1" tabindex="-1"></a>hl[[<span class="st">"psms"</span>]]</span></code></pre></div>
-<pre><code>## class: SummarizedExperiment 
-## dim: 3010 10 
-## metadata(0):
-## assays(1): ''
-## rownames(3010): 1 2 ... 3009 3010
-## rowData names(18): Sequence ProteinDescriptions ... RTmin markers
-## colnames(10): X126 X127C ... X130N X131
-## colData names(0):</code></pre>
-<div class="sourceCode" id="cb345"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb345-1"><a href="sec-quant.html#cb345-1" tabindex="-1"></a><span class="fu">head</span>(<span class="fu">assay</span>(hl[[<span class="st">"psms"</span>]]))</span></code></pre></div>
-<pre><code>##         X126      X127C       X127N      X128C       X128N      X129C
-## 1 0.12283431 0.08045915 0.070804055 0.09386901 0.051815695 0.13034383
-## 2 0.35268185 0.14162381 0.167523880 0.07843497 0.071087436 0.03214548
-## 3 0.01546089 0.16142297 0.086938133 0.23120844 0.114664348 0.09610188
-## 4 0.04702854 0.09288723 0.102012167 0.11125409 0.067969116 0.14155358
-## 5 0.01044693 0.15866147 0.167315736 0.21017494 0.147946673 0.07088253
-## 6 0.04955362 0.01215244 0.002477681 0.01297833 0.002988949 0.06253195
-##        X129N       X130C      X130N       X131
-## 1 0.17540095 0.040068658 0.11478839 0.11961594
-## 2 0.06686260 0.031961793 0.02810434 0.02957384
-## 3 0.15977819 0.010127118 0.08059400 0.04370403
-## 4 0.18015910 0.035329902 0.12166589 0.10014038
-## 5 0.17555789 0.007088253 0.02884754 0.02307803
-## 6 0.01726511 0.172651119 0.37007905 0.29732174</code></pre>
-<div class="sourceCode" id="cb347"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb347-1"><a href="sec-quant.html#cb347-1" tabindex="-1"></a><span class="fu">head</span>(<span class="fu">rowData</span>(hl[[<span class="st">"psms"</span>]]))</span></code></pre></div>
-<pre><code>## DataFrame with 6 rows and 18 columns
-##      Sequence ProteinDescriptions NbProteins ProteinGroupAccessions
-##   &lt;character&gt;         &lt;character&gt;  &lt;integer&gt;            &lt;character&gt;
-## 1     SQGEIDk       Tetratrico...          1                 Q8BYY4
-## 2     YEAQGDk       Vacuolar p...          1                 P46467
-## 3     TTScDTk       C-type man...          1                 Q64449
-## 4     aEELESR       Liprin-alp...          1                 P60469
-##   Modifications    qValue       PEP  IonScore NbMissedCleavages
-##     &lt;character&gt; &lt;numeric&gt; &lt;numeric&gt; &lt;integer&gt;         &lt;integer&gt;
-## 1 K7(TMT6ple...     0.008   0.11800        27                 0
-## 2 K7(TMT6ple...     0.001   0.01070        27                 0
-## 3 C4(Carbami...     0.008   0.11800        11                 0
-## 4 N-Term(TMT...     0.002   0.04450        24                 0
-##   IsolationInterference IonInjectTimems Intensity    Charge      mzDa      MHDa
-##               &lt;integer&gt;       &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt; &lt;numeric&gt; &lt;numeric&gt;
-## 1                     0              70    335000         2   503.274   1005.54
-## 2                     0              70    926000         2   520.267   1039.53
-## 3                     0              70    159000         2   521.258   1041.51
-## 4                     0              70    232000         2   531.785   1062.56
-##   DeltaMassPPM     RTmin       markers
-##      &lt;numeric&gt; &lt;numeric&gt;   &lt;character&gt;
-## 1        -0.38     24.02       unknown
-## 2         0.61     18.85       unknown
-## 3         1.11     10.17       unknown
-## 4         0.35     29.18       unknown
-##  [ reached getOption("max.print") -- omitted 2 rows ]</code></pre>
-<p>For further details on how to manipulate such objects, refer to the
-<em><a href="https://bioconductor.org/packages/3.17/MultiAssayExperiment">MultiAssayExperiment</a></em> <span class="citation">(<label for="tufte-mn-11" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-11" class="margin-toggle">Ramos et al. 2017<span class="marginnote">Ramos, Marcel, Lucas Schiffer, Angela Re, Rimsha Azhar, Azfar Basunia, Carmen Rodriguez Cabrera, Tiffany Chan, et al. 2017. <span>“Software for the Integration of Multi-Omics Experiments in Bioconductor.”</span> <em>Cancer Research</em> 77(21); e39-42.</span>)</span> and
-<em><a href="https://bioconductor.org/packages/3.17/SummarizedExperiment">SummarizedExperiment</a></em> <span class="citation">(<label for="tufte-mn-12" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-12" class="margin-toggle">Morgan et al. 2020<span class="marginnote">Morgan, Martin, Valerie Obenchain, Jim Hester, and Hervé Pagès. 2020. <em>SummarizedExperiment: SummarizedExperiment Container</em>. <a href="https://bioconductor.org/packages/SummarizedExperiment">https://bioconductor.org/packages/SummarizedExperiment</a>.</span>)</span> packages.</p>
-<p>It is also possible to first create a <code>SummarizedExperiment</code>, and
-only then include it in a <code>QFeatures</code> object.</p>
-<div class="sourceCode" id="cb349"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb349-1"><a href="sec-quant.html#cb349-1" tabindex="-1"></a>se <span class="ot">&lt;-</span> <span class="fu">readSummarizedExperiment</span>(hlpsms, <span class="at">ecol =</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">10</span>)</span>
-<span id="cb349-2"><a href="sec-quant.html#cb349-2" tabindex="-1"></a>se</span></code></pre></div>
-<pre><code>## class: SummarizedExperiment 
-## dim: 3010 10 
-## metadata(0):
-## assays(1): ''
-## rownames(3010): 1 2 ... 3009 3010
-## rowData names(18): Sequence ProteinDescriptions ... RTmin markers
-## colnames(10): X126 X127C ... X130N X131
-## colData names(0):</code></pre>
-<div class="sourceCode" id="cb351"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb351-1"><a href="sec-quant.html#cb351-1" tabindex="-1"></a><span class="fu">QFeatures</span>(<span class="fu">list</span>(<span class="at">psm =</span> se))</span></code></pre></div>
-<pre><code>## An instance of class QFeatures containing 1 assays:
-##  [1] psm: SummarizedExperiment with 3010 rows and 10 columns</code></pre>
-<p>At this stage, i.e. at the beginning of the analysis, whether you have
-a <code>SummarizedExperiment</code> or a <code>QFeatures</code> object, it is a good time to
-define the experimental design in the <code>colData</code> slot.</p>
-<div id="exercise-4" class="section level3 unnumbered">
-<h3>Exercise<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('exercise-4')" onmouseout="reset_tooltip('exercise-4-tooltip')"><span class="tooltiptext" id="exercise-4-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>The CPTAC spike-in study 6 <span class="citation">(<label for="tufte-mn-13" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-13" class="margin-toggle">Paulovich et al. 2010<span class="marginnote">Paulovich, Amanda G, Dean Billheimer, Amy-Joan L Ham, Lorenzo Vega-Montoto, Paul A Rudnick, David L Tabb, Pei Wang, et al. 2010. <span>“Interlaboratory Study Characterizing a Yeast Performance Standard for Benchmarking <span>LC-MS</span> Platform Performance.”</span> <em>Mol. Cell. Proteomics</em> 9 (2): 242–54.</span>)</span> combines the Sigma UPS1
-standard containing 48 different human proteins that are spiked in at
-5 different concentrations (conditions A to E) into a constant yeast
-protein background. The samples were acquired in triplicate on
-different instruments in different labs. We are going to start with a
-subset of the CPTAC study 6 containing conditions A and B for a single
-lab.</p>
-<div class="figure" style="text-align: center">
-<span style="display:block;" id="fig:cptac"></span>
-<p class="caption marginnote shownote">
-Figure 5.7: The CPTAC spike-in study design (credit Lieven Clement, statOmics, Ghent University).
-</p>
-<img src="img/cptac.png" alt="The CPTAC spike-in study design (credit Lieven Clement, statOmics, Ghent University)." width="70%">
-</div>
-<p>The peptide-level data, as processed by MaxQuant <span class="citation">(<label for="tufte-mn-14" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-14" class="margin-toggle">Cox and Mann 2008<span class="marginnote">Cox, J, and M Mann. 2008. <span>“MaxQuant Enables High Peptide Identification Rates, Individualized p.p.b.-Range Mass Accuracies and Proteome-Wide Protein Quantification.”</span> <em>Nat Biotechnol</em> 26 (12): 1367–72. <a href="https://doi.org/10.1038/nbt.1511">https://doi.org/10.1038/nbt.1511</a>.</span>)</span> is
-available in the <code>msdata</code> package:</p>
-<div class="sourceCode" id="cb353"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb353-1"><a href="sec-quant.html#cb353-1" tabindex="-1"></a><span class="fu">basename</span>(f <span class="ot">&lt;-</span> msdata<span class="sc">::</span><span class="fu">quant</span>(<span class="at">pattern =</span> <span class="st">"cptac"</span>, <span class="at">full.names =</span> <span class="cn">TRUE</span>))</span></code></pre></div>
-<pre><code>## [1] "cptac_a_b_peptides.txt"</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Read these data in as either a <code>SummarizedExperiment</code> or a <code>QFeatures</code>
-object.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-27" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-27', 'sol-start-27')"></span>
-</p>
-<div id="sol-body-27" class="solution-body" style="display: none;">
-<p>From the names of the columns, we see that the quantitative columns,
-starting with <code>"Intensity."</code> (note the dot!) are at positions 56 to
-61.</p>
-<div class="sourceCode" id="cb355"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb355-1"><a href="sec-quant.html#cb355-1" tabindex="-1"></a><span class="fu">names</span>(<span class="fu">read.delim</span>(f))</span></code></pre></div>
-<pre><code>##  [1] "Sequence"                 "N.term.cleavage.window"  
-##  [3] "C.term.cleavage.window"   "Amino.acid.before"       
-##  [5] "First.amino.acid"         "Second.amino.acid"       
-##  [7] "Second.last.amino.acid"   "Last.amino.acid"         
-##  [9] "Amino.acid.after"         "A.Count"                 
-## [11] "R.Count"                  "N.Count"                 
-## [13] "D.Count"                  "C.Count"                 
-## [15] "Q.Count"                  "E.Count"                 
-## [17] "G.Count"                  "H.Count"                 
-## [19] "I.Count"                  "L.Count"                 
-## [21] "K.Count"                  "M.Count"                 
-## [23] "F.Count"                  "P.Count"                 
-## [25] "S.Count"                  "T.Count"                 
-## [27] "W.Count"                  "Y.Count"                 
-## [29] "V.Count"                  "U.Count"                 
-## [31] "Length"                   "Missed.cleavages"        
-## [33] "Mass"                     "Proteins"                
-## [35] "Leading.razor.protein"    "Start.position"          
-## [37] "End.position"             "Unique..Groups."         
-## [39] "Unique..Proteins."        "Charges"                 
-## [41] "PEP"                      "Score"                   
-## [43] "Identification.type.6A_7" "Identification.type.6A_8"
-## [45] "Identification.type.6A_9" "Identification.type.6B_7"
-## [47] "Identification.type.6B_8" "Identification.type.6B_9"
-## [49] "Experiment.6A_7"          "Experiment.6A_8"         
-## [51] "Experiment.6A_9"          "Experiment.6B_7"         
-## [53] "Experiment.6B_8"          "Experiment.6B_9"         
-## [55] "Intensity"                "Intensity.6A_7"          
-## [57] "Intensity.6A_8"           "Intensity.6A_9"          
-## [59] "Intensity.6B_7"           "Intensity.6B_8"          
-## [61] "Intensity.6B_9"           "Reverse"                 
-## [63] "Potential.contaminant"    "id"                      
-## [65] "Protein.group.IDs"        "Mod..peptide.IDs"        
-## [67] "Evidence.IDs"             "MS.MS.IDs"               
-## [69] "Best.MS.MS"               "Oxidation..M..site.IDs"  
-## [71] "MS.MS.Count"</code></pre>
-<div class="sourceCode" id="cb357"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb357-1"><a href="sec-quant.html#cb357-1" tabindex="-1"></a>(i <span class="ot">&lt;-</span> <span class="fu">grep</span>(<span class="st">"Intensity</span><span class="sc">\\</span><span class="st">."</span>, <span class="fu">names</span>(<span class="fu">read.delim</span>(f))))</span></code></pre></div>
-<pre><code>## [1] 56 57 58 59 60 61</code></pre>
-<p>We now read these data using the <code>readSummarizedExperiment</code>
-function. This peptide-level expression data will be imported into R
-as an instance of class <code>SummarizedExperiment</code>. We also use the
-<code>fnames</code> argument to set the row-names of the <code>peptides</code> assay to the
-peptide sequences and specify that the file is a tab-separated table.</p>
-<div class="sourceCode" id="cb359"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb359-1"><a href="sec-quant.html#cb359-1" tabindex="-1"></a>cptac_se <span class="ot">&lt;-</span> <span class="fu">readSummarizedExperiment</span>(f, <span class="at">ecol =</span> i,</span>
-<span id="cb359-2"><a href="sec-quant.html#cb359-2" tabindex="-1"></a>                                     <span class="at">fnames =</span> <span class="st">"Sequence"</span>,</span>
-<span id="cb359-3"><a href="sec-quant.html#cb359-3" tabindex="-1"></a>                                     <span class="at">sep =</span> <span class="st">"</span><span class="sc">\t</span><span class="st">"</span>)</span>
-<span id="cb359-4"><a href="sec-quant.html#cb359-4" tabindex="-1"></a>cptac_se</span></code></pre></div>
-<pre><code>## class: SummarizedExperiment 
-## dim: 11466 6 
-## metadata(0):
-## assays(1): ''
-## rownames(11466): AAAAGAGGAGDSGDAVTK AAAALAGGK ... YYTVFDRDNNR
-##   YYTVFDRDNNRVGFAEAAR
-## rowData names(65): Sequence N.term.cleavage.window ...
-##   Oxidation..M..site.IDs MS.MS.Count
-## colnames(6): Intensity.6A_7 Intensity.6A_8 ... Intensity.6B_8
-##   Intensity.6B_9
-## colData names(0):</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Before proceeding, we are going to clean up the sample names by
-removing the unnecessary <em>Intensity</em> prefix and annotate the
-experiment in the object’s <code>colData</code>.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-28" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-28', 'sol-start-28')"></span>
-</p>
-<div id="sol-body-28" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb361"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb361-1"><a href="sec-quant.html#cb361-1" tabindex="-1"></a><span class="fu">colnames</span>(cptac_se) <span class="ot">&lt;-</span> <span class="fu">sub</span>(<span class="st">"I.+</span><span class="sc">\\</span><span class="st">."</span>, <span class="st">""</span>, <span class="fu">colnames</span>(cptac_se))</span>
-<span id="cb361-2"><a href="sec-quant.html#cb361-2" tabindex="-1"></a>cptac_se<span class="sc">$</span>condition <span class="ot">&lt;-</span> <span class="fu">sub</span>(<span class="st">"_[7-9]"</span>, <span class="st">""</span>, <span class="fu">colnames</span>(cptac_se))</span>
-<span id="cb361-3"><a href="sec-quant.html#cb361-3" tabindex="-1"></a>cptac_se<span class="sc">$</span>id <span class="ot">&lt;-</span> <span class="fu">sub</span>(<span class="st">"^.+_"</span>, <span class="st">""</span>, <span class="fu">colnames</span>(cptac_se))</span>
-<span id="cb361-4"><a href="sec-quant.html#cb361-4" tabindex="-1"></a><span class="fu">colData</span>(cptac_se)</span></code></pre></div>
-<pre><code>## DataFrame with 6 rows and 2 columns
-##        condition          id
-##      &lt;character&gt; &lt;character&gt;
-## 6A_7          6A           7
-## 6A_8          6A           8
-## 6A_9          6A           9
-## 6B_7          6B           7
-## 6B_8          6B           8
-## 6B_9          6B           9</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>There are many row variables that aren’t useful here. Get rid of all
-of them but <code>Sequence</code>, <code>Proteins</code>, <code>Leading.razor.protein</code>, <code>PEP</code>,
-<code>Score</code>, <code>Reverse</code>, and <code>Potential.contaminant</code>.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-29" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-29', 'sol-start-29')"></span>
-</p>
-<div id="sol-body-29" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb363"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb363-1"><a href="sec-quant.html#cb363-1" tabindex="-1"></a>keep_var <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">"Sequence"</span>, <span class="st">"Proteins"</span>, <span class="st">"Leading.razor.protein"</span>, <span class="st">"PEP"</span>,</span>
-<span id="cb363-2"><a href="sec-quant.html#cb363-2" tabindex="-1"></a>              <span class="st">"Score"</span>, <span class="st">"Reverse"</span>, <span class="st">"Potential.contaminant"</span>)</span>
-<span id="cb363-3"><a href="sec-quant.html#cb363-3" tabindex="-1"></a></span>
-<span id="cb363-4"><a href="sec-quant.html#cb363-4" tabindex="-1"></a><span class="fu">rowData</span>(cptac_se) <span class="ot">&lt;-</span> <span class="fu">rowData</span>(cptac_se)[, keep_var]</span></code></pre></div>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-</div>
-<div id="analysis-pipeline" class="section level2" number="5.4">
-<h2>
-<span class="header-section-number">5.4</span> Analysis pipeline<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('analysis-pipeline')" onmouseout="reset_tooltip('analysis-pipeline-tooltip')"><span class="tooltiptext" id="analysis-pipeline-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>A typical quantitative proteomics data processing workflow is composed of the
-following steps, which we are going to apply to the cptac data created
-above.</p>
-<ul>
-<li>Data import</li>
-<li>Exploratory data analysis (PCA)</li>
-<li>Missing data management (filtering and/or imputation)</li>
-<li>Data cleaning</li>
-<li>Transformation and normalisation</li>
-<li>Aggregation</li>
-<li>Downstream analysis</li>
-</ul>
-<div class="sourceCode" id="cb364"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb364-1"><a href="sec-quant.html#cb364-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"tidyverse"</span>)</span>
-<span id="cb364-2"><a href="sec-quant.html#cb364-2" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"ggplot2"</span>)</span>
-<span id="cb364-3"><a href="sec-quant.html#cb364-3" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"QFeatures"</span>)</span>
-<span id="cb364-4"><a href="sec-quant.html#cb364-4" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"limma"</span>)</span></code></pre></div>
-<div id="missing-values" class="section level3" number="5.4.1">
-<h3>
-<span class="header-section-number">5.4.1</span> Missing values<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('missing-values')" onmouseout="reset_tooltip('missing-values-tooltip')"><span class="tooltiptext" id="missing-values-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Missing values can be highly frequent in proteomics. There are two
-reasons supporting the existence of missing values, namely biological
-or technical.</p>
-<ol style="list-style-type: decimal">
-<li><p>Values that are missing due to the absence (or extremely low
-concentration) of a protein are observed for biological reasons,
-and their patterns <strong>aren’t random</strong> (MNAR). A protein missing
-due to the suppression of its expression will not be missing at
-random: it will be missing in the condition in which it was
-suppressed, and be present in the condition where it is expressed.</p></li>
-<li><p>Due to its data-dependent acquisition, mass spectrometry isn’t
-capable of assaying all peptides in a sample. Peptides that are
-less abundant than some of their co-eluting ions, peptides that do
-not ionise well or peptides that do not get identified might be
-sporadically missing in the final quantitation table, despite their
-presence in the biological samples. Their absence patterns are
-(completely) <strong>random</strong> (MAR or MCAR) in such cases.</p></li>
-</ol>
-<p>Often, third-party software that produces quantitative data uses zeros
-instead of properly reporting missing values. We can use the
-<code>zeroIsNA()</code> function to replace the <code>0</code> by <code>NA</code> values in our
-<code>cptac_se</code> object and then explore the missing data patterns across
-columns and rows.</p>
-<div class="sourceCode" id="cb365"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb365-1"><a href="sec-quant.html#cb365-1" tabindex="-1"></a>cptac_se <span class="ot">&lt;-</span> <span class="fu">zeroIsNA</span>(cptac_se)</span>
-<span id="cb365-2"><a href="sec-quant.html#cb365-2" tabindex="-1"></a><span class="fu">nNA</span>(cptac_se)</span></code></pre></div>
-<pre><code>## $nNA
-## DataFrame with 1 row and 2 columns
-##         nNA       pNA
-##   &lt;integer&gt; &lt;numeric&gt;
-## 1     31130  0.452497
-## 
-## $nNArows
-## DataFrame with 11466 rows and 3 columns
-##                name       nNA       pNA
-##         &lt;character&gt; &lt;integer&gt; &lt;numeric&gt;
-## 1     AAAAGAGGAG...         4  0.666667
-## 2         AAAALAGGK         0  0.000000
-## 3        AAAALAGGKK         0  0.000000
-## 4     AAADALSDLE...         0  0.000000
-## 5     AAADALSDLE...         0  0.000000
-## ...             ...       ...       ...
-## 11462 YYSIYDLGNN...         6  1.000000
-## 11463 YYTFNGPNYN...         3  0.500000
-## 11464    YYTITEVATR         4  0.666667
-## 11465 YYTVFDRDNN...         6  1.000000
-## 11466 YYTVFDRDNN...         6  1.000000
-## 
-## $nNAcols
-## DataFrame with 6 rows and 3 columns
-##          name       nNA       pNA
-##   &lt;character&gt; &lt;integer&gt; &lt;numeric&gt;
-## 1        6A_7      4743  0.413658
-## 2        6A_8      5483  0.478196
-## 3        6A_9      5320  0.463980
-## 4        6B_7      4721  0.411739
-## 5        6B_8      5563  0.485174
-## 6        6B_9      5300  0.462236</code></pre>
-<div class="figure">
-<span style="display:block;" id="fig:imagena"></span>
-<p class="caption marginnote shownote">
-Figure 5.8: Distribution of missing values (white). Peptide rows with more missing values are moved towards the top of the figure.
-</p>
-<img src="R4MS_files/figure-html/imagena-1.png" alt="Distribution of missing values (white). Peptide rows with more missing values are moved towards the top of the figure." width="672">
-</div>
-<p>Let’s now explore these missing values:</p>
-<ul>
-<li>Explore the number or proportion of missing values across peptides
-and samples of the <code>cptac_se</code> data.</li>
-</ul>
-<div class="sourceCode" id="cb367"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb367-1"><a href="sec-quant.html#cb367-1" tabindex="-1"></a><span class="fu">barplot</span>(<span class="fu">nNA</span>(cptac_se)<span class="sc">$</span>nNAcols<span class="sc">$</span>pNA)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/na2-1.png" width="672"></p>
-<div class="sourceCode" id="cb368"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb368-1"><a href="sec-quant.html#cb368-1" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">nNA</span>(cptac_se)<span class="sc">$</span>nNArows<span class="sc">$</span>nNA)</span></code></pre></div>
-<pre><code>## 
-##    0    1    2    3    4    5    6 
-## 4059  990  884  717  934  807 3075</code></pre>
-<ul>
-<li>Remove rows that have <em>too many</em> missing values. You can do this by
-hand or using the <code>filterNA()</code> function.</li>
-</ul>
-<div class="sourceCode" id="cb370"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb370-1"><a href="sec-quant.html#cb370-1" tabindex="-1"></a><span class="do">## remove rows that have 4 or more NAs out of 6</span></span>
-<span id="cb370-2"><a href="sec-quant.html#cb370-2" tabindex="-1"></a>cptac_se <span class="ot">&lt;-</span> <span class="fu">filterNA</span>(cptac_se, <span class="at">pNA =</span> <span class="dv">4</span><span class="sc">/</span><span class="dv">6</span>)</span></code></pre></div>
-</div>
-<div id="imputation" class="section level3" number="5.4.2">
-<h3>
-<span class="header-section-number">5.4.2</span> Imputation<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('imputation')" onmouseout="reset_tooltip('imputation-tooltip')"><span class="tooltiptext" id="imputation-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Imputation is the technique of replacing missing data with probable
-values. This can be done with the <code>impute()</code> method. As we have discussed
-above, there are however two types of missing values in mass
-spectrometry-based proteomics, namely data missing at random (MAR),
-and data missing not at random (MNAR). These two types of missing
-data, those missing at random, and those missing not at random, need
-to be imputed with <a href="https://rformassspectrometry.github.io/QFeatures/articles/Processing.html#imputation-1">different types of imputation
-methods</a>
-<span class="citation">(<label for="tufte-mn-15" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-15" class="margin-toggle">Lazar et al. 2016<span class="marginnote">Lazar, C, L Gatto, M Ferro, C Bruley, and T Burger. 2016. <span>“Accounting for the Multiple Natures of Missing Values in Label-Free Quantitative Proteomics Data Sets to Compare Imputation Strategies.”</span> <em>J Proteome Res</em> 15 (4): 1116–25. <a href="https://doi.org/10.1021/acs.jproteome.5b00981">https://doi.org/10.1021/acs.jproteome.5b00981</a>.</span>)</span>.</p>
-<div class="figure">
-<span style="display:block;" id="fig:miximp"></span>
-<p class="caption marginnote shownote">
-Figure 5.9: Mixed imputation method. Black cells represent presence of quantitation values and light grey corresponds to missing data. The two groups of interest are depicted in green and blue along the heatmap columns. Two classes of proteins are annotated on the left: yellow are proteins with randomly occurring missing values (if any) while proteins in brown are candidates for non-random missing value imputation.
-</p>
-<img src="R4MS_files/figure-html/miximp-1.png" alt="Mixed imputation method. Black cells represent presence of quantitation values and light grey corresponds to missing data. The two groups of interest are depicted in green and blue along the heatmap columns. Two classes of proteins are annotated on the left: yellow are proteins with randomly occurring missing values (if any) while proteins in brown are candidates for non-random missing value imputation." width="672">
-</div>
-<div class="figure">
-<span style="display:block;" id="fig:lazar"></span>
-<p class="caption marginnote shownote">
-Figure 5.10: Effect of the nature of missing values on their imputation. Root-mean-square error (RMSE) observations standard deviation ratio (RSR), KNN and MinDet imputation. Lower (blue) is better.
-</p>
-<img src="img/imp-sim.png" alt="Effect of the nature of missing values on their imputation. Root-mean-square error (RMSE) observations standard deviation ratio (RSR), KNN and MinDet imputation. Lower (blue) is better." width="100%">
-</div>
-<p>Generally, it is recommended to use <strong>hot deck</strong> methods (nearest
-neighbour (<strong>left</strong>), maximum likelihood, …) when data are missing
-at random. Conversely, MNAR features should ideally be imputed with a
-<strong>left-censor</strong> (minimum value (<strong>right</strong>), but not zero, …) method.</p>
-<p>There are various methods to perform data imputation, as described in
-<code>?impute</code>. The <em><a href="https://CRAN.R-project.org/package=imp4p">imp4p</a></em> package contains additional
-functionality, including some to estimate the randomness of missing
-data.</p>
-<p>The general syntax for imputation is shown below, using the <code>se_na2</code>
-object as an example:</p>
-<div class="sourceCode" id="cb371"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb371-1"><a href="sec-quant.html#cb371-1" tabindex="-1"></a><span class="fu">data</span>(se_na2)</span>
-<span id="cb371-2"><a href="sec-quant.html#cb371-2" tabindex="-1"></a><span class="do">## impute missing values using knn imputation</span></span>
-<span id="cb371-3"><a href="sec-quant.html#cb371-3" tabindex="-1"></a><span class="fu">impute</span>(se_na2, <span class="at">method =</span> <span class="st">"knn"</span>)</span></code></pre></div>
-<pre><code>## Imputing along margin 1 (features/rows).</code></pre>
-<pre><code>## Warning in knnimp(x, k, maxmiss = rowmax, maxp = maxp): 12 rows with more than 50 % entries missing;
-##  mean imputation used for these rows</code></pre>
-<pre><code>## class: SummarizedExperiment 
-## dim: 689 16 
-## metadata(3): MSnbaseFiles MSnbaseProcessing MSnbaseVersion
-## assays(1): ''
-## rownames(689): AT1G09210 AT1G21750 ... AT4G11150 AT4G39080
-## rowData names(2): nNA randna
-## colnames(16): M1F1A M1F4A ... M2F8B M2F11B
-## colData names(1): nNA</code></pre>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Following the example above, apply a mixed imputation, using knn for
-data missing at random and the zero imputation for data missing not at
-random. Hint: the <code>randna</code> variable defines which features are assumed
-to be missing at random.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-30" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-30', 'sol-start-30')"></span>
-</p>
-<div id="sol-body-30" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb375"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb375-1"><a href="sec-quant.html#cb375-1" tabindex="-1"></a><span class="fu">impute</span>(se_na2, <span class="st">"mixed"</span>,</span>
-<span id="cb375-2"><a href="sec-quant.html#cb375-2" tabindex="-1"></a>       <span class="at">randna =</span> <span class="fu">rowData</span>(se_na2)<span class="sc">$</span>randna,</span>
-<span id="cb375-3"><a href="sec-quant.html#cb375-3" tabindex="-1"></a>       <span class="at">mar =</span> <span class="st">"knn"</span>, <span class="at">mnar =</span> <span class="st">"zero"</span>)</span></code></pre></div>
-<pre><code>## class: SummarizedExperiment 
-## dim: 689 16 
-## metadata(3): MSnbaseFiles MSnbaseProcessing MSnbaseVersion
-## assays(1): ''
-## rownames(689): AT1G09210 AT1G21750 ... AT4G11150 AT4G39080
-## rowData names(2): nNA randna
-## colnames(16): M1F1A M1F4A ... M2F8B M2F11B
-## colData names(1): nNA</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>When assessing missing data imputation methods, such as in <a href="https://pubs.acs.org/doi/abs/10.1021/acs.jproteome.5b00981">Lazar et
-al. (2016)</a>,
-one often replaces values with missing data, imputes these with a
-method of choice, then quantifies the difference between original
-(expected) and observed (imputed) values. Here, using the <code>se_na2</code>
-data, use this strategy to assess the difference between knn and
-Bayesian PCA imputation.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-31" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-31', 'sol-start-31')"></span>
-</p>
-<div id="sol-body-31" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb377"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb377-1"><a href="sec-quant.html#cb377-1" tabindex="-1"></a>imp1 <span class="ot">&lt;-</span> <span class="fu">impute</span>(se_na2, <span class="at">method =</span> <span class="st">"knn"</span>)</span></code></pre></div>
-<pre><code>## Warning in knnimp(x, k, maxmiss = rowmax, maxp = maxp): 12 rows with more than 50 % entries missing;
-##  mean imputation used for these rows</code></pre>
-<div class="sourceCode" id="cb379"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb379-1"><a href="sec-quant.html#cb379-1" tabindex="-1"></a>imp2 <span class="ot">&lt;-</span> <span class="fu">impute</span>(se_na2, <span class="at">method =</span> <span class="st">"bpca"</span>)</span>
-<span id="cb379-2"><a href="sec-quant.html#cb379-2" tabindex="-1"></a><span class="fu">summary</span>(<span class="fu">abs</span>(<span class="fu">assay</span>(imp1)[<span class="fu">is.na</span>(<span class="fu">assay</span>(se_na2))] <span class="sc">-</span> <span class="fu">assay</span>(imp2)[<span class="fu">is.na</span>(<span class="fu">assay</span>(se_na2))]))</span></code></pre></div>
-<pre><code>##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-## 5.332e-05 6.594e-03 1.535e-02 2.315e-02 2.855e-02 2.579e-01</code></pre>
-<div class="sourceCode" id="cb381"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb381-1"><a href="sec-quant.html#cb381-1" tabindex="-1"></a><span class="fu">summary</span>(<span class="fu">as.numeric</span>(<span class="fu">na.omit</span>(<span class="fu">assay</span>(se_na2))))</span></code></pre></div>
-<pre><code>##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-##  0.0170  0.1865  0.2440  0.2500  0.3080  0.6587</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>When assessing the impact of missing value imputation on real data,
-one can’t use the strategy above. Another useful approach is to assess
-the impact of the imputation method on the distribution of the
-quantitative data. For instance, here is the intensity distribution of
-the <code>se_na2</code> data. Verify the effect of applying <code>knn</code>, <code>zero</code>,
-<code>MinDet</code> and <code>bpca</code> on this distribution.</p>
-<div class="sourceCode" id="cb383"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb383-1"><a href="sec-quant.html#cb383-1" tabindex="-1"></a><span class="fu">plot</span>(<span class="fu">density</span>(<span class="fu">na.omit</span>(<span class="fu">assay</span>(se_na2))))</span></code></pre></div>
-<div class="figure">
-<span style="display:block;" id="fig:nasetdist"></span>
-<p class="caption marginnote shownote">
-Figure 5.11: Intensity distribution of the <code>naset</code> data.
-</p>
-<img src="R4MS_files/figure-html/nasetdist-1.png" alt="Intensity distribution of the `naset` data." width="672">
-</div>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-32" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-32', 'sol-start-32')"></span>
-</p>
-<div id="sol-body-32" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb384"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb384-1"><a href="sec-quant.html#cb384-1" tabindex="-1"></a>cls <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">"black"</span>, <span class="st">"red"</span>, <span class="st">"blue"</span>, <span class="st">"steelblue"</span>, <span class="st">"orange"</span>)</span>
-<span id="cb384-2"><a href="sec-quant.html#cb384-2" tabindex="-1"></a><span class="fu">plot</span>(<span class="fu">density</span>(<span class="fu">na.omit</span>(<span class="fu">assay</span>(se_na2))), <span class="at">col =</span> cls[<span class="dv">1</span>])</span>
-<span id="cb384-3"><a href="sec-quant.html#cb384-3" tabindex="-1"></a><span class="fu">lines</span>(<span class="fu">density</span>(<span class="fu">assay</span>(<span class="fu">impute</span>(se_na2, <span class="at">method =</span> <span class="st">"knn"</span>))), <span class="at">col =</span> cls[<span class="dv">2</span>])</span></code></pre></div>
-<pre><code>## Warning in knnimp(x, k, maxmiss = rowmax, maxp = maxp): 12 rows with more than 50 % entries missing;
-##  mean imputation used for these rows</code></pre>
-<div class="sourceCode" id="cb386"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb386-1"><a href="sec-quant.html#cb386-1" tabindex="-1"></a><span class="fu">lines</span>(<span class="fu">density</span>(<span class="fu">assay</span>(<span class="fu">impute</span>(se_na2, <span class="at">method =</span> <span class="st">"zero"</span>))), <span class="at">col =</span> cls[<span class="dv">3</span>])</span>
-<span id="cb386-2"><a href="sec-quant.html#cb386-2" tabindex="-1"></a><span class="fu">lines</span>(<span class="fu">density</span>(<span class="fu">assay</span>(<span class="fu">impute</span>(se_na2, <span class="at">method =</span> <span class="st">"MinDet"</span>))), <span class="at">col =</span> cls[<span class="dv">4</span>])</span>
-<span id="cb386-3"><a href="sec-quant.html#cb386-3" tabindex="-1"></a><span class="fu">lines</span>(<span class="fu">density</span>(<span class="fu">assay</span>(<span class="fu">impute</span>(se_na2, <span class="at">method =</span> <span class="st">"bpca"</span>))), <span class="at">col =</span> cls[<span class="dv">5</span>])</span>
-<span id="cb386-4"><a href="sec-quant.html#cb386-4" tabindex="-1"></a><span class="fu">legend</span>(<span class="st">"topright"</span>, <span class="at">legend =</span> <span class="fu">c</span>(<span class="st">"orig"</span>, <span class="st">"knn"</span>, <span class="st">"zero"</span>, <span class="st">"MinDet"</span>, <span class="st">"bpca"</span>),</span>
-<span id="cb386-5"><a href="sec-quant.html#cb386-5" tabindex="-1"></a>       <span class="at">col =</span> cls, <span class="at">lwd =</span> <span class="dv">2</span>, <span class="at">bty =</span> <span class="st">"n"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/naex3-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p><strong>Tip</strong>: When downstream analyses permit, it might be safer not to
-impute data and deal explicitly with missing values. Indeed missing
-data imputation is not straightforward, and is likely to dramatically
-fail when a high proportion of data is missing (10s of %). It is
-possible to keep NAs when performing hypothesis tests<a href="#fn7" class="footnote-ref" id="fnref7"><sup>7</sup></a>, but (generally) not to perform a principal component
-analysis.</p>
-</div>
-<div id="identification-quality-control" class="section level3" number="5.4.3">
-<h3>
-<span class="header-section-number">5.4.3</span> Identification quality control<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('identification-quality-control')" onmouseout="reset_tooltip('identification-quality-control-tooltip')"><span class="tooltiptext" id="identification-quality-control-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>As discussed in the previous chapter, PSMs are deemed relevant after
-comparison against hits from a decoy database. The origin of these
-hits is recorded with <code>+</code> in the <code>Reverse</code> variable:</p>
-<div class="sourceCode" id="cb387"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb387-1"><a href="sec-quant.html#cb387-1" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">rowData</span>(cptac_se)<span class="sc">$</span>Reverse)</span></code></pre></div>
-<pre><code>## 
-##         + 
-## 7572   12</code></pre>
-<p>Similarly, a proteomics experiment is also searched against a database
-of contaminants:</p>
-<div class="sourceCode" id="cb389"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb389-1"><a href="sec-quant.html#cb389-1" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">rowData</span>(cptac_se)<span class="sc">$</span>Potential.contaminant)</span></code></pre></div>
-<pre><code>## 
-##         + 
-## 7558   26</code></pre>
-<p>Let’s visualise some of the cptac’s metadata using standard <code>ggplot2</code>
-code:</p>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Visualise the identification score and the posterior error
-probability (PEP) distributions from forward and reverse hits and
-interpret the figure.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-33" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-33', 'sol-start-33')"></span>
-</p>
-<div id="sol-body-33" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb391"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb391-1"><a href="sec-quant.html#cb391-1" tabindex="-1"></a><span class="fu">rowData</span>(cptac_se) <span class="sc">%&gt;%</span></span>
-<span id="cb391-2"><a href="sec-quant.html#cb391-2" tabindex="-1"></a>    <span class="fu">as_tibble</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb391-3"><a href="sec-quant.html#cb391-3" tabindex="-1"></a>    <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> Score, <span class="at">colour =</span> Reverse)) <span class="sc">+</span></span>
-<span id="cb391-4"><a href="sec-quant.html#cb391-4" tabindex="-1"></a>    <span class="fu">geom_density</span>()</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/idqc1-1.png" width="672"></p>
-<div class="sourceCode" id="cb392"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb392-1"><a href="sec-quant.html#cb392-1" tabindex="-1"></a><span class="fu">rowData</span>(cptac_se) <span class="sc">%&gt;%</span></span>
-<span id="cb392-2"><a href="sec-quant.html#cb392-2" tabindex="-1"></a>    <span class="fu">as_tibble</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb392-3"><a href="sec-quant.html#cb392-3" tabindex="-1"></a>    <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> PEP, <span class="at">colour =</span> Reverse)) <span class="sc">+</span></span>
-<span id="cb392-4"><a href="sec-quant.html#cb392-4" tabindex="-1"></a>    <span class="fu">geom_density</span>()</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/idqc2-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p><strong>Note</strong>: it is also possible to compute and visualise protein groups
-as connected components starting from a quantitative dataset such as a
-<code>SummarizedExperiment</code>. See the <a href="https://rformassspectrometry.github.io/PSMatch/articles/AdjacencyMatrix.html#using-quantitative-data"><em>Using quantitative
-data</em></a>
-section in the <em>Understanding protein groups with adjacency matrices</em>
-vignette.</p>
-</div>
-<div id="creating-the-qfeatures-data" class="section level3" number="5.4.4">
-<h3>
-<span class="header-section-number">5.4.4</span> Creating the QFeatures data<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('creating-the-qfeatures-data')" onmouseout="reset_tooltip('creating-the-qfeatures-data-tooltip')"><span class="tooltiptext" id="creating-the-qfeatures-data-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>We can now create our <code>QFeatures</code> object using the
-<code>SummarizedExperiment</code> as shown below.</p>
-<div class="sourceCode" id="cb393"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb393-1"><a href="sec-quant.html#cb393-1" tabindex="-1"></a>cptac <span class="ot">&lt;-</span> <span class="fu">QFeatures</span>(<span class="fu">list</span>(<span class="at">peptides =</span> cptac_se))</span>
-<span id="cb393-2"><a href="sec-quant.html#cb393-2" tabindex="-1"></a>cptac</span></code></pre></div>
-<pre><code>## An instance of class QFeatures containing 1 assays:
-##  [1] peptides: SummarizedExperiment with 7584 rows and 6 columns</code></pre>
-<p>We should also assign the <code>QFeatures</code> column data with the
-<code>SummarizedExperiment</code> slot.</p>
-<div class="sourceCode" id="cb395"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb395-1"><a href="sec-quant.html#cb395-1" tabindex="-1"></a><span class="fu">colData</span>(cptac) <span class="ot">&lt;-</span> <span class="fu">colData</span>(cptac_se)</span></code></pre></div>
-<p>Note that it is also possible to directly create a <code>QFeatures</code> object
-with the <code>readQFeatures()</code> function and the same arguments as the
-<code>readSummarizedExperiment()</code> used above. In addition, most functions
-used above and below work on single <code>SummarizedExperiment</code> objects or
-assays within a <code>QFeatures</code> object.</p>
-</div>
-<div id="filtering-out-contaminants-and-reverse-hits" class="section level3" number="5.4.5">
-<h3>
-<span class="header-section-number">5.4.5</span> Filtering out contaminants and reverse hits<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('filtering-out-contaminants-and-reverse-hits')" onmouseout="reset_tooltip('filtering-out-contaminants-and-reverse-hits-tooltip')"><span class="tooltiptext" id="filtering-out-contaminants-and-reverse-hits-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Using the <code>filterFeatures()</code> function, filter out the reverse and
-contaminant hits, and also retain those that have a posterior error
-probability smaller than 0.05.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-34" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-34', 'sol-start-34')"></span>
-</p>
-<div id="sol-body-34" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb396"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb396-1"><a href="sec-quant.html#cb396-1" tabindex="-1"></a>cptac <span class="ot">&lt;-</span></span>
-<span id="cb396-2"><a href="sec-quant.html#cb396-2" tabindex="-1"></a>    cptac <span class="sc">%&gt;%</span></span>
-<span id="cb396-3"><a href="sec-quant.html#cb396-3" tabindex="-1"></a>    <span class="fu">filterFeatures</span>(<span class="sc">~</span> Reverse <span class="sc">!=</span> <span class="st">"+"</span>) <span class="sc">%&gt;%</span></span>
-<span id="cb396-4"><a href="sec-quant.html#cb396-4" tabindex="-1"></a>    <span class="fu">filterFeatures</span>(<span class="sc">~</span> Potential.contaminant <span class="sc">!=</span> <span class="st">"+"</span>) <span class="sc">%&gt;%</span></span>
-<span id="cb396-5"><a href="sec-quant.html#cb396-5" tabindex="-1"></a>    <span class="fu">filterFeatures</span>(<span class="sc">~</span> PEP <span class="sc">&lt;</span> <span class="fl">0.05</span>)</span></code></pre></div>
-<pre><code>## 'Reverse' found in 1 out of 1 assay(s)</code></pre>
-<pre><code>## 'Potential.contaminant' found in 1 out of 1 assay(s)</code></pre>
-<pre><code>## 'PEP' found in 1 out of 1 assay(s)</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="log-transformation-and-normalisation" class="section level3" number="5.4.6">
-<h3>
-<span class="header-section-number">5.4.6</span> Log-transformation and normalisation<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('log-transformation-and-normalisation')" onmouseout="reset_tooltip('log-transformation-and-normalisation-tooltip')"><span class="tooltiptext" id="log-transformation-and-normalisation-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>The two code chunks below log-transform and normalise using the assay
-<code>i</code> as input and adding a new one named as defined by <code>name</code>.</p>
-<div class="sourceCode" id="cb400"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb400-1"><a href="sec-quant.html#cb400-1" tabindex="-1"></a>cptac <span class="ot">&lt;-</span> <span class="fu">logTransform</span>(cptac, <span class="at">i =</span> <span class="st">"peptides"</span>,</span>
-<span id="cb400-2"><a href="sec-quant.html#cb400-2" tabindex="-1"></a>                      <span class="at">name =</span> <span class="st">"log_peptides"</span>)</span></code></pre></div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Use the <code>normalize()</code> method to normalise the data. The syntax is the
-same as <code>logTransform()</code>. Use the <code>center.median</code> method.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-35" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-35', 'sol-start-35')"></span>
-</p>
-<div id="sol-body-35" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb401"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb401-1"><a href="sec-quant.html#cb401-1" tabindex="-1"></a>cptac <span class="ot">&lt;-</span> <span class="fu">normalize</span>(cptac, <span class="at">i =</span> <span class="st">"log_peptides"</span>,</span>
-<span id="cb401-2"><a href="sec-quant.html#cb401-2" tabindex="-1"></a>                   <span class="at">name =</span> <span class="st">"lognorm_peptides"</span>,</span>
-<span id="cb401-3"><a href="sec-quant.html#cb401-3" tabindex="-1"></a>                   <span class="at">method =</span> <span class="st">"center.median"</span>)</span></code></pre></div>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Visualise the result of the transformations above. The
-<code>plotDensities()</code> function from the <code>limma</code> package is very
-convenient, but feel free to use boxplots, violin plots, or any other
-visualisation that you deem useful to assess the transformations.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-36" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-36', 'sol-start-36')"></span>
-</p>
-<div id="sol-body-36" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb402"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb402-1"><a href="sec-quant.html#cb402-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">3</span>))</span>
-<span id="cb402-2"><a href="sec-quant.html#cb402-2" tabindex="-1"></a>limma<span class="sc">::</span><span class="fu">plotDensities</span>(<span class="fu">assay</span>(cptac[[<span class="st">"peptides"</span>]]))</span>
-<span id="cb402-3"><a href="sec-quant.html#cb402-3" tabindex="-1"></a>limma<span class="sc">::</span><span class="fu">plotDensities</span>(<span class="fu">assay</span>(cptac[[<span class="st">"log_peptides"</span>]]))</span>
-<span id="cb402-4"><a href="sec-quant.html#cb402-4" tabindex="-1"></a>limma<span class="sc">::</span><span class="fu">plotDensities</span>(<span class="fu">assay</span>(cptac[[<span class="st">"lognorm_peptides"</span>]]))</span></code></pre></div>
-<div class="figure">
-<span style="display:block;" id="fig:plotdens"></span>
-<p class="caption marginnote shownote">
-Figure 5.12: Three peptide level assays: raw data, log transformed and normalised.
-</p>
-<img src="R4MS_files/figure-html/plotdens-1.png" alt="Three peptide level assays: raw data, log transformed and normalised." width="1440">
-</div>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="aggregation" class="section level3" number="5.4.7">
-<h3>
-<span class="header-section-number">5.4.7</span> Aggregation<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('aggregation')" onmouseout="reset_tooltip('aggregation-tooltip')"><span class="tooltiptext" id="aggregation-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Use median aggregation to aggregate peptides into protein
-values. This is not necessarily the best choice, as we will see
-later, but a good start.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-37" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-37', 'sol-start-37')"></span>
-</p>
-<div id="sol-body-37" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb403"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb403-1"><a href="sec-quant.html#cb403-1" tabindex="-1"></a>cptac <span class="ot">&lt;-</span></span>
-<span id="cb403-2"><a href="sec-quant.html#cb403-2" tabindex="-1"></a>    <span class="fu">aggregateFeatures</span>(cptac,</span>
-<span id="cb403-3"><a href="sec-quant.html#cb403-3" tabindex="-1"></a>                      <span class="st">"lognorm_peptides"</span>,</span>
-<span id="cb403-4"><a href="sec-quant.html#cb403-4" tabindex="-1"></a>                      <span class="at">name =</span> <span class="st">"proteins_med"</span>,</span>
-<span id="cb403-5"><a href="sec-quant.html#cb403-5" tabindex="-1"></a>                      <span class="at">fcol =</span> <span class="st">"Leading.razor.protein"</span>,</span>
-<span id="cb403-6"><a href="sec-quant.html#cb403-6" tabindex="-1"></a>                      <span class="at">fun =</span> colMedians,</span>
-<span id="cb403-7"><a href="sec-quant.html#cb403-7" tabindex="-1"></a>                      <span class="at">na.rm =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>Looking at the <code>.n</code> row variable computed during the aggregation, we
-see that most proteins result from the aggregation of 5 peptides or
-less, while very few proteins are accounted for by tens of peptides.</p>
-<div class="sourceCode" id="cb404"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb404-1"><a href="sec-quant.html#cb404-1" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">rowData</span>(cptac[[<span class="st">"proteins_med"</span>]])<span class="sc">$</span>.n)</span></code></pre></div>
-<pre><code>## 
-##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
-## 327 234 167 132  84  73  62  49  49  29  29  24  20  13  15  12   4   6  11   5 
-##  21  22  23  24  25  26  28  29  30  31  32  34  37  38  39  42  51  52  62 
-##   7   4   7   2   2   3   1   3   1   2   2   1   1   1   1   2   1   1   1</code></pre>
-</div>
-<div id="principal-component-analysis" class="section level3" number="5.4.8">
-<h3>
-<span class="header-section-number">5.4.8</span> Principal component analysis<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('principal-component-analysis')" onmouseout="reset_tooltip('principal-component-analysis-tooltip')"><span class="tooltiptext" id="principal-component-analysis-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<div class="sourceCode" id="cb406"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb406-1"><a href="sec-quant.html#cb406-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"factoextra"</span>)</span>
-<span id="cb406-2"><a href="sec-quant.html#cb406-2" tabindex="-1"></a></span>
-<span id="cb406-3"><a href="sec-quant.html#cb406-3" tabindex="-1"></a>pca_pep <span class="ot">&lt;-</span></span>
-<span id="cb406-4"><a href="sec-quant.html#cb406-4" tabindex="-1"></a>    cptac[[<span class="st">"lognorm_peptides"</span>]] <span class="sc">%&gt;%</span></span>
-<span id="cb406-5"><a href="sec-quant.html#cb406-5" tabindex="-1"></a>    <span class="fu">filterNA</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb406-6"><a href="sec-quant.html#cb406-6" tabindex="-1"></a>    <span class="fu">assay</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb406-7"><a href="sec-quant.html#cb406-7" tabindex="-1"></a>    <span class="fu">t</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb406-8"><a href="sec-quant.html#cb406-8" tabindex="-1"></a>    <span class="fu">prcomp</span>(<span class="at">scale =</span> <span class="cn">TRUE</span>, <span class="at">center =</span> <span class="cn">TRUE</span>) <span class="sc">%&gt;%</span></span>
-<span id="cb406-9"><a href="sec-quant.html#cb406-9" tabindex="-1"></a>    <span class="fu">fviz_pca_ind</span>(<span class="at">habillage =</span> cptac<span class="sc">$</span>condition, <span class="at">title =</span> <span class="st">"Peptides"</span>)</span>
-<span id="cb406-10"><a href="sec-quant.html#cb406-10" tabindex="-1"></a></span>
-<span id="cb406-11"><a href="sec-quant.html#cb406-11" tabindex="-1"></a>pca_prot <span class="ot">&lt;-</span></span>
-<span id="cb406-12"><a href="sec-quant.html#cb406-12" tabindex="-1"></a>    cptac[[<span class="st">"proteins_med"</span>]] <span class="sc">%&gt;%</span></span>
-<span id="cb406-13"><a href="sec-quant.html#cb406-13" tabindex="-1"></a>    <span class="fu">filterNA</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb406-14"><a href="sec-quant.html#cb406-14" tabindex="-1"></a>    <span class="fu">assay</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb406-15"><a href="sec-quant.html#cb406-15" tabindex="-1"></a>    <span class="fu">t</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb406-16"><a href="sec-quant.html#cb406-16" tabindex="-1"></a>    <span class="fu">prcomp</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb406-17"><a href="sec-quant.html#cb406-17" tabindex="-1"></a>    <span class="fu">fviz_pca_ind</span>(<span class="at">habillage =</span> cptac<span class="sc">$</span>condition,</span>
-<span id="cb406-18"><a href="sec-quant.html#cb406-18" tabindex="-1"></a>                 <span class="at">title =</span> <span class="st">"Proteins (median aggregation)"</span>)</span></code></pre></div>
-<div class="sourceCode" id="cb407"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb407-1"><a href="sec-quant.html#cb407-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"patchwork"</span>)</span>
-<span id="cb407-2"><a href="sec-quant.html#cb407-2" tabindex="-1"></a>pca_pep <span class="sc">+</span> pca_prot</span></code></pre></div>
-<div class="figure">
-<span style="display:block;" id="fig:plotpca"></span>
-<p class="caption marginnote shownote">
-Figure 5.13: Peptide and protein level PCA analyses.
-</p>
-<img src="R4MS_files/figure-html/plotpca-1.png" alt="Peptide and protein level PCA analyses." width="1152">
-</div>
-</div>
-<div id="visualisation" class="section level3" number="5.4.9">
-<h3>
-<span class="header-section-number">5.4.9</span> Visualisation<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('visualisation')" onmouseout="reset_tooltip('visualisation-tooltip')"><span class="tooltiptext" id="visualisation-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Below, we use the <code>longFormat()</code> function to extract the quantitative
-and row data in a long format, that can be directly reused by the
-tidyverse tools.</p>
-<div class="sourceCode" id="cb408"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb408-1"><a href="sec-quant.html#cb408-1" tabindex="-1"></a><span class="fu">longFormat</span>(cptac[<span class="st">"P02787ups|TRFE_HUMAN_UPS"</span>, ,</span>
-<span id="cb408-2"><a href="sec-quant.html#cb408-2" tabindex="-1"></a>                 <span class="fu">c</span>(<span class="st">"lognorm_peptides"</span>, <span class="st">"proteins_med"</span>)]) <span class="sc">%&gt;%</span></span>
-<span id="cb408-3"><a href="sec-quant.html#cb408-3" tabindex="-1"></a>    <span class="fu">as_tibble</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb408-4"><a href="sec-quant.html#cb408-4" tabindex="-1"></a>    <span class="fu">mutate</span>(<span class="at">condition =</span> <span class="fu">ifelse</span>(<span class="fu">grepl</span>(<span class="st">"A"</span>, colname), <span class="st">"A"</span>, <span class="st">"B"</span>)) <span class="sc">%&gt;%</span></span>
-<span id="cb408-5"><a href="sec-quant.html#cb408-5" tabindex="-1"></a>    <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> colname, <span class="at">y =</span> value, <span class="at">colour =</span> rowname, <span class="at">shape =</span> condition)) <span class="sc">+</span></span>
-<span id="cb408-6"><a href="sec-quant.html#cb408-6" tabindex="-1"></a>    <span class="fu">geom_point</span>(<span class="at">size =</span> <span class="dv">3</span>) <span class="sc">+</span></span>
-<span id="cb408-7"><a href="sec-quant.html#cb408-7" tabindex="-1"></a>    <span class="fu">geom_line</span>(<span class="fu">aes</span>(<span class="at">group =</span> rowname)) <span class="sc">+</span></span>
-<span id="cb408-8"><a href="sec-quant.html#cb408-8" tabindex="-1"></a>    <span class="fu">facet_grid</span>(<span class="sc">~</span> assay) <span class="sc">+</span></span>
-<span id="cb408-9"><a href="sec-quant.html#cb408-9" tabindex="-1"></a>    <span class="fu">ggtitle</span>(<span class="st">"P02787ups|TRFE_HUMAN_UPS"</span>)</span></code></pre></div>
-<div class="figure">
-<span style="display:block;" id="fig:vis"></span>
-<p class="caption marginnote shownote">
-Figure 5.14: Peptide and protein expression profile.
-</p>
-<img src="R4MS_files/figure-html/vis-1.png" alt="Peptide and protein expression profile." width="1152">
-</div>
-<p>We can also visualise the assays withing a <code>QFeatures</code> object and
-their relation.</p>
-<div class="sourceCode" id="cb409"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb409-1"><a href="sec-quant.html#cb409-1" tabindex="-1"></a><span class="fu">plot</span>(cptac)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/plotqf-1.png" width="672"></p>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>The example above shows a simple linear relationship between
-assays. Create a more interesting one by applying a different
-normalisation method on the <em>log_peptides</em> assay and aggreate that new
-normalised peptide assay. Visualise the relationship with <code>plot()</code>, as
-above.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-38" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-38', 'sol-start-38')"></span>
-</p>
-<div id="sol-body-38" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb410"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb410-1"><a href="sec-quant.html#cb410-1" tabindex="-1"></a><span class="fu">normalize</span>(cptac, <span class="st">"log_peptides"</span>,</span>
-<span id="cb410-2"><a href="sec-quant.html#cb410-2" tabindex="-1"></a>          <span class="at">name =</span> <span class="st">"logquantiles_peptides"</span>,</span>
-<span id="cb410-3"><a href="sec-quant.html#cb410-3" tabindex="-1"></a>          <span class="at">method =</span> <span class="st">"quantiles"</span>) <span class="sc">|&gt;</span></span>
-<span id="cb410-4"><a href="sec-quant.html#cb410-4" tabindex="-1"></a>    <span class="fu">aggregateFeatures</span>(</span>
-<span id="cb410-5"><a href="sec-quant.html#cb410-5" tabindex="-1"></a>        <span class="st">"logquantiles_peptides"</span>,</span>
-<span id="cb410-6"><a href="sec-quant.html#cb410-6" tabindex="-1"></a>        <span class="at">name =</span> <span class="st">"proteins_med2"</span>,</span>
-<span id="cb410-7"><a href="sec-quant.html#cb410-7" tabindex="-1"></a>        <span class="at">fcol =</span> <span class="st">"Leading.razor.protein"</span>,</span>
-<span id="cb410-8"><a href="sec-quant.html#cb410-8" tabindex="-1"></a>        <span class="at">fun =</span> colMedians,</span>
-<span id="cb410-9"><a href="sec-quant.html#cb410-9" tabindex="-1"></a>        <span class="at">na.rm =</span> <span class="cn">TRUE</span>) <span class="sc">|&gt;</span></span>
-<span id="cb410-10"><a href="sec-quant.html#cb410-10" tabindex="-1"></a>    <span class="fu">plot</span>()</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/plotqf2-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="statistical-analysis" class="section level3" number="5.4.10">
-<h3>
-<span class="header-section-number">5.4.10</span> Statistical analysis<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('statistical-analysis')" onmouseout="reset_tooltip('statistical-analysis-tooltip')"><span class="tooltiptext" id="statistical-analysis-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>R in general and Bioconductor in particular are well suited for the
-statistical analysis of quantitative proteomics data. Several
-packages provide dedicated resources for proteomics data:</p>
-<ul>
-<li><p><em><a href="https://bioconductor.org/packages/3.17/MSstats">MSstats</a></em> and <em><a href="https://bioconductor.org/packages/3.17/MSstatsTMT">MSstatsTMT</a></em>: A set of tools
-for statistical relative protein significance analysis in Data
-dependent (DDA), SRM, Data independent acquisition (DIA) and TMT
-experiments.</p></li>
-<li><p><em><a href="https://bioconductor.org/packages/3.17/msmsTests">msmsTests</a></em>: Statistical tests for label-free LC-MS/MS
-data by spectral counts, to discover differentially expressed
-proteins between two biological conditions. Three tests are
-available: Poisson GLM regression, quasi-likelihood GLM regression,
-and the negative binomial of the <em><a href="https://bioconductor.org/packages/3.17/edgeR">edgeR</a></em>
-package. All can be readily applied on <code>MSnSet</code> instances produced,
-for example by <code>MSnID</code>.</p></li>
-<li><p><em><a href="https://bioconductor.org/packages/3.17/DEP">DEP</a></em> provides an integrated analysis workflow for the
-analysis of mass spectrometry proteomics data for differential
-protein expression or differential enrichment.</p></li>
-<li><p><em><a href="https://github.com/statOmics/MSqRob">MSqRob</a></em>: The <code>MSqRob</code> package
-allows a user to do quantitative protein-level statistical inference
-on LC-MS proteomics data. More specifically, our package makes use
-of peptide-level input data, thus correcting for unbalancedness and
-peptide-specific biases. As previously shown (<a href="https://pubs.acs.org/doi/abs/10.1021/pr501223t">Goeminne et
-al. (2015)</a>), this
-approach is both more sensitive and specific than summarizing
-peptide-level input to protein-level values. Model estimates are
-stabilized by ridge regression, empirical Bayes variance estimation
-and downweighing of outliers. Currently, only label-free proteomics
-data types are
-supported. <a href="https://github.com/statOmics/msqrob2/"><code>msqrob2</code></a> is now
-available and makes use of the <code>QFeatures</code> infrastructure.</p></li>
-<li><p><em><a href="https://github.com/const-ae/proDA">proDA</a></em> accounts for missing
-values in label-free mass spectrometry data without imputation. The
-package implements a probabilistic dropout model that ensures that
-the information from observed and missing values are properly
-combined. It adds empirical Bayesian priors to increase power to
-detect differentially abundant proteins.</p></li>
-</ul>
-<p>Others, while not specfic to proteomics, are also recommended, such as
-the <em><a href="https://bioconductor.org/packages/3.17/limma">limma</a></em> package. When analysing spectral counting
-data, methods for high throughput sequencing data are
-applicable. Below, we illustrate how to apply a typical <code>edgeR</code> test
-to count data using the <code>msms.edgeR</code> function from the <code>msmsTests</code>
-package.</p>
-<p>Below, we are going to perform our statistical analysis on the protein
-data using <code>limma</code>.</p>
-<div class="sourceCode" id="cb411"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb411-1"><a href="sec-quant.html#cb411-1" tabindex="-1"></a>prots <span class="ot">&lt;-</span> <span class="fu">getWithColData</span>(cptac, <span class="st">"proteins_med"</span>)</span></code></pre></div>
-<pre><code>## Warning: 'experiments' dropped; see 'metadata'</code></pre>
-<pre><code>## Warning: Ignoring redundant column names in 'colData(x)':</code></pre>
-<p>The <em><a href="https://bioconductor.org/packages/3.17/limma">limma</a></em> package is the precursor package
-that enables the consistent application of linear models to normalliy
-distributed omics data in general, and microarrays in
-particular.</p>
-<p>The <code>limma</code> package implements an empirical Bayes method that
-borrows information across features to estimate the standard error and
-calculate (so called moderated) t statistics. This approach is
-demonstrably more powerful that a standard t-tests when the number of
-samples is low.</p>
-<p>The code chunk below illustrates how to set up the model, fit it, and
-apply the empirical Bayes moderation.</p>
-<div class="sourceCode" id="cb414"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb414-1"><a href="sec-quant.html#cb414-1" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"limma"</span>)</span>
-<span id="cb414-2"><a href="sec-quant.html#cb414-2" tabindex="-1"></a>design <span class="ot">&lt;-</span> <span class="fu">model.matrix</span>(<span class="sc">~</span> prots<span class="sc">$</span>condition)</span>
-<span id="cb414-3"><a href="sec-quant.html#cb414-3" tabindex="-1"></a>fit <span class="ot">&lt;-</span> <span class="fu">lmFit</span>(<span class="fu">assay</span>(prots), design)</span></code></pre></div>
-<pre><code>## Warning: Partial NA coefficients for 25 probe(s)</code></pre>
-<div class="sourceCode" id="cb416"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb416-1"><a href="sec-quant.html#cb416-1" tabindex="-1"></a>fit <span class="ot">&lt;-</span> <span class="fu">eBayes</span>(fit)</span></code></pre></div>
-<p>Finally, the <code>topTable()</code> function is used the extract the results for
-the coefficient of interest.</p>
-<div class="sourceCode" id="cb417"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb417-1"><a href="sec-quant.html#cb417-1" tabindex="-1"></a>res <span class="ot">&lt;-</span></span>
-<span id="cb417-2"><a href="sec-quant.html#cb417-2" tabindex="-1"></a>    <span class="fu">topTable</span>(fit, <span class="at">coef =</span> <span class="st">"prots$condition6B"</span>, <span class="at">number =</span> <span class="cn">Inf</span>) <span class="sc">%&gt;%</span></span>
-<span id="cb417-3"><a href="sec-quant.html#cb417-3" tabindex="-1"></a>    <span class="fu">rownames_to_column</span>(<span class="st">"protein"</span>) <span class="sc">%&gt;%</span></span>
-<span id="cb417-4"><a href="sec-quant.html#cb417-4" tabindex="-1"></a>    <span class="fu">as_tibble</span>() <span class="sc">%&gt;%</span></span>
-<span id="cb417-5"><a href="sec-quant.html#cb417-5" tabindex="-1"></a>    <span class="fu">mutate</span>(<span class="at">TP =</span> <span class="fu">grepl</span>(<span class="st">"ups"</span>, protein))</span></code></pre></div>
-<p>Note the warning about partial <code>NA</code> coefficients for 23 probes:</p>
-<div class="sourceCode" id="cb418"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb418-1"><a href="sec-quant.html#cb418-1" tabindex="-1"></a>na_coefs <span class="ot">&lt;-</span></span>
-<span id="cb418-2"><a href="sec-quant.html#cb418-2" tabindex="-1"></a>    <span class="fu">filter</span>(res, <span class="fu">is.na</span>(t)) <span class="sc">%&gt;%</span></span>
-<span id="cb418-3"><a href="sec-quant.html#cb418-3" tabindex="-1"></a>    <span class="fu">pull</span>(protein)</span>
-<span id="cb418-4"><a href="sec-quant.html#cb418-4" tabindex="-1"></a><span class="fu">assay</span>(prots[na_coefs, ])</span></code></pre></div>
-<pre><code>##                                6A_7      6A_8       6A_9       6B_7       6B_8
-## P00167ups|CYB5_HUMAN_UPS        NaN       NaN        NaN -0.7840558 -2.0282987
-## P01112ups|RASH_HUMAN_UPS        NaN       NaN        NaN -1.5564896        NaN
-## P05413ups|FABPH_HUMAN_UPS       NaN       NaN        NaN -3.3419480        NaN
-## P08758ups|ANXA5_HUMAN_UPS       NaN       NaN        NaN -2.7973872 -2.0137585
-## sp|P06704|CDC31_YEAST           NaN       NaN        NaN -1.2032046 -2.1252371
-## sp|P25574|EMC1_YEAST      -1.506177 -1.983737 -0.7795009        NaN        NaN
-## sp|P32608|RTG2_YEAST            NaN       NaN        NaN        NaN -4.4424189
-## sp|P32769|HBS1_YEAST            NaN -1.384031 -0.7285780        NaN        NaN
-## sp|P34217|PIN4_YEAST            NaN       NaN        NaN -0.8378614 -0.1316397
-## sp|P34237|CASP_YEAST            NaN       NaN        NaN -1.5645172 -1.6600291
-## sp|P38166|SFT2_YEAST      -1.585685 -1.076707        NaN        NaN        NaN
-## sp|P40056|GET2_YEAST            NaN -1.091696 -1.4014211        NaN        NaN
-## sp|P40533|TED1_YEAST            NaN       NaN        NaN -2.0491876        NaN
-## sp|P43582|WWM1_YEAST            NaN       NaN        NaN -0.5538711 -0.7360990
-## sp|P46965|SPC1_YEAST            NaN -3.428771 -3.6321984        NaN        NaN
-## sp|P48363|PFD3_YEAST            NaN       NaN        NaN -0.1904905        NaN
-##                                 6B_9
-## P00167ups|CYB5_HUMAN_UPS  -1.1230809
-## P01112ups|RASH_HUMAN_UPS  -1.5618192
-## P05413ups|FABPH_HUMAN_UPS -3.8907081
-## P08758ups|ANXA5_HUMAN_UPS -2.0894752
-## sp|P06704|CDC31_YEAST     -1.5844104
-## sp|P25574|EMC1_YEAST             NaN
-## sp|P32608|RTG2_YEAST      -2.7873186
-## sp|P32769|HBS1_YEAST             NaN
-## sp|P34217|PIN4_YEAST      -0.1989392
-## sp|P34237|CASP_YEAST      -1.6877463
-## sp|P38166|SFT2_YEAST             NaN
-## sp|P40056|GET2_YEAST             NaN
-## sp|P40533|TED1_YEAST      -1.7474812
-## sp|P43582|WWM1_YEAST      -0.7207043
-## sp|P46965|SPC1_YEAST             NaN
-## sp|P48363|PFD3_YEAST      -0.5087747
-##  [ reached getOption("max.print") -- omitted 9 rows ]</code></pre>
-<p>We can now visualise the results using a volcano plot:</p>
-<div class="sourceCode" id="cb420"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb420-1"><a href="sec-quant.html#cb420-1" tabindex="-1"></a>res <span class="sc">%&gt;%</span></span>
-<span id="cb420-2"><a href="sec-quant.html#cb420-2" tabindex="-1"></a>    <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> logFC, <span class="at">y =</span> <span class="sc">-</span><span class="fu">log10</span>(adj.P.Val))) <span class="sc">+</span></span>
-<span id="cb420-3"><a href="sec-quant.html#cb420-3" tabindex="-1"></a>    <span class="fu">geom_point</span>(<span class="fu">aes</span>(<span class="at">colour =</span> TP)) <span class="sc">+</span></span>
-<span id="cb420-4"><a href="sec-quant.html#cb420-4" tabindex="-1"></a>    <span class="fu">geom_vline</span>(<span class="at">xintercept =</span> <span class="fu">c</span>(<span class="sc">-</span><span class="dv">1</span>, <span class="dv">1</span>)) <span class="sc">+</span></span>
-<span id="cb420-5"><a href="sec-quant.html#cb420-5" tabindex="-1"></a>    <span class="fu">geom_hline</span>(<span class="at">yintercept =</span> <span class="sc">-</span><span class="fu">log10</span>(<span class="fl">0.05</span>)) <span class="sc">+</span></span>
-<span id="cb420-6"><a href="sec-quant.html#cb420-6" tabindex="-1"></a>    <span class="fu">scale_color_manual</span>(<span class="at">values =</span> <span class="fu">c</span>(<span class="st">"black"</span>,<span class="st">"red"</span>))</span></code></pre></div>
-<pre><code>## Warning: Removed 25 rows containing missing values (`geom_point()`).</code></pre>
-<div class="figure">
-<span style="display:block;" id="fig:vp"></span>
-<p class="caption marginnote shownote">
-Figure 5.15: Volcano plot highlighing spiked-in proteins in red.
-</p>
-<img src="R4MS_files/figure-html/vp-1.png" alt="Volcano plot highlighing spiked-in proteins in red." width="672">
-</div>
-<p>Using the pipeline described above, we would would identify a single
-differentially expressed protein at an 5 percent FDR but miss out the
-other 36 expected spike-in proteins.</p>
-<p>We can assess our results in terms of true/false postitves/negatives:</p>
-<ul>
-<li>True positives: 1</li>
-<li>False positives: 0</li>
-<li>True negatives: 1330</li>
-<li>False negatives: 32</li>
-</ul>
-</div>
-</div>
-<div id="summary-exercice" class="section level2" number="5.5">
-<h2>
-<span class="header-section-number">5.5</span> Summary exercice<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('summary-exercice')" onmouseout="reset_tooltip('summary-exercice-tooltip')"><span class="tooltiptext" id="summary-exercice-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>As shown below, it is possible to substantially improve these results
-by aggregating features using a robust summarisation (available as
-<code>MsCoreUtils::robustSummary()</code>), i.e robust regression with
-M-estimation using Huber weights, as described in section 2.7 in
-<span class="citation">(<label for="tufte-mn-16" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-16" class="margin-toggle">Sticker et al. 2019<span class="marginnote">Sticker, Adriaan, Ludger Goeminne, Lennart Martens, and Lieven Clement. 2019. <span>“Robust Summarization and Inference in Proteome-Wide Label-Free Quantification.”</span> <em>bioRxiv</em>. <a href="https://doi.org/10.1101/668863">https://doi.org/10.1101/668863</a>.</span>)</span>.</p>
-<div class="figure">
-<span style="display:block;" id="fig:unnamed-chunk-76"></span>
-<p class="caption marginnote shownote">
-Figure 5.16: Aggregation using robust summarisation.
-</p>
-<img src="img/vp2.png" alt="Aggregation using robust summarisation." width="1048">
-</div>
-<ul>
-<li>True positives: 21</li>
-<li>False positives: 2</li>
-<li>True negatives: 1340</li>
-<li>False negatives: 12</li>
-</ul>
-<p>Repeat and adapt what we have seen here using, for example, the
-<code>robustSummary()</code> function.</p>
-
-</div>
-</div>
-<div class="footnotes">
-<hr>
-<ol start="7">
-<li id="fn7"><p>Still, it is
-recommended to explore missingness as part of the exploratory data
-analysis.<a href="sec-quant.html#fnref7" class="footnote-back">↩︎</a></p></li>
-</ol>
-</div>
-</body></html>
-
-<p style="text-align: center;">
-<a href="sec-id.html"><button class="btn btn-default">Previous</button></a>
-<a href="sec-anx.html"><button class="btn btn-default">Next</button></a>
-</p>
-<p class="build-date">Page built: 
-2023-09-06
- using 
-R version 4.3.1 Patched (2023-07-10 r84676)
-</p>
-</div>
-</div>
-
-
-
-</body>
-</html>
diff --git a/docs/sec-raw.html b/docs/sec-raw.html
deleted file mode 100644
index 16cf473..0000000
--- a/docs/sec-raw.html
+++ /dev/null
@@ -1,1104 +0,0 @@
-<!DOCTYPE html>
-<html lang="" xml:lang="">
-<head>
-
-<meta charset="utf-8" />
-<meta name="generator" content="pandoc" />
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<meta property="og:title" content="Chapter 3 Raw MS data | R for Mass Spectrometry" />
-<meta property="og:type" content="book" />
-
-
-
-
-<meta name="author" content="Laurent Gatto, Sebastian Gibb, Johannes Rainer" />
-
-<meta name="date" content="2023-09-06" />
-
-
-<meta name="description" content="Chapter 3 Raw MS data | R for Mass Spectrometry">
-
-<title>Chapter 3 Raw MS data | R for Mass Spectrometry</title>
-
-<link href="libs/tufte-css-2015.12.29/tufte.css" rel="stylesheet" />
-<link href="libs/tufte-css-2015.12.29/envisioned.css" rel="stylesheet" />
-<link href="libs/msmb-css-0/msmb.css" rel="stylesheet" />
-<script>
-function toggle_visibility(id1, id2) {
-var e = document.getElementById(id1);
-var f = document.getElementById(id2);
-
-e.style.display = ((e.style.display!='none') ? 'none' : 'block');
-
-if(f.classList.contains('fa-plus-square')) {
-    f.classList.add('fa-minus-square')
-    f.classList.remove('fa-plus-square')
-} else {
-    f.classList.add('fa-plus-square')
-    f.classList.remove('fa-minus-square')
-}
-
-}
-</script>
-<script>
-function copy_link(id) {
-  var dummy = document.createElement('input'),
-  text = window.location.href.split(/[?#]/)[0] + '#' + id;
-  document.body.appendChild(dummy);
-  dummy.value = text;
-  dummy.select();
-  document.execCommand('copy');
-  document.body.removeChild(dummy);
-  
-  var tooltip = document.getElementById(id + '-tooltip');
-  tooltip.innerHTML = 'Copied!';
-}
-
-function reset_tooltip(id) {
-  var tooltip = document.getElementById(id);
-  tooltip.innerHTML = 'Copy link';
-}
-</script>
-
-
-<style type="text/css">code{white-space: pre;}</style>
-<style type="text/css">
-pre > code.sourceCode { white-space: pre; position: relative; }
-pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
-pre > code.sourceCode > span:empty { height: 1.2em; }
-
-code.sourceCode > span { color: inherit; text-decoration: inherit; }
-div.sourceCode { margin: 1em 0; }
-pre.sourceCode { margin: 0; }
-@media screen {
-div.sourceCode { overflow: auto; }
-}
-@media print {
-pre > code.sourceCode { white-space: pre-wrap; }
-pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
-}
-pre.numberSource code
-  { counter-reset: source-line 0; }
-pre.numberSource code > span
-  { position: relative; left: -4em; counter-increment: source-line; }
-pre.numberSource code > span > a:first-child::before
-  { content: counter(source-line);
-    position: relative; left: -1em; text-align: right; vertical-align: baseline;
-    border: none; display: inline-block;
-    -webkit-touch-callout: none; -webkit-user-select: none;
-    -khtml-user-select: none; -moz-user-select: none;
-    -ms-user-select: none; user-select: none;
-    padding: 0 4px; width: 4em;
-    color: #aaaaaa;
-  }
-pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
-div.sourceCode
-  {  background-color: #f8f8f8; }
-@media screen {
-pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
-}
-code span.al { color: #ef2929; } /* Alert */
-code span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
-code span.at { color: #204a87; } /* Attribute */
-code span.bn { color: #0000cf; } /* BaseN */
-code span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
-code span.ch { color: #4e9a06; } /* Char */
-code span.cn { color: #8f5902; } /* Constant */
-code span.co { color: #8f5902; font-style: italic; } /* Comment */
-code span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
-code span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
-code span.dt { color: #204a87; } /* DataType */
-code span.dv { color: #0000cf; } /* DecVal */
-code span.er { color: #a40000; font-weight: bold; } /* Error */
-code span.ex { } /* Extension */
-code span.fl { color: #0000cf; } /* Float */
-code span.fu { color: #204a87; font-weight: bold; } /* Function */
-code span.im { } /* Import */
-code span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
-code span.kw { color: #204a87; font-weight: bold; } /* Keyword */
-code span.op { color: #ce5c00; font-weight: bold; } /* Operator */
-code span.ot { color: #8f5902; } /* Other */
-code span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
-code span.sc { color: #ce5c00; font-weight: bold; } /* SpecialChar */
-code span.ss { color: #4e9a06; } /* SpecialString */
-code span.st { color: #4e9a06; } /* String */
-code span.va { color: #000000; } /* Variable */
-code span.vs { color: #4e9a06; } /* VerbatimString */
-code span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
-</style>
-
-
-<style type="text/css">
-/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
-div.csl-bib-body { }
-div.csl-entry {
-  clear: both;
-}
-.hanging div.csl-entry {
-  margin-left:2em;
-  text-indent:-2em;
-}
-div.csl-left-margin {
-  min-width:2em;
-  float:left;
-}
-div.csl-right-inline {
-  margin-left:2em;
-  padding-left:1em;
-}
-div.csl-indent {
-  margin-left: 2em;
-}
-</style>
-
-<link rel="stylesheet" href="style.css" type="text/css" />
-
-</head>
-
-<body>
-
-
-
-<div class="row">
-<div class="col-sm-12">
-<div id="TOC">
-<ul class="navbar">
-<li class="msmb"><p class="title">R for Mass Spectrometry<p><p class="author">Laurent Gatto, Sebastian Gibb, Johannes Rainer</p>
-<li class="dropdown" style="float:right">
-<a href="javascript:void(0)" class="dropbtn">&#x25BE; Chapters</a>
-<div class="dropdown-content">
-<a href="index.html" id="toc-preamble"><span class="toc-section-number">1</span> Preamble</a>
-<a href="sec-msintro.html" id="toc-sec-msintro"><span class="toc-section-number">2</span> Introduction</a>
-<a id="active-page" href="sec-raw.html" id="toc-sec-raw"><span class="toc-section-number">3</span> Raw MS data</a><ul class="toc-sections">
-<li class="toc"><a href="#what-is-raw-data-in-r"> What is raw data in R</a></li>
-<li class="toc"><a href="#visualisation-of-raw-ms-data"> Visualisation of raw MS data</a></li>
-<li class="toc"><a href="#raw-data-processing-and-manipulation"> Raw data processing and manipulation</a></li>
-<li class="toc"><a href="#a-note-on-efficiency"> A note on efficiency</a></li>
-</ul>
-<a href="sec-id.html" id="toc-sec-id"><span class="toc-section-number">4</span> Identification data</a>
-<a href="sec-quant.html" id="toc-sec-quant"><span class="toc-section-number">5</span> Quantitative data</a>
-<a href="sec-anx.html" id="toc-sec-anx"><span class="toc-section-number">6</span> Annex</a>
-<a href="sec-si.html" id="toc-sec-si"><span class="toc-section-number">7</span> Additional materials and session information</a>
-</div>
-</li>
-</ul>
-</div>
-</div>
-</div>
-<div class="row">
-<div class="col-sm-12">
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body>
-<div id="sec-raw" class="section level1" number="3">
-<h1>
-<span class="header-section-number">Chapter 3</span> Raw MS data</h1>
-<p>In this section, we will learn how to read raw data in one of the
-commonly used open formats (<code>mzML</code>, <code>mzXML</code>, <code>netCDF</code> or <code>mgf</code>) into
-R.</p>
-<div id="what-is-raw-data-in-r" class="section level2" number="3.1">
-<h2>
-<span class="header-section-number">3.1</span> What is raw data in R<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('what-is-raw-data-in-r')" onmouseout="reset_tooltip('what-is-raw-data-in-r-tooltip')"><span class="tooltiptext" id="what-is-raw-data-in-r-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>When we manipulate complex data, we need a way to abstract it.</p>
-<p>The abstraction saves us from having to know about all
-the details of that data <strong>and</strong> its associated metadata. In R, we
-think of MS data as illustrated on the figure below (taken from
-<span class="citation">(<label for="tufte-mn-5" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-5" class="margin-toggle">Gatto, Gibb, and Rainer 2020<span class="marginnote">Gatto, Laurent, Sebastian Gibb, and Johannes Rainer. 2020. <span>“<span>MSnbase</span>, Efficient and Elegant r-Based Processing and Visualisation of Raw Mass Spectrometry Data.”</span> <em>J. Proteome Res.</em>, September.</span>)</span>): a metadata table and a set of raw spectra. This allows
-to rely on a few easy-to-remember conventions to make mundane and
-repetitive tasks trivial and be able to complete more complex things
-easily. Abstractions provide a smoother approach to handle complex
-data using common patterns.</p>
-<div class="figure" style="text-align: center">
-<span style="display:block;" id="fig:unnamed-chunk-8"></span>
-<p class="caption marginnote shownote">
-Figure 3.1: Schematic representation of what is referred to by <em>raw data</em>: a collection of mass spectra and a table containing spectrum-level annotations along the lines. Raw data are imported from one of the many community-maintained open standards formats (mzML, mzXML, mzData or ANDI-MS/netCDF).
-</p>
-<img src="img/raw.png" alt="Schematic representation of what is referred to by *raw data*: a collection of mass spectra and a table containing spectrum-level annotations along the lines. Raw data are imported from one of the many community-maintained open standards formats (mzML, mzXML, mzData or ANDI-MS/netCDF)." width="100%">
-</div>
-<div id="the-spectra-class" class="section level3" number="3.1.1">
-<h3>
-<span class="header-section-number">3.1.1</span> The <code>Spectra</code> class<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('the-spectra-class')" onmouseout="reset_tooltip('the-spectra-class-tooltip')"><span class="tooltiptext" id="the-spectra-class-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>We are going to use the
-<a href="https://rformassspectrometry.github.io/Spectra/"><code>Spectra</code></a> package
-as an abstraction to raw mass spectrometry data.</p>
-<div class="sourceCode" id="cb29"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb29-1"><a href="sec-raw.html#cb29-1" tabindex="-1"></a><span class="fu">library</span>(Spectra)</span></code></pre></div>
-<p><code>Spectra</code> is part of the <a href="https://www.rformassspectrometry.org/">R for Mass Spectrometry
-initiative</a>. It
-defines the <code>Spectra</code> class that is used as a raw data abstraction, to
-manipulate MS data and metadata. The best way to learn about a data
-structure is to create one by hand.</p>
-<p>Let’s create a <code>DataFrame</code><a href="#fn4" class="footnote-ref" id="fnref4"><sup>4</sup></a> containing MS levels, retention time, m/z and intensities
-for 2 spectra:</p>
-<div class="sourceCode" id="cb30"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb30-1"><a href="sec-raw.html#cb30-1" tabindex="-1"></a>spd <span class="ot">&lt;-</span> <span class="fu">DataFrame</span>(<span class="at">msLevel =</span> <span class="fu">c</span>(1L, 2L), <span class="at">rtime =</span> <span class="fu">c</span>(<span class="fl">1.1</span>, <span class="fl">1.2</span>))</span>
-<span id="cb30-2"><a href="sec-raw.html#cb30-2" tabindex="-1"></a>spd<span class="sc">$</span>mz <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="fu">c</span>(<span class="dv">100</span>, <span class="fl">103.2</span>, <span class="fl">104.3</span>, <span class="fl">106.5</span>), <span class="fu">c</span>(<span class="fl">45.6</span>, <span class="fl">120.4</span>, <span class="fl">190.2</span>))</span>
-<span id="cb30-3"><a href="sec-raw.html#cb30-3" tabindex="-1"></a>spd<span class="sc">$</span>intensity <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="fu">c</span>(<span class="dv">200</span>, <span class="dv">400</span>, <span class="fl">34.2</span>, <span class="dv">17</span>), <span class="fu">c</span>(<span class="fl">12.3</span>, <span class="fl">15.2</span>, <span class="fl">6.8</span>))</span>
-<span id="cb30-4"><a href="sec-raw.html#cb30-4" tabindex="-1"></a>spd</span></code></pre></div>
-<pre><code>## DataFrame with 2 rows and 4 columns
-##     msLevel     rtime                    mz             intensity
-##   &lt;integer&gt; &lt;numeric&gt;                &lt;list&gt;                &lt;list&gt;
-## 1         1       1.1 100.0,103.2,104.3,... 200.0,400.0, 34.2,...
-## 2         2       1.2      45.6,120.4,190.2        12.3,15.2, 6.8</code></pre>
-<p>And now convert this <code>DataFrame</code> into a <code>Spectra</code> object:</p>
-<div class="sourceCode" id="cb32"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb32-1"><a href="sec-raw.html#cb32-1" tabindex="-1"></a>sp0 <span class="ot">&lt;-</span> <span class="fu">Spectra</span>(spd)</span>
-<span id="cb32-2"><a href="sec-raw.html#cb32-2" tabindex="-1"></a>sp0</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 2 spectra in a MsBackendMemory backend:
-##     msLevel     rtime scanIndex
-##   &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1         1       1.1        NA
-## 2         2       1.2        NA
-##  ... 16 more variables/columns.</code></pre>
-<div id="exercise" class="section level4 unnumbered">
-<h4>Exercise</h4>
-<p>Explore the newly created object using</p>
-<ul>
-<li>
-<code>spectraVariables</code> to extract all the metadata variables. Compare these to the
-spectra variables available from the previous example.</li>
-<li>
-<code>spectraData</code> to extract all the metadata.</li>
-<li>
-<code>peaksData</code> to extract a list containing the raw data.</li>
-<li>
-<code>[</code> to create subsets.</li>
-</ul>
-</div>
-</div>
-<div id="spectra-from-mzml-files" class="section level3" number="3.1.2">
-<h3>
-<span class="header-section-number">3.1.2</span> <code>Spectra</code> from mzML files<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('spectra-from-mzml-files')" onmouseout="reset_tooltip('spectra-from-mzml-files-tooltip')"><span class="tooltiptext" id="spectra-from-mzml-files-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Let’s now create a new object using the mzML data previously
-downloaded and available in the <code>mzf</code> file.</p>
-<div class="sourceCode" id="cb34"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb34-1"><a href="sec-raw.html#cb34-1" tabindex="-1"></a>mzf</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/.cache/R/rpx/8ee512042c5ff_TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML"</code></pre>
-<div class="sourceCode" id="cb36"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb36-1"><a href="sec-raw.html#cb36-1" tabindex="-1"></a>sp <span class="ot">&lt;-</span> <span class="fu">Spectra</span>(mzf)</span>
-<span id="cb36-2"><a href="sec-raw.html#cb36-2" tabindex="-1"></a>sp</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 7534 spectra in a MsBackendMzR backend:
-##        msLevel     rtime scanIndex
-##      &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1            1    0.4584         1
-## 2            1    0.9725         2
-## 3            1    1.8524         3
-## 4            1    2.7424         4
-## 5            1    3.6124         5
-## ...        ...       ...       ...
-## 7530         2   3600.47      7530
-## 7531         2   3600.83      7531
-## 7532         2   3601.18      7532
-## 7533         2   3601.57      7533
-## 7534         2   3601.98      7534
-##  ... 33 more variables/columns.
-## 
-## file(s):
-## 8ee512042c5ff_TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML</code></pre>
-<div id="exercise-1" class="section level4 unnumbered">
-<h4>Exercise</h4>
-<ul>
-<li>Repeat the data manipulations above.</li>
-<li>Check the number of scans in the object with <code>length()</code>.</li>
-<li>Note the difference in the first line when showing the object in the
-console. We will get back to this idea of backend later.</li>
-</ul>
-<p>Mass spectrometry data in <code>Spectra</code> objects can be thought of as a
-list of individual spectra, with each spectrum having a set of
-variables associated with it. Besides <em>core</em> spectra variables (such
-as MS level or retention time) an arbitrary number of optional
-variables can be assigned to a spectrum. The core spectra variables
-all have their own accessor method and it is guaranteed that a value
-is returned by it (or <code>NA</code> if the information is not available). The
-core variables and their data type are (alphabetically ordered):</p>
-<ul>
-<li>
-<em>acquisitionNum</em> <code>integer(1)</code>: the index of acquisition of a
-spectrum during a MS run.</li>
-<li>
-<em>centroided</em> <code>logical(1)</code>: whether the spectrum is in profile or
-centroid mode.</li>
-<li>
-<em>collisionEnergy</em> <code>numeric(1)</code>: collision energy used to create an
-MSn spectrum.</li>
-<li>
-<em>dataOrigin</em> <code>character(1)</code>: the <em>origin</em> of the spectrum’s data,
-e.g. the mzML file from which it was read.</li>
-<li>
-<em>dataStorage</em> <code>character(1)</code>: the (current) storage location of the
-spectrum data. This value depends on the backend used to handle and
-provide the data. For an <em>in-memory</em> backend like the
-<code>MsBackendMemory</code> this will be <code>"&lt;memory&gt;"</code>, for an on-disk
-backend such as the <code>MsBackendHdf5Peaks</code> it will be the name of the
-HDF5 file where the spectrum’s peak data is stored.</li>
-<li>
-<em>intensity</em> <code>numeric</code>: intensity values for the spectrum’s peaks.</li>
-<li>
-<em>isolationWindowLowerMz</em> <code>numeric(1)</code>: lower m/z for the isolation
-window in which the (MSn) spectrum was measured.</li>
-<li>
-<em>isolationWindowTargetMz</em> <code>numeric(1)</code>: the target m/z for the
-isolation window in which the (MSn) spectrum was measured.</li>
-<li>
-<em>isolationWindowUpperMz</em> <code>numeric(1)</code>: upper m/z for the isolation
-window in which the (MSn) spectrum was measured.</li>
-<li>
-<em>msLevel</em> <code>integer(1)</code>: the MS level of the spectrum.</li>
-<li>
-<em>mz</em> <code>numeric</code>: the m/z values for the spectrum’s peaks.</li>
-<li>
-<em>polarity</em> <code>integer(1)</code>: the polarity of the spectrum (<code>0</code> and <code>1</code>
-representing negative and positive polarity, respectively).</li>
-<li>
-<em>precScanNum</em> <code>integer(1)</code>: the scan (acquisition) number of the
-precursor for an MSn spectrum.</li>
-<li>
-<em>precursorCharge</em> <code>integer(1)</code>: the charge of the precursor of an
-MSn spectrum.</li>
-<li>
-<em>precursorIntensity</em> <code>numeric(1)</code>: the intensity of the precursor of
-an MSn spectrum.</li>
-<li>
-<em>precursorMz</em> <code>numeric(1)</code>: the m/z of the precursor of an MSn
-spectrum.</li>
-<li>
-<em>rtime</em> <code>numeric(1)</code>: the retention time of a spectrum.</li>
-<li>
-<em>scanIndex</em> <code>integer(1)</code>: the index of a spectrum within a (raw)
-file.</li>
-<li>
-<em>smoothed</em> <code>logical(1)</code>: whether the spectrum was smoothed.</li>
-</ul>
-<p>For details on the individual variables and their getter/setter
-function see the help for <code>Spectra</code> (<code>?Spectra</code>). Also note that these
-variables are suggested, but not required to characterize a
-spectrum. Also, some only make sense for MSn, but not for MS1 spectra.</p>
-<p>In addition to the core spectra variables it is also possible to add additional
-spectra variables to a <code>Spectra</code> object. As an example we add below a spectra
-variable representing the retention times in minutes to the object. This
-information can then be extracted again using the <code>$</code> notation (similar to
-accessing a column in a <code>data.frame</code>, i.e., <code>$</code> and the name of the spectra
-variable).</p>
-<div class="sourceCode" id="cb38"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb38-1"><a href="sec-raw.html#cb38-1" tabindex="-1"></a>sp<span class="sc">$</span>rtime_minute <span class="ot">&lt;-</span> <span class="fu">rtime</span>(sp) <span class="sc">/</span> <span class="dv">60</span></span>
-<span id="cb38-2"><a href="sec-raw.html#cb38-2" tabindex="-1"></a>sp<span class="sc">$</span>rtime_minute <span class="sc">|&gt;</span> <span class="fu">head</span>()</span></code></pre></div>
-<pre><code>## [1] 0.00764000 0.01620833 0.03087333 0.04570667 0.06020667 0.07487500</code></pre>
-</div>
-<div id="exercise-2" class="section level4 unnumbered">
-<h4>Exercise</h4>
-<ul>
-<li>Extract a set of spectra variables using the accessor (for example
-<code>msLevel(.)</code>) or using the <code>$</code> notation (for example <code>.$msLevel</code>).</li>
-<li>How many MS levels are there, and how many scans of each level?</li>
-<li>Extract the index of the MS2 spectrum with the highest base peak
-intensity.</li>
-<li>Are the data centroided or in profile mode?</li>
-<li>Pick a spectrum of each level and visually check whether it is
-centroided or in profile mode. You can use the <code>plotSpectra()</code>
-function to visualise peaks and set the m/z range with the <code>xlim</code>
-arguments.</li>
-</ul>
-</div>
-<div id="exercise-3" class="section level4 unnumbered">
-<h4>Exercise</h4>
-<p>Using the first raw data file starting with <code>MS3TMT10</code>, answer the
-following questions:</p>
-<ul>
-<li>How many spectra are there in that file?</li>
-<li>How many MS levels, and how many spectra per MS level?</li>
-<li>What is the index of the MS2 spectrum with the highest precursor
-intensity?</li>
-<li>Plot one spectrum of each level. Are they centroided or in profile
-mode?</li>
-</ul>
-<p>These objects and their manipulations are not limited to single files or
-samples. Below we load data from two mzML files. The MS data from both files in
-the <code>Spectra</code> is organized linearly (first all spectra from the first file
-and then from the second). The <code>dataOrigin</code> function can be used to identify
-spectra from the different data files.</p>
-<div class="sourceCode" id="cb40"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb40-1"><a href="sec-raw.html#cb40-1" tabindex="-1"></a>(fls <span class="ot">&lt;-</span> <span class="fu">dir</span>(<span class="fu">system.file</span>(<span class="st">"sciex"</span>, <span class="at">package =</span> <span class="st">"msdata"</span>), <span class="at">full.names =</span> <span class="cn">TRUE</span>))</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/sciex/20171016_POOL_POS_1_105-134.mzML"
-## [2] "/home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/sciex/20171016_POOL_POS_3_105-134.mzML"</code></pre>
-<div class="sourceCode" id="cb42"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb42-1"><a href="sec-raw.html#cb42-1" tabindex="-1"></a>sp_sciex <span class="ot">&lt;-</span> <span class="fu">Spectra</span>(fls)</span>
-<span id="cb42-2"><a href="sec-raw.html#cb42-2" tabindex="-1"></a><span class="fu">table</span>(<span class="fu">dataOrigin</span>(sp_sciex))</span></code></pre></div>
-<pre><code>## 
-## /home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/sciex/20171016_POOL_POS_1_105-134.mzML 
-##                                                                                               931 
-## /home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/sciex/20171016_POOL_POS_3_105-134.mzML 
-##                                                                                               931</code></pre>
-</div>
-</div>
-<div id="backends" class="section level3" number="3.1.3">
-<h3>
-<span class="header-section-number">3.1.3</span> Backends<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('backends')" onmouseout="reset_tooltip('backends-tooltip')"><span class="tooltiptext" id="backends-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>The <code>Spectra</code> package allows us to use different <em>backends</em> to store mass spectrometry data while
-providing <em>via</em> the <code>Spectra</code> class a unified interface to use that data. With
-the <code>setBackend</code> function it is possible to change between different backends
-and hence different data representations. The <code>Spectra</code> package defines a set of
-example backends but any object extending the base <code>MsBackend</code> class could be
-used instead. The default backends are:</p>
-<ul>
-<li>
-<code>MsBackendMzR</code>: this backend keeps only general spectra variables in memory
-and relies on the <em><a href="https://bioconductor.org/packages/3.17/mzR">mzR</a></em> package to read mass peaks (m/z and
-intensity values) from the original MS files on-demand.</li>
-</ul>
-<div class="sourceCode" id="cb44"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb44-1"><a href="sec-raw.html#cb44-1" tabindex="-1"></a>sp_sciex</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 1862 spectra in a MsBackendMzR backend:
-##        msLevel     rtime scanIndex
-##      &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1            1     0.280         1
-## 2            1     0.559         2
-## 3            1     0.838         3
-## 4            1     1.117         4
-## 5            1     1.396         5
-## ...        ...       ...       ...
-## 1858         1   258.636       927
-## 1859         1   258.915       928
-## 1860         1   259.194       929
-## 1861         1   259.473       930
-## 1862         1   259.752       931
-##  ... 33 more variables/columns.
-## 
-## file(s):
-## 20171016_POOL_POS_1_105-134.mzML
-## 20171016_POOL_POS_3_105-134.mzML</code></pre>
-<ul>
-<li>
-<code>MsBackendMemory</code> and <code>MsBackendDataFrame</code>: the full mass spectrometry data is
-stored (in-memory) within the object. Keeping the data in memory guarantees
-high performance but also has, depending on the number of mass peaks in each
-spectrum, a much higher memory footprint.</li>
-</ul>
-<div class="sourceCode" id="cb46"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb46-1"><a href="sec-raw.html#cb46-1" tabindex="-1"></a><span class="fu">setBackend</span>(sp_sciex, <span class="fu">MsBackendMemory</span>())</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 1862 spectra in a MsBackendMemory backend:
-##        msLevel     rtime scanIndex
-##      &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1            1     0.280         1
-## 2            1     0.559         2
-## 3            1     0.838         3
-## 4            1     1.117         4
-## 5            1     1.396         5
-## ...        ...       ...       ...
-## 1858         1   258.636       927
-## 1859         1   258.915       928
-## 1860         1   259.194       929
-## 1861         1   259.473       930
-## 1862         1   259.752       931
-##  ... 33 more variables/columns.
-## Processing:
-##  Switch backend from MsBackendMzR to MsBackendMemory [Wed Sep  6 11:50:11 2023]</code></pre>
-<ul>
-<li>
-<code>MsBackendHdf5Peaks</code>: similar to <code>MsBackendMzR</code> this backend reads peak data
-only on-demand from disk while all other spectra variables are kept in
-memory. The peak data are stored in HDF5 files, which guarantees scalability.</li>
-</ul>
-<p>With the example below we switch the backend of the <code>sp_sciex</code> object (which holds
-the data from the two mzML files) to a <code>MsBackendHdf5Peaks</code> backend for data storage. The
-<code>hdf5path</code> parameter allows us to specify the storage location of the HDF5 files.</p>
-<div class="sourceCode" id="cb48"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb48-1"><a href="sec-raw.html#cb48-1" tabindex="-1"></a>sp_hdf5 <span class="ot">&lt;-</span> <span class="fu">setBackend</span>(sp_sciex, <span class="fu">MsBackendHdf5Peaks</span>(), <span class="at">hdf5path =</span> <span class="fu">tempdir</span>())</span>
-<span id="cb48-2"><a href="sec-raw.html#cb48-2" tabindex="-1"></a>sp_hdf5</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 1862 spectra in a MsBackendHdf5Peaks backend:
-##        msLevel     rtime scanIndex
-##      &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1            1     0.280         1
-## 2            1     0.559         2
-## 3            1     0.838         3
-## 4            1     1.117         4
-## 5            1     1.396         5
-## ...        ...       ...       ...
-## 1858         1   258.636       927
-## 1859         1   258.915       928
-## 1860         1   259.194       929
-## 1861         1   259.473       930
-## 1862         1   259.752       931
-##  ... 33 more variables/columns.
-## 
-## file(s):
-##  20171016_POOL_POS_1_105-134.h5
-##  20171016_POOL_POS_3_105-134.h5
-## Processing:
-##  Switch backend from MsBackendMzR to MsBackendHdf5Peaks [Wed Sep  6 11:50:18 2023]</code></pre>
-<div class="sourceCode" id="cb50"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb50-1"><a href="sec-raw.html#cb50-1" tabindex="-1"></a><span class="fu">table</span>(sp_hdf5<span class="sc">$</span>dataOrigin)</span></code></pre></div>
-<pre><code>## 
-## /home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/sciex/20171016_POOL_POS_1_105-134.mzML 
-##                                                                                               931 
-## /home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/sciex/20171016_POOL_POS_3_105-134.mzML 
-##                                                                                               931</code></pre>
-<div class="sourceCode" id="cb52"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb52-1"><a href="sec-raw.html#cb52-1" tabindex="-1"></a><span class="fu">table</span>(sp_hdf5<span class="sc">$</span>dataStorage)</span></code></pre></div>
-<pre><code>## 
-## /tmp/RtmphN3t3B/20171016_POOL_POS_1_105-134.h5 
-##                                            931 
-## /tmp/RtmphN3t3B/20171016_POOL_POS_3_105-134.h5 
-##                                            931</code></pre>
-<p>All of the above-mentioned backends support changing all of their spectra
-variables, <strong>except</strong> the <code>MsBackendMzR</code> that does not support changing m/z or
-intensity values for the mass peaks.</p>
-<p>Next to these default backends there are a set of other backend implementations
-provided by additional R packages. The
-<a href="https://rformassspectrometry.github.io/MsBackendSql"><code>MsBackendSql</code></a> for
-example allows storing (and retrieving) all MS data in (from) an SQL database,
-thus guaranteeing a minimal memory footprint.</p>
-<p>Other backends focus on specific file formats such as
-<a href="https://rformassspectrometry.github.io/MsBackendMgf/"><code>MsBackendMgf</code></a> for files
-in <code>mgf</code> file format or on specific acquisitions such as
-<a href="https://rformassspectrometry.github.io/MsBackendTimsTof/"><code>MsBackendTimsTof</code></a>
-or provide access to certain MS data resources such as the
-<a href="https://rformassspectrometry.github.io/MsBackendMassbank/"><code>MsBackendMassbank</code></a>.
-Additional backends are being developed to address specific needs or
-technologies, while remaining compliant with the <code>Spectra</code> interface.</p>
-<p>If you would like to learn more about how the raw MS formats are
-handled by <code>Spectra</code> via the <em><a href="https://bioconductor.org/packages/3.17/mzR">mzR</a></em> package,
-check out section <a href="sec-anx.html#sec-raw2">6.1</a> in the annex.</p>
-<p>See also <a href="https://jorainer.github.io/SpectraTutorials/articles/Spectra-backends.html">Spectra
-backends</a>
-for more information on different backends, their properties and
-advantages/disadvantages.</p>
-</div>
-</div>
-<div id="visualisation-of-raw-ms-data" class="section level2" number="3.2">
-<h2>
-<span class="header-section-number">3.2</span> Visualisation of raw MS data<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('visualisation-of-raw-ms-data')" onmouseout="reset_tooltip('visualisation-of-raw-ms-data-tooltip')"><span class="tooltiptext" id="visualisation-of-raw-ms-data-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>The importance of flexible access to specialised data becomes visible
-in the figure below (taken from the <code>RforProteomics</code> <a href="http://bioconductor.org/packages/release/data/experiment/vignettes/RforProteomics/inst/doc/RProtVis.html">visualisation
-vignette</a>).
-Not only can we access specific data and understand/visualise them,
-but we can traverse all the data and extract/visualise/understand
-structured slices of data.</p>
-<p>The figure below shows an illustration of how mass spectrometry
-works:</p>
-<ol style="list-style-type: decimal">
-<li><p>The chromatogram at the top displays the total ion current along the
-retention time. The vertical line identifies one scan in particular
-at retention time 1800.68 seconds (the 2807th scan).</p></li>
-<li><p>The spectra on the second line represent the full MS1 spectrum
-marked by the red line. The vertical lines identify the 10
-precursor ions that were selected for MS2 analysis. The zoomed-in view
-on the right shows one specific precursor peak.</p></li>
-<li><p>The MS2 spectra displayed along the two rows at the bottom are
-those resulting from the fragmentation of the 10 precursor peaks
-identified by the vertical bars above.</p></li>
-</ol>
-<p><img src="img/msvisfig.png" width="100%" style="display: block; margin: auto;"></p>
-<p>We are going to reproduce the figure above through a set of exercises.</p>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol style="list-style-type: decimal">
-<li>The chromatogram can be created by extracting the <code>totIonCurrent</code>
-and <code>rtime</code> variables for all MS1 spectra. Annotate the spectrum of
-interest.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-1" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-1', 'sol-start-1')"></span>
-</p>
-<div id="sol-body-1" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb54"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb54-1"><a href="sec-raw.html#cb54-1" tabindex="-1"></a><span class="fu">with</span>(<span class="fu">spectraData</span>(<span class="fu">filterMsLevel</span>(sp, <span class="dv">1</span>)),</span>
-<span id="cb54-2"><a href="sec-raw.html#cb54-2" tabindex="-1"></a>     <span class="fu">plot</span>(rtime, totIonCurrent, <span class="at">type =</span> <span class="st">"l"</span>))</span>
-<span id="cb54-3"><a href="sec-raw.html#cb54-3" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> <span class="fu">rtime</span>(sp)[<span class="dv">2807</span>], <span class="at">col =</span> <span class="st">"red"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-12-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol start="2" style="list-style-type: decimal">
-<li>The <code>filterPrecursorScan()</code> function can be used to retain a set of
-parent (MS1) and children (MS2) scans, as defined by an acquisition
-number. Use it to extract the MS1 scan of interest and all its MS2
-children.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-2" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-2', 'sol-start-2')"></span>
-</p>
-<div id="sol-body-2" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb55"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb55-1"><a href="sec-raw.html#cb55-1" tabindex="-1"></a>ms_2 <span class="ot">&lt;-</span> <span class="fu">filterPrecursorScan</span>(sp, <span class="dv">2807</span>)</span>
-<span id="cb55-2"><a href="sec-raw.html#cb55-2" tabindex="-1"></a>ms_2</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 11 spectra in a MsBackendMzR backend:
-##      msLevel     rtime scanIndex
-##    &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1          1   1800.68      2807
-## 2          2   1801.26      2808
-## 3          2   1801.92      2809
-## 4          2   1802.20      2810
-## 5          2   1802.48      2811
-## 6          2   1802.77      2812
-## 7          2   1803.05      2813
-## 8          2   1803.34      2814
-## 9          2   1803.64      2815
-## 10         2   1803.93      2816
-## 11         2   1804.21      2817
-##  ... 34 more variables/columns.
-## 
-## file(s):
-## 8ee512042c5ff_TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML
-## Processing:
-##  Filter: select parent/children scans for 2807 [Wed Sep  6 11:50:18 2023]</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol start="3" style="list-style-type: decimal">
-<li>Plot the MS1 spectrum of interest and highlight all the peaks that
-will be selected for MS2 analysis.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-3" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-3', 'sol-start-3')"></span>
-</p>
-<div id="sol-body-3" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb57"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb57-1"><a href="sec-raw.html#cb57-1" tabindex="-1"></a><span class="fu">plotSpectra</span>(sp[<span class="dv">2807</span>], <span class="at">xlim =</span> <span class="fu">c</span>(<span class="dv">400</span>, <span class="dv">1000</span>))</span>
-<span id="cb57-2"><a href="sec-raw.html#cb57-2" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> <span class="fu">precursorMz</span>(ms_2)[<span class="sc">-</span><span class="dv">1</span>], <span class="at">col =</span> <span class="st">"grey"</span>)</span>
-<span id="cb57-3"><a href="sec-raw.html#cb57-3" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> <span class="fu">precursorMz</span>(ms_2)[<span class="dv">2</span>], <span class="at">col =</span> <span class="st">"red"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-14-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol start="4" style="list-style-type: decimal">
-<li>Zoom in on the m/z range between 521.1 and 522.5 to reveal the isotopic envelope
-of that peak.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-4" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-4', 'sol-start-4')"></span>
-</p>
-<div id="sol-body-4" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb58"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb58-1"><a href="sec-raw.html#cb58-1" tabindex="-1"></a><span class="fu">plotSpectra</span>(sp[<span class="dv">2807</span>], <span class="at">xlim =</span> <span class="fu">c</span>(<span class="fl">521.2</span>, <span class="fl">522.5</span>), <span class="at">type =</span> <span class="st">"l"</span>)</span>
-<span id="cb58-2"><a href="sec-raw.html#cb58-2" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> <span class="fu">precursorMz</span>(ms_2)[<span class="dv">2</span>], <span class="at">col =</span> <span class="st">"red"</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-15-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<ol start="5" style="list-style-type: decimal">
-<li>Use the <code>plotSpectra()</code> function to plot all 10 MS2 spectra in
-one call.</li>
-</ol>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-5" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-5', 'sol-start-5')"></span>
-</p>
-<div id="sol-body-5" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb59"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb59-1"><a href="sec-raw.html#cb59-1" tabindex="-1"></a><span class="fu">plotSpectra</span>(ms_2[<span class="sc">-</span><span class="dv">1</span>])</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-16-1.png" width="768"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>It is possible to label the peaks with the <code>plotSpectra()</code>
-function. The <code>labels</code> argument is either a <code>character</code> of appropriate
-length (i.e. with a label for each peak) or, as illustrated below, a
-function that computes the labels.</p>
-<div class="sourceCode" id="cb60"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb60-1"><a href="sec-raw.html#cb60-1" tabindex="-1"></a>mzLabel <span class="ot">&lt;-</span> <span class="cf">function</span>(z) {</span>
-<span id="cb60-2"><a href="sec-raw.html#cb60-2" tabindex="-1"></a>    z <span class="ot">&lt;-</span> <span class="fu">peaksData</span>(z)[[1L]]</span>
-<span id="cb60-3"><a href="sec-raw.html#cb60-3" tabindex="-1"></a>    lbls <span class="ot">&lt;-</span> <span class="fu">format</span>(z[, <span class="st">"mz"</span>], <span class="at">digits =</span> <span class="dv">4</span>)</span>
-<span id="cb60-4"><a href="sec-raw.html#cb60-4" tabindex="-1"></a>    lbls[z[, <span class="st">"intensity"</span>] <span class="sc">&lt;</span> <span class="fl">1e5</span>] <span class="ot">&lt;-</span> <span class="st">""</span></span>
-<span id="cb60-5"><a href="sec-raw.html#cb60-5" tabindex="-1"></a>    lbls</span>
-<span id="cb60-6"><a href="sec-raw.html#cb60-6" tabindex="-1"></a>}</span>
-<span id="cb60-7"><a href="sec-raw.html#cb60-7" tabindex="-1"></a></span>
-<span id="cb60-8"><a href="sec-raw.html#cb60-8" tabindex="-1"></a><span class="fu">plotSpectra</span>(ms_2[<span class="dv">7</span>],</span>
-<span id="cb60-9"><a href="sec-raw.html#cb60-9" tabindex="-1"></a>            <span class="at">xlim =</span> <span class="fu">c</span>(<span class="dv">126</span>, <span class="dv">132</span>),</span>
-<span id="cb60-10"><a href="sec-raw.html#cb60-10" tabindex="-1"></a>            <span class="at">labels =</span> mzLabel,</span>
-<span id="cb60-11"><a href="sec-raw.html#cb60-11" tabindex="-1"></a>            <span class="at">labelSrt =</span> <span class="sc">-</span><span class="dv">30</span>, <span class="at">labelPos =</span> <span class="dv">2</span>,</span>
-<span id="cb60-12"><a href="sec-raw.html#cb60-12" tabindex="-1"></a>            <span class="at">labelOffset =</span> <span class="fl">0.1</span>)</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-17-1.png" width="672"></p>
-<p>Spectra can also be compared either by overlay or mirror plotting
-using the <code>plotSpectraOverlay()</code> and <code>plotSpectraMirror()</code> functions.</p>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Filter MS2 level spectra and find any 2 MS2 spectra that have matching
-precursor peaks based on the precursor m/z values.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-6" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-6', 'sol-start-6')"></span>
-</p>
-<div id="sol-body-6" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb61"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb61-1"><a href="sec-raw.html#cb61-1" tabindex="-1"></a>sp2 <span class="ot">&lt;-</span> <span class="fu">filterMsLevel</span>(sp, 2L)</span>
-<span id="cb61-2"><a href="sec-raw.html#cb61-2" tabindex="-1"></a><span class="fu">anyDuplicated</span>(<span class="fu">precursorMz</span>(<span class="fu">filterMsLevel</span>(sp, <span class="dv">2</span>)))</span></code></pre></div>
-<pre><code>## [1] 37</code></pre>
-<div class="sourceCode" id="cb63"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb63-1"><a href="sec-raw.html#cb63-1" tabindex="-1"></a>i <span class="ot">&lt;-</span> <span class="fu">which</span>(<span class="fu">precursorMz</span>(sp2) <span class="sc">==</span> <span class="fu">precursorMz</span>(sp2)[<span class="dv">37</span>])</span>
-<span id="cb63-2"><a href="sec-raw.html#cb63-2" tabindex="-1"></a>sp2i <span class="ot">&lt;-</span> sp2[i]</span></code></pre></div>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Visualise the matching pair using the <code>plotSpectraOverlay()</code> and
-<code>plotSpectraMirror()</code> functions.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-7" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-7', 'sol-start-7')"></span>
-</p>
-<div id="sol-body-7" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb64"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb64-1"><a href="sec-raw.html#cb64-1" tabindex="-1"></a><span class="fu">plotSpectraOverlay</span>(sp2i, <span class="at">col =</span> <span class="fu">c</span>(<span class="st">"red"</span>, <span class="st">"steelblue"</span>))</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-19-1.png" width="672"></p>
-<div class="sourceCode" id="cb65"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb65-1"><a href="sec-raw.html#cb65-1" tabindex="-1"></a><span class="fu">plotSpectraMirror</span>(sp2i[<span class="dv">1</span>], sp2i[<span class="dv">2</span>])</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-20-1.png" width="672"></p>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>It is also possible to explore raw data interactively with the
-<a href="https://rformassspectrometry.github.io/SpectraVis/"><code>SpectraVis</code>
-package</a>:</p>
-<ul>
-<li><p>The
-<a href="https://rformassspectrometry.github.io/SpectraVis/reference/browseSpectra.html"><code>browseSpectra()</code></a>
-function opens a simple shiny application that allows browsing
-through the individual scans of a Spectra object.</p></li>
-<li><p>The
-<a href="https://rformassspectrometry.github.io/SpectraVis/reference/plotlySpectra.html"><code>plotlySpectra()</code></a>
-function displays a single spectrum using
-<a href="https://plotly.com/r/"><code>plotly</code></a>, allowing the spectrum to be
-explored interactively (zooming, panning).</p></li>
-</ul>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Test the <code>SpectraVis</code> functions on some of the <code>Spectra</code> objects produced
-above.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-</div>
-<div id="raw-data-processing-and-manipulation" class="section level2" number="3.3">
-<h2>
-<span class="header-section-number">3.3</span> Raw data processing and manipulation<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('raw-data-processing-and-manipulation')" onmouseout="reset_tooltip('raw-data-processing-and-manipulation-tooltip')"><span class="tooltiptext" id="raw-data-processing-and-manipulation-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>Apart from <em>classical</em> subsetting operations such as <code>[</code> and <code>split</code>, a set of
-filter functions are defined for <code>Spectra</code> objects that filter/reduce the number
-of spectra within the object (for detailed help please see the <code>?Spectra</code> help):</p>
-<ul>
-<li>
-<code>filterAcquisitionNum</code>: retains spectra with certain acquisition numbers.</li>
-<li>
-<code>filterDataOrigin</code>: subsets to spectra from specific origins.</li>
-<li>
-<code>filterDataStorage</code>: subsets to spectra from certain data storage files.</li>
-<li>
-<code>filterEmptySpectra</code>: removes spectra without mass peaks.</li>
-<li>
-<code>filterMzRange</code>: subsets spectra keeping only peaks with an m/z within the
-provided m/z range.</li>
-<li>
-<code>filterIsolationWindow</code>: keeps spectra with the provided <code>mz</code> in their
-isolation window (m/z range).</li>
-<li>
-<code>filterMsLevel</code>: filters by MS level.</li>
-<li>
-<code>filterPolarity</code>: filters by polarity.</li>
-<li>
-<code>filterPrecursorIsotopes</code>: identifies precursor ions (from fragment spectra)
-that could represent isotopes of the same molecule. For each of these spectra
-groups only the spectrum of the monoisotopic precursor ion is returned. MS1
-spectra are returned without filtering.</li>
-<li>
-<code>filterPrecursorMaxIntensity</code>: filters spectra keeping, for groups of spectra
-with similar precursor m/z, the one spectrum with the highest precursor
-intensity. All MS1 spectra are returned without filtering.</li>
-<li>
-<code>filterPrecursorMzRange</code>: retains (MSn) spectra with a precursor m/z within
-the provided m/z range.</li>
-<li>
-<code>filterPrecursorMzValues</code>: retains (MSn) spectra with precursor m/z value
-matching the provided value(s) considering also a <code>tolerance</code> and <code>ppm</code>.</li>
-<li>
-<code>filterPrecursorCharge</code>: retains (MSn) spectra with specified
-precursor charge(s).</li>
-<li>
-<code>filterPrecursorScan</code>: retains (parent and children) scans of an acquisition
-number.</li>
-<li>
-<code>filterRt</code>: filters based on retention time range.</li>
-</ul>
-<p>In addition to these, there is also a set of filter functions that operate on
-the peak data, filtering and modifying the number of peaks of each spectrum
-within a <code>Spectra</code>:</p>
-<ul>
-<li>
-<code>combinePeaks</code>: groups peaks within each spectrum based on similarity of their
-m/z values and combines these into a single peak per peak group.</li>
-<li>
-<code>deisotopeSpectra</code>: deisotopes each individual spectrum keeping only the
-monoisotopic peak for peak groups of potential isotopologues.</li>
-<li>
-<code>filterIntensity</code>: filter each spectrum keeping only peaks with intensities
-meeting certain criteria.</li>
-<li>
-<code>filterMzRange</code>: subsets peaks data within each spectrum keeping only peaks
-with their m/z values within the specified m/z range.</li>
-<li>
-<code>filterPrecursorPeaks</code>: removes peaks with either an m/z value matching the
-precursor m/z of the respective spectrum (with parameter <code>mz = "=="</code>) or peaks
-with an m/z value larger or equal to the precursor m/z (with parameter
-<code>mz = "&gt;="</code>).</li>
-<li>
-<code>filterMzValues</code>: subsets peaks within each spectrum keeping or removing (all)
-peaks matching provided m/z value(s) (given parameters <code>ppm</code> and <code>tolerance</code>).</li>
-<li>
-<code>reduceSpectra</code>: filters individual spectra keeping only the largest peak for
-groups of peaks with similar m/z values.</li>
-</ul>
-<div class="question">
-<p class="question-begin">
-► Question
-</p>
-<div class="question-body">
-<p>Using the <code>sp_sciex</code> data, select all spectra measured in the second
-mzML file and subsequently filter them to retain spectra measured
-between 175 and 189 seconds in the measurement run.</p>
-<p class="question-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<div class="msmb-solution">
-<p class="solution-begin">
-► Solution<span id="sol-start-8" class="fa fa-plus-square solution-icon clickable" onclick="toggle_visibility('sol-body-8', 'sol-start-8')"></span>
-</p>
-<div id="sol-body-8" class="solution-body" style="display: none;">
-<div class="sourceCode" id="cb66"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb66-1"><a href="sec-raw.html#cb66-1" tabindex="-1"></a>fls <span class="ot">&lt;-</span> <span class="fu">unique</span>(<span class="fu">dataOrigin</span>(sp_sciex))</span>
-<span id="cb66-2"><a href="sec-raw.html#cb66-2" tabindex="-1"></a>fls</span></code></pre></div>
-<pre><code>## [1] "/home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/sciex/20171016_POOL_POS_1_105-134.mzML"
-## [2] "/home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/sciex/20171016_POOL_POS_3_105-134.mzML"</code></pre>
-<div class="sourceCode" id="cb68"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb68-1"><a href="sec-raw.html#cb68-1" tabindex="-1"></a>file_2 <span class="ot">&lt;-</span> <span class="fu">filterDataOrigin</span>(sp_sciex, <span class="at">dataOrigin =</span> fls[<span class="dv">2</span>])</span>
-<span id="cb68-2"><a href="sec-raw.html#cb68-2" tabindex="-1"></a><span class="fu">length</span>(file_2)</span></code></pre></div>
-<pre><code>## [1] 931</code></pre>
-<div class="sourceCode" id="cb70"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb70-1"><a href="sec-raw.html#cb70-1" tabindex="-1"></a>sps_sub <span class="ot">&lt;-</span> <span class="fu">filterRt</span>(file_2, <span class="at">rt =</span> <span class="fu">c</span>(<span class="dv">175</span>, <span class="dv">189</span>))</span>
-<span id="cb70-2"><a href="sec-raw.html#cb70-2" tabindex="-1"></a><span class="fu">length</span>(sps_sub)</span></code></pre></div>
-<pre><code>## [1] 50</code></pre>
-<div class="sourceCode" id="cb72"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb72-1"><a href="sec-raw.html#cb72-1" tabindex="-1"></a>sp_sciex <span class="sc">|&gt;</span></span>
-<span id="cb72-2"><a href="sec-raw.html#cb72-2" tabindex="-1"></a>    <span class="fu">filterDataOrigin</span>(fls[<span class="dv">2</span>]) <span class="sc">|&gt;</span></span>
-<span id="cb72-3"><a href="sec-raw.html#cb72-3" tabindex="-1"></a>    <span class="fu">filterRt</span>(<span class="fu">c</span>(<span class="dv">175</span>, <span class="dv">189</span>))</span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 50 spectra in a MsBackendMzR backend:
-##       msLevel     rtime scanIndex
-##     &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1           1   175.212       628
-## 2           1   175.491       629
-## 3           1   175.770       630
-## 4           1   176.049       631
-## 5           1   176.328       632
-## ...       ...       ...       ...
-## 46          1   187.768       673
-## 47          1   188.047       674
-## 48          1   188.326       675
-## 49          1   188.605       676
-## 50          1   188.884       677
-##  ... 33 more variables/columns.
-## 
-## file(s):
-## 20171016_POOL_POS_3_105-134.mzML
-## Processing:
-##  Filter: select data origin(s) /home/lgatto/disk/R/x86_64-pc-linux-gnu-library/4.3/msdata/sciex/20171016_POOL_POS_3_105-134.mzML [Wed Sep  6 11:50:20 2023]
-##  Filter: select retention time [175..189] on MS level(s) 1 [Wed Sep  6 11:50:20 2023]</code></pre>
-<p class="solution-end">
-<span class="fa fa-square-o solution-icon"></span>
-</p>
-</div>
-</div>
-<p>As an example of data processing, we use the <code>pickPeaks()</code>
-function below. This function converts <em>profile mode</em> MS data to <em>centroid
-mode</em> data (a process also referred to as <em>centroiding</em>).</p>
-<div class="sourceCode" id="cb74"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb74-1"><a href="sec-raw.html#cb74-1" tabindex="-1"></a><span class="fu">plotSpectra</span>(sp[<span class="dv">2807</span>], <span class="at">xlim =</span> <span class="fu">c</span>(<span class="fl">521.2</span>, <span class="fl">522.5</span>))</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-21-1.png" width="672"></p>
-<p>Centroiding reduces the profile mode MS data to a <em>representative</em> single mass
-peak per ion.</p>
-<div class="sourceCode" id="cb75"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb75-1"><a href="sec-raw.html#cb75-1" tabindex="-1"></a><span class="fu">pickPeaks</span>(sp[<span class="dv">2807</span>]) <span class="sc">|&gt;</span></span>
-<span id="cb75-2"><a href="sec-raw.html#cb75-2" tabindex="-1"></a>    <span class="fu">filterIntensity</span>(<span class="fl">1e7</span>) <span class="sc">|&gt;</span></span>
-<span id="cb75-3"><a href="sec-raw.html#cb75-3" tabindex="-1"></a>    <span class="fu">plotSpectra</span>(<span class="at">xlim =</span> <span class="fu">c</span>(<span class="fl">521.2</span>, <span class="fl">522.5</span>))</span></code></pre></div>
-<p><img src="R4MS_files/figure-html/unnamed-chunk-22-1.png" width="672"></p>
-</div>
-<div id="a-note-on-efficiency" class="section level2" number="3.4">
-<h2>
-<span class="header-section-number">3.4</span> A note on efficiency<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('a-note-on-efficiency')" onmouseout="reset_tooltip('a-note-on-efficiency-tooltip')"><span class="tooltiptext" id="a-note-on-efficiency-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<div id="backends-1" class="section level3" number="3.4.1">
-<h3>
-<span class="header-section-number">3.4.1</span> Backends<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('backends-1')" onmouseout="reset_tooltip('backends-1-tooltip')"><span class="tooltiptext" id="backends-1-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>The figure below (taken from <span class="citation">(<label for="tufte-mn-6" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-6" class="margin-toggle">Gatto, Gibb, and Rainer 2020<span class="marginnote">Gatto, Laurent, Sebastian Gibb, and Johannes Rainer. 2020. <span>“<span>MSnbase</span>, Efficient and Elegant r-Based Processing and Visualisation of Raw Mass Spectrometry Data.”</span> <em>J. Proteome Res.</em>, September.</span>)</span>) illustrates the respective
-advantages of storing data in memory or on disk. The benchmarking was
-done for the <code>MSnbase</code> package but also applies to the <code>Spectra</code> backends.</p>
-<div class="figure" style="text-align: center">
-<span style="display:block;" id="fig:unnamed-chunk-23"></span>
-<p class="caption marginnote shownote">
-Figure 3.2: (a) Reading time (triplicates, in seconds) and (b) data size in memory (in MB) to read/store 1, 5, and 10 files containing 1431 MS1 (on-disk only) and 6103 MS2 (on-disk and in-memory) spectra. (c) Filtering benchmark assessed over 10 interactions on in-memory and on-disk data containing 6103 MS2 spectra. (d) Access time to spectra for the in-memory (left) and on-disk (right) backends for 1, 10, 100 1000, 5000, and all 6103 spectra. Benchmarks were performed on a Dell XPS laptop with an Intel i5-8250U processor 1.60 GHz (4 cores, 8 threads), 7.5 GB RAM running Ubuntu 18.04.4 LTS 64-bit, and an SSD drive. The data used for the benchmarking are a TMT 4-plex experiment acquired on a LTQ Orbitrap Velos (Thermo Fisher Scientific) available in the msdata package.
-</p>
-<img src="img/pr0c00313_0002.gif" alt="(a) Reading time (triplicates, in seconds) and (b) data size in memory (in MB) to read/store 1, 5, and 10 files containing 1431 MS1 (on-disk only) and 6103 MS2 (on-disk and in-memory) spectra. (c) Filtering benchmark assessed over 10 interactions on in-memory and on-disk data containing 6103 MS2 spectra. (d) Access time to spectra for the in-memory (left) and on-disk (right) backends for 1, 10, 100 1000, 5000, and all 6103 spectra. Benchmarks were performed on a Dell XPS laptop with an Intel i5-8250U processor 1.60 GHz (4 cores, 8 threads), 7.5 GB RAM running Ubuntu 18.04.4 LTS 64-bit, and an SSD drive. The data used for the benchmarking are a TMT 4-plex experiment acquired on a LTQ Orbitrap Velos (Thermo Fisher Scientific) available in the msdata package." width="70%">
-</div>
-</div>
-<div id="parallel-processing" class="section level3" number="3.4.2">
-<h3>
-<span class="header-section-number">3.4.2</span> Parallel processing<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('parallel-processing')" onmouseout="reset_tooltip('parallel-processing-tooltip')"><span class="tooltiptext" id="parallel-processing-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Most functions on <code>Spectra</code> support (and use) parallel processing out
-of the box. Peak data access and manipulation methods perform by
-default parallel processing on a per-file basis (i.e. using the
-dataStorage variable as splitting factor). Spectra uses
-<a href="https://bioconductor.org/packages/BiocParallel"><code>BiocParallel</code></a> for
-parallel processing and all functions use the default registered
-parallel processing setup of that package.</p>
-</div>
-<div id="lazy-evaluation" class="section level3" number="3.4.3">
-<h3>
-<span class="header-section-number">3.4.3</span> Lazy evaluation<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('lazy-evaluation')" onmouseout="reset_tooltip('lazy-evaluation-tooltip')"><span class="tooltiptext" id="lazy-evaluation-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h3>
-<p>Data manipulations on Spectra objects are not immediately applied to
-the peak data. They are added to a so-called processing queue, which is
-applied each time peak data is accessed (with the <code>peaksData</code>, <code>mz</code> or
-<code>intensity</code> functions). Thanks to this processing queue data
-manipulation operations are also possible for read-only backends
-(e.g. mzML-file based backends or database-based backends). The
-information about the number of such processing steps can be seen
-below (next to Lazy evaluation queue).</p>
-<div class="sourceCode" id="cb76"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb76-1"><a href="sec-raw.html#cb76-1" tabindex="-1"></a><span class="fu">min</span>(<span class="fu">intensity</span>(sp_sciex[<span class="dv">1</span>]))</span></code></pre></div>
-<pre><code>## [1] 0</code></pre>
-<div class="sourceCode" id="cb78"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb78-1"><a href="sec-raw.html#cb78-1" tabindex="-1"></a>sp_sciex <span class="ot">&lt;-</span> <span class="fu">filterIntensity</span>(sp_sciex, <span class="at">intensity =</span> <span class="fu">c</span>(<span class="dv">10</span>, <span class="cn">Inf</span>))</span>
-<span id="cb78-2"><a href="sec-raw.html#cb78-2" tabindex="-1"></a>sp_sciex <span class="do">## Note the lazy evaluation queue</span></span></code></pre></div>
-<pre><code>## MSn data (Spectra) with 1862 spectra in a MsBackendMzR backend:
-##        msLevel     rtime scanIndex
-##      &lt;integer&gt; &lt;numeric&gt; &lt;integer&gt;
-## 1            1     0.280         1
-## 2            1     0.559         2
-## 3            1     0.838         3
-## 4            1     1.117         4
-## 5            1     1.396         5
-## ...        ...       ...       ...
-## 1858         1   258.636       927
-## 1859         1   258.915       928
-## 1860         1   259.194       929
-## 1861         1   259.473       930
-## 1862         1   259.752       931
-##  ... 33 more variables/columns.
-## 
-## file(s):
-## 20171016_POOL_POS_1_105-134.mzML
-## 20171016_POOL_POS_3_105-134.mzML
-## Lazy evaluation queue: 1 processing step(s)
-## Processing:
-##  Remove peaks with intensities outside [10, Inf] in spectra of MS level(s) 1. [Wed Sep  6 11:50:20 2023]</code></pre>
-<div class="sourceCode" id="cb80"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb80-1"><a href="sec-raw.html#cb80-1" tabindex="-1"></a><span class="fu">min</span>(<span class="fu">intensity</span>(sp_sciex[<span class="dv">1</span>]))</span></code></pre></div>
-<pre><code>## [1] 412</code></pre>
-<div class="sourceCode" id="cb82"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb82-1"><a href="sec-raw.html#cb82-1" tabindex="-1"></a>sp_sciex<span class="sc">@</span>processingQueue</span></code></pre></div>
-<pre><code>## [[1]]
-## Object of class "ProcessingStep"
-##  Function: user-provided function
-##  Arguments:
-##   o intensity = 10Inf
-##   o msLevel = 1</code></pre>
-<p>Through this lazy evaluation system it is also possible to <em>undo</em> data
-manipulations:</p>
-<div class="sourceCode" id="cb84"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb84-1"><a href="sec-raw.html#cb84-1" tabindex="-1"></a>sp_sciex <span class="ot">&lt;-</span> <span class="fu">reset</span>(sp_sciex)</span>
-<span id="cb84-2"><a href="sec-raw.html#cb84-2" tabindex="-1"></a>sp_sciex<span class="sc">@</span>processingQueue</span></code></pre></div>
-<pre><code>## list()</code></pre>
-<div class="sourceCode" id="cb86"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb86-1"><a href="sec-raw.html#cb86-1" tabindex="-1"></a><span class="fu">min</span>(<span class="fu">intensity</span>(sp_sciex[<span class="dv">1</span>]))</span></code></pre></div>
-<pre><code>## [1] 0</code></pre>
-<p>More information on this lazy evaluation concept implemented in <code>Spectra</code> is
-provided in the <a href="https://jorainer.github.io/SpectraTutorials/articles/Spectra-backends.html">Spectra
-backends</a>
-vignette.</p>
-
-</div>
-</div>
-</div>
-<div class="footnotes">
-<hr>
-<ol start="4">
-<li id="fn4"><p>As defined in the Bioconductor <code>S4Vectors</code>
-package.<a href="sec-raw.html#fnref4" class="footnote-back">↩︎</a></p></li>
-</ol>
-</div>
-</body></html>
-
-<p style="text-align: center;">
-<a href="sec-msintro.html"><button class="btn btn-default">Previous</button></a>
-<a href="sec-id.html"><button class="btn btn-default">Next</button></a>
-</p>
-<p class="build-date">Page built: 
-2023-09-06
- using 
-R version 4.3.1 Patched (2023-07-10 r84676)
-</p>
-</div>
-</div>
-
-
-
-</body>
-</html>
diff --git a/docs/sec-si.html b/docs/sec-si.html
deleted file mode 100644
index 2a9fdf8..0000000
--- a/docs/sec-si.html
+++ /dev/null
@@ -1,358 +0,0 @@
-<!DOCTYPE html>
-<html lang="" xml:lang="">
-<head>
-
-<meta charset="utf-8" />
-<meta name="generator" content="pandoc" />
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<meta property="og:title" content="Chapter 7 Additional materials and session information | R for Mass Spectrometry" />
-<meta property="og:type" content="book" />
-
-
-
-
-<meta name="author" content="Laurent Gatto, Sebastian Gibb, Johannes Rainer" />
-
-<meta name="date" content="2023-09-06" />
-
-
-<meta name="description" content="Chapter 7 Additional materials and session information | R for Mass Spectrometry">
-
-<title>Chapter 7 Additional materials and session information | R for Mass Spectrometry</title>
-
-<link href="libs/tufte-css-2015.12.29/tufte.css" rel="stylesheet" />
-<link href="libs/tufte-css-2015.12.29/envisioned.css" rel="stylesheet" />
-<link href="libs/msmb-css-0/msmb.css" rel="stylesheet" />
-<script>
-function toggle_visibility(id1, id2) {
-var e = document.getElementById(id1);
-var f = document.getElementById(id2);
-
-e.style.display = ((e.style.display!='none') ? 'none' : 'block');
-
-if(f.classList.contains('fa-plus-square')) {
-    f.classList.add('fa-minus-square')
-    f.classList.remove('fa-plus-square')
-} else {
-    f.classList.add('fa-plus-square')
-    f.classList.remove('fa-minus-square')
-}
-
-}
-</script>
-<script>
-function copy_link(id) {
-  var dummy = document.createElement('input'),
-  text = window.location.href.split(/[?#]/)[0] + '#' + id;
-  document.body.appendChild(dummy);
-  dummy.value = text;
-  dummy.select();
-  document.execCommand('copy');
-  document.body.removeChild(dummy);
-  
-  var tooltip = document.getElementById(id + '-tooltip');
-  tooltip.innerHTML = 'Copied!';
-}
-
-function reset_tooltip(id) {
-  var tooltip = document.getElementById(id);
-  tooltip.innerHTML = 'Copy link';
-}
-</script>
-
-
-<style type="text/css">code{white-space: pre;}</style>
-<style type="text/css">
-pre > code.sourceCode { white-space: pre; position: relative; }
-pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
-pre > code.sourceCode > span:empty { height: 1.2em; }
-
-code.sourceCode > span { color: inherit; text-decoration: inherit; }
-div.sourceCode { margin: 1em 0; }
-pre.sourceCode { margin: 0; }
-@media screen {
-div.sourceCode { overflow: auto; }
-}
-@media print {
-pre > code.sourceCode { white-space: pre-wrap; }
-pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
-}
-pre.numberSource code
-  { counter-reset: source-line 0; }
-pre.numberSource code > span
-  { position: relative; left: -4em; counter-increment: source-line; }
-pre.numberSource code > span > a:first-child::before
-  { content: counter(source-line);
-    position: relative; left: -1em; text-align: right; vertical-align: baseline;
-    border: none; display: inline-block;
-    -webkit-touch-callout: none; -webkit-user-select: none;
-    -khtml-user-select: none; -moz-user-select: none;
-    -ms-user-select: none; user-select: none;
-    padding: 0 4px; width: 4em;
-    color: #aaaaaa;
-  }
-pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
-div.sourceCode
-  {  background-color: #f8f8f8; }
-@media screen {
-pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
-}
-code span.al { color: #ef2929; } /* Alert */
-code span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
-code span.at { color: #204a87; } /* Attribute */
-code span.bn { color: #0000cf; } /* BaseN */
-code span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
-code span.ch { color: #4e9a06; } /* Char */
-code span.cn { color: #8f5902; } /* Constant */
-code span.co { color: #8f5902; font-style: italic; } /* Comment */
-code span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
-code span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
-code span.dt { color: #204a87; } /* DataType */
-code span.dv { color: #0000cf; } /* DecVal */
-code span.er { color: #a40000; font-weight: bold; } /* Error */
-code span.ex { } /* Extension */
-code span.fl { color: #0000cf; } /* Float */
-code span.fu { color: #204a87; font-weight: bold; } /* Function */
-code span.im { } /* Import */
-code span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
-code span.kw { color: #204a87; font-weight: bold; } /* Keyword */
-code span.op { color: #ce5c00; font-weight: bold; } /* Operator */
-code span.ot { color: #8f5902; } /* Other */
-code span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
-code span.sc { color: #ce5c00; font-weight: bold; } /* SpecialChar */
-code span.ss { color: #4e9a06; } /* SpecialString */
-code span.st { color: #4e9a06; } /* String */
-code span.va { color: #000000; } /* Variable */
-code span.vs { color: #4e9a06; } /* VerbatimString */
-code span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
-</style>
-
-
-<style type="text/css">
-/* Used with Pandoc 2.11+ new --citeproc when CSL is used */
-div.csl-bib-body { }
-div.csl-entry {
-  clear: both;
-}
-.hanging div.csl-entry {
-  margin-left:2em;
-  text-indent:-2em;
-}
-div.csl-left-margin {
-  min-width:2em;
-  float:left;
-}
-div.csl-right-inline {
-  margin-left:2em;
-  padding-left:1em;
-}
-div.csl-indent {
-  margin-left: 2em;
-}
-</style>
-
-<link rel="stylesheet" href="style.css" type="text/css" />
-
-</head>
-
-<body>
-
-
-
-<div class="row">
-<div class="col-sm-12">
-<div id="TOC">
-<ul class="navbar">
-<li class="msmb"><p class="title">R for Mass Spectrometry</p><p class="author">Laurent Gatto, Sebastian Gibb, Johannes Rainer</p>
-<li class="dropdown" style="float:right">
-<a href="javascript:void(0)" class="dropbtn">&#x25BE; Chapters</a>
-<div class="dropdown-content">
-<a href="index.html" id="toc-preamble"><span class="toc-section-number">1</span> Preamble</a>
-<a href="sec-msintro.html" id="toc-sec-msintro"><span class="toc-section-number">2</span> Introduction</a>
-<a href="sec-raw.html" id="toc-sec-raw"><span class="toc-section-number">3</span> Raw MS data</a>
-<a href="sec-id.html" id="toc-sec-id"><span class="toc-section-number">4</span> Identification data</a>
-<a href="sec-quant.html" id="toc-sec-quant"><span class="toc-section-number">5</span> Quantitative data</a>
-<a href="sec-anx.html" id="toc-sec-anx"><span class="toc-section-number">6</span> Annex</a>
-<a id="active-page" href="sec-si.html" id="toc-sec-si"><span class="toc-section-number">7</span> Additional materials and session information</a><ul class="toc-sections">
-<li class="toc"><a href="#additional-materials"> Additional materials</a></li>
-<li class="toc"><a href="#questions-and-help"> Questions and help</a></li>
-<li class="toc"><a href="#session-information"> Session information</a></li>
-</ul>
-</div>
-</li>
-</ul>
-</div>
-</div>
-</div>
-<div class="row">
-<div class="col-sm-12">
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body><div id="sec-si" class="section level1" number="7">
-<h1>
-<span class="header-section-number">Chapter 7</span> Additional materials and session information</h1>
-<div id="additional-materials" class="section level2" number="7.1">
-<h2>
-<span class="header-section-number">7.1</span> Additional materials<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('additional-materials')" onmouseout="reset_tooltip('additional-materials-tooltip')"><span class="tooltiptext" id="additional-materials-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<ul>
-<li><p>The <a href="https://lgatto.github.io/QFeaturesScpWorkshop2021/">Single-cell proteomics data analysis using <code>QFeatures</code> and
-<code>scp</code></a> workshop
-is provided as two vignettes. The first one provides a general
-introduction to the <code>QFeatures</code> class in the general context of mass
-spectrometry-based proteomics data manipulation. The second vignette
-focuses on single-cell application and introduces the <code>scp</code> package
-<span class="citation">(<label for="tufte-mn-17" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-17" class="margin-toggle">Vanderaa and Gatto 2021<span class="marginnote">Vanderaa, Christophe, and Laurent Gatto. 2021. <span>“Replication of Single-Cell Proteomics Data Reveals Important Computational Challenges.”</span> <em>Expert Rev. Proteomics</em>, October.</span>)</span> as an extension of <code>QFeatures</code>. This second
-vignette also provides exercises that give the attendee the
-opportunity to apply the learned concepts to reproduce a published
-analysis on a subset of a real data set.</p></li>
-<li>
-<p>The <a href="https://jorainer.github.io/SpectraTutorials">SpectraTutorials</a> package
-provides three different vignettes:</p>
-<ul>
-<li>
-<a href="https://jorainer.github.io/SpectraTutorials/articles/analyzing-MS-data-from-different-sources-with-Spectra.html">Seamless Integration of Mass Spectrometry Data from Different
-Sources</a>:
-describes import/export of MS data from/to files in different format as well
-as processing and handling of MS data with the <em>Spectra</em> package.</li>
-<li>
-<a href="https://jorainer.github.io/SpectraTutorials/articles/Spectra-backends.html">Spectra: an Expandable Infrastructure to Handle Mass Spectrometry
-Data</a>:
-explains the concept of backends in <em>Spectra</em>, their properties, use cases
-along with performance considerations.</li>
-<li>
-<a href="https://jorainer.github.io/SpectraTutorials/articles/Spectra-matching-with-MetaboAnnotation.html">MS/MS Spectra Matching with the MetaboAnnotation
-Package</a>:
-explains how the <em>Spectra</em> package can be used together with the <code>r BiocStyle::Biocpkg("MetaboAnnotation")</code> package in LC-MS/MS annotation
-workflows for untargeted metabolomics data.</li>
-</ul>
-</li>
-<li><p>A tutorial presenting <a href="https://jorainer.github.io/MetaboAnnotationTutorials/">Use Cases and Examples for Annotation of
-Untargeted Metabolomics
-Data</a> using
-the <code>MetaboAnnotation</code> and <code>MetaboCoreUtils</code> packages
-<span class="citation">(<label for="tufte-mn-18" class="margin-toggle">⊕</label><input type="checkbox" id="tufte-mn-18" class="margin-toggle">Rainer et al. 2022<span class="marginnote">Rainer, Johannes, Andrea Vicini, Liesa Salzer, Jan Stanstrup, Josep M Badia, Steffen Neumann, Michael A Stravs, et al. 2022. <span>“A Modular and Expandable Ecosystem for Metabolomics Data Annotation in <span>R</span>.”</span> <em>Metabolites</em> 12 (2): 173.</span>)</span>.</p></li>
-<li><p><a href="https://jorainer.github.io/xcmsTutorials/">Exploring and analyzing LC-MS data with Spectra and
-xcms</a> provides an
-overview of recent developments in Bioconductor to work with mass
-spectrometry
-(<a href="https://github.com/RforMassSpectrometry/MsExperiment">MsExperiment</a>,
-<a href="https://github.com/RforMassSpectrometry/Spectra">Spectra</a>) and
-specifically LC-MS data (<a href="https://github.com/sneumann/xcms">xcms</a>)
-and walks through the preprocessing of a small data set emphasizing
-on selection of data-dependent settings for the individual
-pre-processing steps.</p></li>
-</ul>
-</div>
-<div id="questions-and-help" class="section level2" number="7.2">
-<h2>
-<span class="header-section-number">7.2</span> Questions and help<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('questions-and-help')" onmouseout="reset_tooltip('questions-and-help-tooltip')"><span class="tooltiptext" id="questions-and-help-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>For questions about specific software or their usage, please refer to
-the software’s github issue page, or use the <a href="http://support.bioconductor.org/">Bioconductor support
-site</a>.</p>
-</div>
-<div id="session-information" class="section level2" number="7.3">
-<h2>
-<span class="header-section-number">7.3</span> Session information<div class="tooltip"><button class="internal-link-btn" onclick="copy_link('session-information')" onmouseout="reset_tooltip('session-information-tooltip')"><span class="tooltiptext" id="session-information-tooltip">Copy link</span><i class="fa fa-link"></i></button></div>
-</h2>
-<p>The following packages have been used to generate this document.</p>
-<div class="sourceCode" id="cb461"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb461-1"><a href="sec-si.html#cb461-1" tabindex="-1"></a><span class="fu">sessionInfo</span>()</span></code></pre></div>
-<pre><code>## R version 4.3.1 Patched (2023-07-10 r84676)
-## Platform: x86_64-pc-linux-gnu (64-bit)
-## Running under: Manjaro Linux
-## 
-## Matrix products: default
-## BLAS:   /usr/lib/libblas.so.3.11.0 
-## LAPACK: /usr/lib/liblapack.so.3.11.0
-## 
-## locale:
-##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
-##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
-##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
-##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
-##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
-## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
-## 
-## time zone: Europe/Brussels
-## tzcode source: system (glibc)
-## 
-## attached base packages:
-## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
-## [8] base     
-## 
-## other attached packages:
-##  [1] mzID_1.38.0                 patchwork_1.1.3            
-##  [3] factoextra_1.0.7            gplots_3.1.3               
-##  [5] limma_3.56.2                lubridate_1.9.2            
-##  [7] forcats_1.0.0               stringr_1.5.0              
-##  [9] purrr_1.0.2                 readr_2.1.4                
-## [11] tibble_3.2.1                tidyverse_2.0.0            
-## [13] MSnID_1.34.0                magrittr_2.0.3             
-## [15] tidyr_1.3.0                 ggplot2_3.4.3              
-## [17] dplyr_1.1.2                 msdata_0.40.0              
-## [19] rpx_2.8.0                   MsCoreUtils_1.12.0         
-## [21] QFeatures_1.11.1            MultiAssayExperiment_1.26.0
-## [23] SummarizedExperiment_1.30.2 Biobase_2.60.0             
-## [25] GenomicRanges_1.52.0        GenomeInfoDb_1.36.1        
-## [27] IRanges_2.34.1              MatrixGenerics_1.12.3      
-## [29] matrixStats_1.0.0           Spectra_1.10.2             
-## [31] ProtGenerics_1.32.0         BiocParallel_1.34.2        
-## [33] S4Vectors_0.38.1            BiocGenerics_0.46.0        
-## [35] mzR_2.34.1                  Rcpp_1.0.11                
-## [37] BiocStyle_2.28.0           
-## 
-## loaded via a namespace (and not attached):
-##   [1] later_1.3.1             bitops_1.0-7            filelock_1.0.2         
-##   [4] R.oo_1.25.0             preprocessCore_1.62.1   XML_3.99-0.14          
-##   [7] lifecycle_1.0.3         rstatix_0.7.2           doParallel_1.0.17      
-##  [10] lattice_0.21-8          MASS_7.3-60             backports_1.4.1        
-##  [13] sass_0.4.7              rmarkdown_2.24          jquerylib_0.1.4        
-##  [16] yaml_2.3.7              httpuv_1.6.11           DBI_1.1.3              
-##  [19] RColorBrewer_1.1-3      abind_1.4-5             zlibbioc_1.46.0        
-##  [22] R.cache_0.16.0          R.utils_2.12.2          AnnotationFilter_1.24.0
-##  [25] RCurl_1.98-1.12         rappdirs_0.3.3          GenomeInfoDbData_1.2.10
-##  [28] ggrepel_0.9.3           pheatmap_1.0.12         MSnbase_2.27.1         
-##  [31] ncdf4_1.21              codetools_0.2-19        DelayedArray_0.26.7    
-##  [34] xml2_1.3.5              tidyselect_1.2.0        farver_2.1.1           
-##  [37] BiocFileCache_2.8.0     jsonlite_1.8.7          ellipsis_0.3.2         
-##  [40] iterators_1.0.14        foreach_1.5.2           tools_4.3.1            
-##  [43] glue_1.6.2              BiocBaseUtils_1.2.0     xfun_0.40              
-##  [46] withr_2.5.0             BiocManager_1.30.22     fastmap_1.1.1          
-##  [49] rhdf5filters_1.12.1     fansi_1.0.4             caTools_1.18.2         
-##  [52] digest_0.6.33           timechange_0.2.0        R6_2.5.1               
-##  [55] mime_0.12               colorspace_2.1-0        gtools_3.9.4           
-##  [58] RSQLite_2.3.1           R.methodsS3_1.8.2       utf8_1.2.3             
-##  [61] generics_0.1.3          data.table_1.14.8       httr_1.4.7             
-##  [64] S4Arrays_1.0.5          pkgconfig_2.0.3         gtable_0.3.4           
-##  [67] blob_1.2.4              impute_1.74.1           XVector_0.40.0         
-##  [70] htmltools_0.5.6         carData_3.0-5           bookdown_0.34.2        
-##  [73] MALDIquant_1.22.1       clue_0.3-64             scales_1.2.1           
-##  [76] png_0.1-8               knitr_1.43              rstudioapi_0.15.0      
-##  [79] tzdb_0.4.0              reshape2_1.4.4          curl_5.0.2             
-##  [82] cachem_1.0.8            rhdf5_2.44.0            BiocVersion_3.17.1     
-##  [85] KernSmooth_2.23-22      parallel_4.3.1          AnnotationDbi_1.62.2   
-##  [88] vsn_3.68.0              msmbstyle_0.0.19        pillar_1.9.0           
-##  [91] grid_4.3.1              vctrs_0.6.3             pcaMethods_1.92.0      
-##  [94] promises_1.2.1          ggpubr_0.6.0            car_3.1-2              
-##  [97] dbplyr_2.3.3            xtable_1.8-4            cluster_2.1.4          
-## [100] evaluate_0.21          
-##  [ reached getOption("max.print") -- omitted 28 entries ]</code></pre>
-
-</div>
-</div></body></html>
-
-<p style="text-align: center;">
-<a href="sec-anx.html"><button class="btn btn-default">Previous</button></a>
-</p>
-<p class="build-date">Page built: 
-2023-09-06
- using 
-R version 4.3.1 Patched (2023-07-10 r84676)
-</p>
-</div>
-</div>
-
-
-
-</body>
-</html>
diff --git a/docs/style.css b/docs/style.css
deleted file mode 100644
index 4c51529..0000000
--- a/docs/style.css
+++ /dev/null
@@ -1,5 +0,0 @@
-/* original background colour is #1881c2 */
-
-:root {
-  --main-bg-color: #115a88;
-}
diff --git a/img/F02-3D-MS1-MS2-scans-100-1200-lattice.png b/img/F02-3D-MS1-MS2-scans-100-1200-lattice.png
deleted file mode 100644
index 4440af6..0000000
Binary files a/img/F02-3D-MS1-MS2-scans-100-1200-lattice.png and /dev/null differ
diff --git a/img/F02-3D-MS1-scans-400-1200-lattice.png b/img/F02-3D-MS1-scans-400-1200-lattice.png
deleted file mode 100644
index 3672c9c..0000000
Binary files a/img/F02-3D-MS1-scans-400-1200-lattice.png and /dev/null differ
diff --git a/img/MS1-MS2-spectra.png b/img/MS1-MS2-spectra.png
deleted file mode 100644
index 8713843..0000000
Binary files a/img/MS1-MS2-spectra.png and /dev/null differ
diff --git a/img/MSGFgui.png b/img/MSGFgui.png
deleted file mode 100644
index 7195618..0000000
Binary files a/img/MSGFgui.png and /dev/null differ
diff --git a/img/SE.png b/img/SE.png
deleted file mode 100644
index 54974c1..0000000
Binary files a/img/SE.png and /dev/null differ
diff --git a/img/SchematicMS2.png b/img/SchematicMS2.png
deleted file mode 100644
index 7ff3198..0000000
Binary files a/img/SchematicMS2.png and /dev/null differ
diff --git a/img/Silac.png b/img/Silac.png
deleted file mode 100644
index b799a6c..0000000
Binary files a/img/Silac.png and /dev/null differ
diff --git a/img/chromatogram.png b/img/chromatogram.png
deleted file mode 100644
index 578e1bb..0000000
Binary files a/img/chromatogram.png and /dev/null differ
diff --git a/img/chrompeaks.png b/img/chrompeaks.png
deleted file mode 100644
index b6d71a3..0000000
Binary files a/img/chrompeaks.png and /dev/null differ
diff --git a/img/cptac.png b/img/cptac.png
deleted file mode 100644
index fa4b3d4..0000000
Binary files a/img/cptac.png and /dev/null differ
diff --git a/img/frag.png b/img/frag.png
deleted file mode 100644
index 309ba91..0000000
Binary files a/img/frag.png and /dev/null differ
diff --git a/img/imp-sim.png b/img/imp-sim.png
deleted file mode 100644
index 45054a1..0000000
Binary files a/img/imp-sim.png and /dev/null differ
diff --git a/img/itraq.png b/img/itraq.png
deleted file mode 100644
index f995f61..0000000
Binary files a/img/itraq.png and /dev/null differ
diff --git a/img/mstut.gif b/img/mstut.gif
deleted file mode 100644
index 63dd457..0000000
Binary files a/img/mstut.gif and /dev/null differ
diff --git a/img/msvisfig.png b/img/msvisfig.png
deleted file mode 100644
index c8d368b..0000000
Binary files a/img/msvisfig.png and /dev/null differ
diff --git a/img/pbase.png b/img/pbase.png
deleted file mode 100644
index b02a089..0000000
Binary files a/img/pbase.png and /dev/null differ
diff --git a/img/pr0c00313_0002.gif b/img/pr0c00313_0002.gif
deleted file mode 100644
index 8ddfa50..0000000
Binary files a/img/pr0c00313_0002.gif and /dev/null differ
diff --git a/img/raw.png b/img/raw.png
deleted file mode 100644
index e961670..0000000
Binary files a/img/raw.png and /dev/null differ
diff --git a/img/vp2.png b/img/vp2.png
deleted file mode 100644
index de9f234..0000000
Binary files a/img/vp2.png and /dev/null differ
diff --git a/inst/.gitignore b/inst/.gitignore
new file mode 100644
index 0000000..c1adf9a
--- /dev/null
+++ b/inst/.gitignore
@@ -0,0 +1,4 @@
+/.quarto/
+/docs/
+*_cache
+*_files
diff --git a/inst/LICENSE.qmd b/inst/LICENSE.qmd
new file mode 100644
index 0000000..7ce3af0
--- /dev/null
+++ b/inst/LICENSE.qmd
@@ -0,0 +1,9 @@
+---
+title: "Open Source License"
+---
+
+The "R for Mass Spectrometry" *package* is licensed under the [GNU GPL v3](https://www.gnu.org/licenses/gpl-3.0.en.html).  
+
+The "R for Mass Spectrometry" *book* rendered by the package is licensed under the [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).
+
+
diff --git a/inst/_quarto.yml b/inst/_quarto.yml
new file mode 100644
index 0000000..0f09571
--- /dev/null
+++ b/inst/_quarto.yml
@@ -0,0 +1,12 @@
+project:
+  type: book
+  output-dir: docs
+
+metadata-files:
+  - assets/_book.yml
+  - assets/_website.yml
+  - assets/_format.yml
+  - assets/_knitr.yml
+
+filters:
+  - extensions/tools-tabset-ext/tools-tabset.lua
diff --git a/inst/assets/_book.yml b/inst/assets/_book.yml
new file mode 100644
index 0000000..6ab17cf
--- /dev/null
+++ b/inst/assets/_book.yml
@@ -0,0 +1,24 @@
+book:
+  license: "CC BY-SA"
+  title: "R for Mass Spectrometry"
+  chapters:
+    - index.qmd
+    - pages/introduction.qmd
+    - pages/raw-ms-data.qmd
+    - pages/20-id.qmd
+    - pages/30-quant.qmd
+    - pages/95-annex.qmd
+    - pages/99-si.qmd
+  cover-image: assets/cover.png
+  favicon: assets/favicon.png
+  sidebar: 
+    tools:
+      - icon: git
+        menu:
+          - text: Source Code
+            url:  https://github.com/js2264/R4MS/
+          - text: Browse version `devel`
+            url:  https://js2264.github.io/R4MS/docs/devel/
+    style: "docked"
+    background: "light"
+    collapse-level: 5
diff --git a/inst/assets/_format.yml b/inst/assets/_format.yml
new file mode 100644
index 0000000..5c1b255
--- /dev/null
+++ b/inst/assets/_format.yml
@@ -0,0 +1,15 @@
+format: 
+  html:
+    grid:
+      sidebar-width: 500px
+      body-width: 800px
+    theme: 
+      - cosmo
+      - assets/book.scss
+    mainfont: "Atkinson Hyperlegible, sans-serif"
+    highlight-style: atom-one
+    code-link: true
+    editor: visual
+    bibliography: assets/bibliography.bib
+    from: markdown+emoji
+  # pdf: default
diff --git a/inst/assets/_knitr.yml b/inst/assets/_knitr.yml
new file mode 100644
index 0000000..d0001a2
--- /dev/null
+++ b/inst/assets/_knitr.yml
@@ -0,0 +1,15 @@
+knitr:
+  opts_chunk: 
+    collapse: true
+    comment: "## " 
+    cache: false
+    fig.align: "center"
+    python.reticulate: false
+    R.options:
+      dplyr.print_min: 6
+      dplyr.print_max: 6
+      pillar.max_footer_lines: 2
+      pillar.min_chars: 15
+      stringr.view_n: 6
+      pillar.bold: true
+      width: 77  # 80 - 3 for the "## " output comment prefix
diff --git a/inst/assets/_website.yml b/inst/assets/_website.yml
new file mode 100644
index 0000000..7ebfca5
--- /dev/null
+++ b/inst/assets/_website.yml
@@ -0,0 +1,14 @@
+website:
+  back-to-top-navigation: true
+  search: 
+    location: sidebar
+  page-footer:
+    background: light
+    left: |
+      This book was built with <a href="https://github.com/js2264/BiocBook/">BiocBook</a> with :heart:
+    center: 
+      - text: "License"
+        href: LICENSE.qmd
+  repo-branch: devel
+  repo-actions: [edit, issue]
+  open-graph: true
diff --git a/packages.bib b/inst/assets/bibliography.bib
similarity index 50%
rename from packages.bib
rename to inst/assets/bibliography.bib
index cdcaaf6..9c2f8d7 100644
--- a/packages.bib
+++ b/inst/assets/bibliography.bib
@@ -1,3 +1,74 @@
+@Manual{serizay2023,
+    title = {BiocBook: Write, publish and maintain versioned Quarto books with Bioconductor},
+    author = {Jacques Serizay},
+    year = {2023},
+    note = {R package version 0.99.0},
+    url = {https://github.com/js2264/BiocBook},
+}
+
+@Manual{lun2023,
+    title = {rebook: Re-using Content in Bioconductor Books},
+    author = {Aaron Lun},
+    year = {2023},
+    note = {R package version 1.11.1},
+    url = {https://bioconductor.org/packages/rebook},
+    doi = {10.18129/B9.bioc.rebook},
+}
+
+@software{Allaire_Quarto_2022,
+author = {Allaire, J.J. and Teague, Charles and Scheidegger, Carlos and Xie, Yihui and Dervieux, Christophe},
+doi = {10.5281/zenodo.5960048},
+month = jan,
+title = {{Quarto}},
+url = {https://github.com/quarto-dev/quarto-cli},
+version = {1.2},
+year = {2022}
+}
+
+@Manual{Wickham2022,
+    title = {devtools: Tools to Make Developing R Packages Easier},
+    author = {Hadley Wickham and Jim Hester and Winston Chang and Jennifer Bryan},
+    year = {2022},
+    note = {R package version 2.4.5},
+    url = {https://CRAN.R-project.org/package=devtools},
+}
+
+
+@Manual{serizay2023,
+    title = {BiocBook: Write, publish and maintain versioned Quarto books with Bioconductor},
+    author = {Jacques Serizay},
+    year = {2023},
+    note = {R package version 0.99.0},
+    url = {https://github.com/js2264/BiocBook},
+}
+
+@Manual{lun2023,
+    title = {rebook: Re-using Content in Bioconductor Books},
+    author = {Aaron Lun},
+    year = {2023},
+    note = {R package version 1.11.1},
+    url = {https://bioconductor.org/packages/rebook},
+    doi = {10.18129/B9.bioc.rebook},
+}
+
+@software{Allaire_Quarto_2022,
+author = {Allaire, J.J. and Teague, Charles and Scheidegger, Carlos and Xie, Yihui and Dervieux, Christophe},
+doi = {10.5281/zenodo.5960048},
+month = jan,
+title = {{Quarto}},
+url = {https://github.com/quarto-dev/quarto-cli},
+version = {1.2},
+year = {2022}
+}
+
+@Manual{Wickham2022,
+    title = {devtools: Tools to Make Developing R Packages Easier},
+    author = {Hadley Wickham and Jim Hester and Winston Chang and Jennifer Bryan},
+    year = {2022},
+    note = {R package version 2.4.5},
+    url = {https://CRAN.R-project.org/package=devtools},
+}
+
 @Manual{R-base,
   title = {R: A Language and Environment for Statistical Computing},
   author = {{R Core Team}},
@@ -617,3 +688,435 @@ @Article{tidyverse2019
   doi = {10.21105/joss.01686},
 }
 
+@Manual{R-base,
+  title = {R: A Language and Environment for Statistical Computing},
+  author = {{R Core Team}},
+  organization = {R Foundation for Statistical Computing},
+  address = {Vienna, Austria},
+  year = {2021},
+  url = {https://www.R-project.org/},
+}
+
+@Manual{R-bookdown,
+  title = {bookdown: Authoring Books and Technical Documents with R Markdown},
+  author = {Yihui Xie},
+  year = {2021},
+  note = {R package version 0.21.6},
+  url = {https://github.com/rstudio/bookdown},
+}
+
+@Manual{R-msmbstyle,
+  title = {msmbstyle: MSMB Styles for R Markdown Documents},
+  author = {Mike Smith},
+  year = {2021},
+  note = {R package version 0.0.18},
+}
+
+@Manual{R-rmarkdown,
+  title = {rmarkdown: Dynamic Documents for R},
+  author = {JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone},
+  year = {2021},
+  note = {R package version 2.7},
+  url = {https://CRAN.R-project.org/package=rmarkdown},
+}
+
+@Book{bookdown2016,
+  title = {bookdown: Authoring Books and Technical Documents with {R} Markdown},
+  author = {Yihui Xie},
+  publisher = {Chapman and Hall/CRC},
+  address = {Boca Raton, Florida},
+  year = {2016},
+  note = {ISBN 978-1138700109},
+  url = {https://github.com/rstudio/bookdown},
+}
+
+@Book{rmarkdown2018,
+  title = {R Markdown: The Definitive Guide},
+  author = {Yihui Xie and J.J. Allaire and Garrett Grolemund},
+  publisher = {Chapman and Hall/CRC},
+  address = {Boca Raton, Florida},
+  year = {2018},
+  note = {ISBN 9781138359338},
+  url = {https://bookdown.org/yihui/rmarkdown},
+}
+
+@Book{rmarkdown2020,
+  title = {R Markdown Cookbook},
+  author = {Yihui Xie and Christophe Dervieux and Emily Riederer},
+  publisher = {Chapman and Hall/CRC},
+  address = {Boca Raton, Florida},
+  year = {2020},
+  note = {ISBN 9780367563837},
+  url = {https://bookdown.org/yihui/rmarkdown-cookbook},
+}
+
+
+@ARTICLE{Gatto:2020,
+  title    = "{MSnbase}, efficient and elegant R-based processing and
+              visualisation of raw mass spectrometry data",
+  author   = "Gatto, Laurent and Gibb, Sebastian and Rainer, Johannes",
+  abstract = "We present version 2 of the MSnbase R/Bioconductor package.
+              MSnbase provides infrastructure for the manipulation, processing
+              and visualisation of mass spectrometry data. We focus on the new
+              on-disk infrastructure, that allows the handling of large raw
+              mass spectrometry experiments on commodity hardware and
+              illustrate how the package is used for elegant data processing,
+              method development and visualisation.",
+  journal  = "J. Proteome Res.",
+  month    =  sep,
+  year     =  2020,
+  language = "en"
+}
+
+@Article{MAE,
+    title = {Software For The Integration Of Multi-Omics Experiments
+                  In Bioconductor},
+    author = {Marcel Ramos and Lucas Schiffer and Angela Re and Rimsha
+                  Azhar and Azfar Basunia and Carmen Rodriguez Cabrera
+                  and Tiffany Chan and Philip Chapman and Sean Davis
+                  and David Gomez-Cabrero and Aedin C. Culhane and
+                  Benjamin Haibe-Kains and Kasper Hansen and Hanish
+                  Kodali and Marie Stephie Louis and Arvind Singh Mer
+                  and Markus Reister and Martin Morgan and Vincent
+                  Carey and Levi Waldron},
+    journal = {Cancer Research},
+    year = {2017},
+    volume = {77(21); e39-42},
+  }
+
+
+@Manual{SE,
+    title = {SummarizedExperiment: SummarizedExperiment container},
+    author = {Martin Morgan and Valerie Obenchain and Jim Hester and Hervé Pagès},
+    year = {2020},
+    note = {R package version 1.21.0},
+    url = {https://bioconductor.org/packages/SummarizedExperiment},
+  }
+
+@Article{Christoforou:2016,
+  author =       {Christoforou, Andy and Mulvey, Claire M and
+                  Breckels, Lisa M and Geladaki, Aikaterini and
+                  Hurrell, Tracey and Hayward, Penelope C and Naake,
+                  Thomas and Gatto, Laurent and Viner, Rosa and
+                  Martinez Arias, Alfonso and Lilley, Kathryn S},
+  title =        {A draft map of the mouse pluripotent stem cell
+                 spatial proteome.},
+  journal =      {Nat Commun},
+  year =         {2016},
+  month =        {},
+  number =       {},
+  volume =       {7},
+  pages =        {8992},
+  doi =          {10.1038/ncomms9992},
+  PMID =         {26754106}}
+
+@article{Sticker:2019,
+        author = {Sticker, Adriaan and Goeminne, Ludger and Martens, Lennart and Clement, Lieven},
+        title = {Robust summarization and inference in proteome-wide label-free quantification},
+        elocation-id = {668863},
+        year = {2019},
+        doi = {10.1101/668863},
+        publisher = {Cold Spring Harbor Laboratory},
+        abstract = {Label-Free Quantitative mass spectrometry based
+                  workflows for differential expression (DE) analysis
+                  of proteins impose important challenges on the data
+                  analysis due to peptide-specific effects and context
+                  dependent missingness of peptide
+                  intensities. Peptide-based workflows, like MSqRob,
+                  test for DE directly from peptide intensities and
+                  outper-form summarization methods which first
+                  aggregate MS1 peptide intensities to protein
+                  intensities before DE analysis. However, these
+                  methods are computationally expensive, often hard to
+                  understand for the non-specialised end-user, and do
+                  not provide protein summaries, which are important
+                  for visualisation or downstream processing. In this
+                  work, we therefore evaluate state-of-the-art
+                  summarization strategies using a benchmark spike-in
+                  dataset and discuss why and when these fail compared
+                  to the state-of-the-art peptide based model,
+                  MSqRob. Based on this evaluation, we propose a novel
+                  summarization strategy, MSqRob-Sum, which estimates
+                  MSqRob{\textquoteright}s model parameters in a
+                  two-stage procedure circumventing the drawbacks of
+                  peptide-based workflows. MSqRobSum maintains
+                  MSqRob{\textquoteright}s superior performance, while
+                  providing useful protein expression summaries for
+                  plotting and downstream analysis. Summarising
+                  peptide to protein intensities considerably reduces
+                  the computational complexity, the memory footprint
+                  and the model complexity, and makes it easier to
+                  disseminate DE inferred on protein
+                  summaries. Moreover, MSqRobSum provides a highly
+                  modular analysis framework, which provides
+                  researchers with full flexibility to develop data
+                  analysis workflows tailored towards their specific
+                  applications.},
+        URL = {https://www.biorxiv.org/content/early/2019/06/13/668863},
+        eprint = {https://www.biorxiv.org/content/early/2019/06/13/668863.full.pdf},
+        journal = {bioRxiv}
+}
+
+@ARTICLE{Paulovich:2010,
+  title    = "Interlaboratory study characterizing a yeast performance standard
+              for benchmarking {LC-MS} platform performance",
+  author   = "Paulovich, Amanda G and Billheimer, Dean and Ham, Amy-Joan L and
+              Vega-Montoto, Lorenzo and Rudnick, Paul A and Tabb, David L and
+              Wang, Pei and Blackman, Ronald K and Bunk, David M and Cardasis,
+              Helene L and Clauser, Karl R and Kinsinger, Christopher R and
+              Schilling, Birgit and Tegeler, Tony J and Variyath, Asokan
+              Mulayath and Wang, Mu and Whiteaker, Jeffrey R and Zimmerman,
+              Lisa J and Fenyo, David and Carr, Steven A and Fisher, Susan J
+              and Gibson, Bradford W and Mesri, Mehdi and Neubert, Thomas A and
+              Regnier, Fred E and Rodriguez, Henry and Spiegelman, Cliff and
+              Stein, Stephen E and Tempst, Paul and Liebler, Daniel C",
+  abstract = "Optimal performance of LC-MS/MS platforms is critical to
+              generating high quality proteomics data. Although individual
+              laboratories have developed quality control samples, there is no
+              widely available performance standard of biological complexity
+              (and associated reference data sets) for benchmarking of platform
+              performance for analysis of complex biological proteomes across
+              different laboratories in the community. Individual preparations
+              of the yeast Saccharomyces cerevisiae proteome have been used
+              extensively by laboratories in the proteomics community to
+              characterize LC-MS platform performance. The yeast proteome is
+              uniquely attractive as a performance standard because it is the
+              most extensively characterized complex biological proteome and
+              the only one associated with several large scale studies
+              estimating the abundance of all detectable proteins. In this
+              study, we describe a standard operating protocol for large scale
+              production of the yeast performance standard and offer aliquots
+              to the community through the National Institute of Standards and
+              Technology where the yeast proteome is under development as a
+              certified reference material to meet the long term needs of the
+              community. Using a series of metrics that characterize LC-MS
+              performance, we provide a reference data set demonstrating
+              typical performance of commonly used ion trap instrument
+              platforms in expert laboratories; the results provide a basis for
+              laboratories to benchmark their own performance, to improve upon
+              current methods, and to evaluate new technologies. Additionally,
+              we demonstrate how the yeast reference, spiked with human
+              proteins, can be used to benchmark the power of proteomics
+              platforms for detection of differentially expressed proteins at
+              different levels of concentration in a complex matrix, thereby
+              providing a metric to evaluate and minimize pre-analytical and
+              analytical variation in comparative proteomics experiments.",
+  journal  = "Mol. Cell. Proteomics",
+  volume   =  9,
+  number   =  2,
+  pages    = "242--254",
+  month    =  feb,
+  year     =  2010,
+  language = "en"
+}
+
+@Article{Lazar:2016,
+  author = {Lazar, C and Gatto, L and Ferro, M and Bruley, C
+                 and Burger, T},
+  title = {Accounting for the Multiple Natures of Missing
+                 Values in Label-Free Quantitative Proteomics Data
+                 Sets to Compare Imputation Strategies.},
+  journal = {J Proteome Res},
+  year = {2016},
+  month = {Apr},
+  number = {4},
+  volume = {15},
+  pages = {1116-25},
+  doi = {10.1021/acs.jproteome.5b00981},
+  PMID = {26906401}
+}
+
+@Article{Cox:2008,
+  author =       {Cox, J and Mann, M},
+  title =        {MaxQuant enables high peptide identification
+                 rates, individualized p.p.b.-range mass accuracies
+                 and proteome-wide protein quantification.},
+  journal =      {Nat Biotechnol},
+  year =         {2008},
+  month =        {Dec},
+  number =       {12},
+  volume =       {26},
+  pages =        {1367-72},
+  doi =          {10.1038/nbt.1511},
+  PMID =         {19029910}}
+
+@article{Morgenstern:2020,
+   author = {Morgenstern, David and Barzilay, Rotem and Levin, Yishai},
+   title = {{RawBeans}: A Simple, Vendor-Independent, Raw-Data Quality-Control Tool},
+   journal = {Journal of Proteome Research},
+   year = {2021},
+   doi = {10.1021/acs.jproteome.0c00956},
+   note ={PMID: 33657803},
+   URL = {https://doi.org/10.1021/acs.jproteome.0c00956},
+   eprint = {https://doi.org/10.1021/acs.jproteome.0c00956}
+}
+
+
+@ARTICLE{Vanderaa:2021,
+  title    = "Replication of single-cell proteomics data reveals important
+              computational challenges",
+  author   = "Vanderaa, Christophe and Gatto, Laurent",
+  abstract = "INTRODUCTION: Mass spectrometry-based proteomics is actively
+              embracing quantitative, single-cell level analyses. Indeed,
+              recent advances in sample preparation and mass spectrometry (MS)
+              have enabled the emergence of quantitative MS-based single-cell
+              proteomics (SCP). While exciting and promising, SCP still has
+              many rough edges. The current analysis workflows are custom and
+              built from scratch. The field is therefore craving for
+              standardized software that promotes principled and reproducible
+              SCP data analyses. AREAS COVERED: This special report is the
+              first step toward the formalization and standardization of SCP
+              data analysis. scp, the software that accompanies this work,
+              successfully replicates one of the landmark SCP studies and is
+              applicable to other experiments and designs. We created a
+              repository containing the replicated workflow with comprehensive
+              documentation in order to favor further dissemination and
+              improvements of SCP data analyses. EXPERT OPINION: Replicating
+              SCP data analyses uncovers important challenges in SCP data
+              analysis. We describe two such challenges in detail: batch
+              correction and data missingness. We provide the current
+              state-of-the-art and illustrate the associated limitations. We
+              also highlight the intimate dependence that exists between batch
+              effects and data missingness and offer avenues for dealing with
+              these exciting challenges.",
+  journal  = "Expert Rev. Proteomics",
+  month    =  oct,
+  year     =  2021,
+  keywords = "Bioconductor; R; batch correction; imputation; mass spectrometry;
+              proteomics; replication; reproducible research; single-cell;
+              software",
+  language = "en"
+}
+
+
+@ARTICLE{Rainer:2022,
+  title     = "A Modular and Expandable Ecosystem for Metabolomics Data
+               Annotation in {R}",
+  author    = "Rainer, Johannes and Vicini, Andrea and Salzer, Liesa and
+               Stanstrup, Jan and Badia, Josep M and Neumann, Steffen and
+               Stravs, Michael A and Verri Hernandes, Vinicius and Gatto,
+               Laurent and Gibb, Sebastian and Witting, Michael",
+  abstract  = "Liquid chromatography-mass spectrometry (LC-MS)-based untargeted
+               metabolomics experiments have become increasingly popular
+               because of the wide range of metabolites that can be analyzed
+               and the possibility to measure novel compounds. LC-MS
+               instrumentation and analysis conditions can differ substantially
+               among laboratories and experiments, thus resulting in
+               non-standardized datasets demanding customized annotation
+               workflows. We present an ecosystem of R packages, centered
+               around the MetaboCoreUtils, MetaboAnnotation and CompoundDb
+               packages that together provide a modular infrastructure for the
+               annotation of untargeted metabolomics data. Initial annotation
+               can be performed based on MS1 properties such as m/z and
+               retention times, followed by an MS2-based annotation in which
+               experimental fragment spectra are compared against a reference
+               library. Such reference databases can be created and managed
+               with the CompoundDb package. The ecosystem supports data from a
+               variety of formats, including, but not limited to, MSP, MGF,
+               mzML, mzXML, netCDF as well as MassBank text files and SQL
+               databases. Through its highly customizable functionality, the
+               presented infrastructure allows to build reproducible annotation
+               workflows tailored for and adapted to most untargeted
+               LC-MS-based datasets. All core functionality, which supports
+               base R data types, is exported, also facilitating its re-use in
+               other R packages. Finally, all packages are thoroughly
+               unit-tested and documented and are available on GitHub and
+               through Bioconductor.",
+  journal   = "Metabolites",
+  publisher = "Multidisciplinary Digital Publishing Institute",
+  volume    =  12,
+  number    =  2,
+  pages     = "173",
+  month     =  feb,
+  year      =  2022,
+  language  = "en"
+}
+
+@article{Sinha:2020,
+    author = {Sinha, Ankit and Mann, Matthias},
+    title = "{A beginner’s guide to mass spectrometry–based proteomics}",
+    journal = {The Biochemist},
+    year = {2020},
+    month = {09},
+    abstract = "{Mass spectrometry (MS)-based proteomics is the most
+                  comprehensive approach for the quantitative
+                  profiling of proteins, their interactions and
+                  modifications. It is a challenging topic as a firm
+                  grasp requires expertise in biochemistry for sample
+                  preparation, analytical chemistry for
+                  instrumentation and computational biology for data
+                  analysis. In this short guide, we highlight the
+                  various components of a mass spectrometer, the
+                  sample preparation process for conversion of
+                  proteins into peptides, and quantification and
+                  analysis strategies. The advancing technology of
+                  MS-based proteomics now opens up opportunities in
+                  clinical applications and single-cell analysis.}",
+    issn = {0954-982X},
+    doi = {10.1042/BIO20200057},
+    url = {https://doi.org/10.1042/BIO20200057},
+    note = {BIO20200057},
+    eprint = {https://portlandpress.com/biochemist/article-pdf/doi/10.1042/BIO20200057/892770/bio20200057.pdf},
+}
+
+@Article{Steen:2004,
+  title    = "The {ABC's} (and {XYZ's}) of peptide sequencing",
+  author   = "Steen, Hanno and Mann, Matthias",
+  abstract = "Proteomics is an increasingly powerful and indispensable
+              technology in molecular cell biology. It can be used to identify
+              the components of small protein complexes and large organelles,
+              to determine post-translational modifications and in
+              sophisticated functional screens. The key - but little understood
+              - technology in mass-spectrometry-based proteomics is peptide
+              sequencing, which we describe and review here in an easily
+              accessible format.",
+  journal  = "Nat. Rev. Mol. Cell Biol.",
+  volume   =  5,
+  number   =  9,
+  pages    = "699--711",
+  month    =  sep,
+  year     =  2004,
+  language = "en"
+}
+
+@ARTICLE{Marcotte:2007,
+  title    = "How do shotgun proteomics algorithms identify proteins?",
+  author   = "Marcotte, Edward M",
+  journal  = "Nat. Biotechnol.",
+  volume   =  25,
+  number   =  7,
+  pages    = "755--757",
+  month    =  jul,
+  year     =  2007,
+  language = "en"
+}
+
+
+@ARTICLE{Shuken:2023,
+  title    = "An Introduction to Mass {Spectrometry-Based} Proteomics",
+  author   = "Shuken, Steven R",
+  abstract = "Mass spectrometry is unmatched in its versatility for studying
+              practically any aspect of the proteome. Because the foundations
+              of mass spectrometry-based proteomics are complex and span
+              multiple scientific fields, proteomics can be perceived as having
+              a high barrier to entry. This tutorial is intended to be an
+              accessible illustrated guide to the technical details of a
+              relatively simple quantitative proteomic experiment. An attempt
+              is made to explain the relevant concepts to those with limited
+              knowledge of mass spectrometry and a basic understanding of
+              proteins. An experimental overview is provided, from the
+              beginning of sample preparation to the analysis of protein group
+              quantities, with explanations of how the data are acquired,
+              processed, and analyzed. A selection of advanced topics is
+              briefly surveyed and works for further reading are cited. To
+              conclude, a brief discussion of the future of proteomics is
+              given, considering next-generation protein sequencing
+              technologies that may complement mass spectrometry to create a
+              fruitful future for proteomics.",
+  journal  = "J. Proteome Res.",
+  month    =  jun,
+  year     =  2023,
+  keywords = "bottom-up; data-dependent acquisition; label-free quantification;
+              mass spectrometry; proteomics; untargeted proteomics",
+  language = "en"
+}
diff --git a/inst/assets/book.scss b/inst/assets/book.scss
new file mode 100644
index 0000000..174271f
--- /dev/null
+++ b/inst/assets/book.scss
@@ -0,0 +1,249 @@
+/*-- scss:defaults --*/
+
+$primary: #070707 !default;
+$body-color: #070707 !default;
+
+/*-- scss:rules --*/
+
+/* ------------------------------------------------------ */
+/* ------------------------------------------------------ */
+/* ------------------ CUSTOM RULES ---------------------- */
+/* ------------------------------------------------------ */
+/* ------------------------------------------------------ */
+
+// Add any custom css styling here... 
+
+/* Callout  ------------------------------------------------ */
+
+$icon: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-chat-left-text" viewBox="0 0 16 16"><path d="M14 1a1 1 0 0 1 1 1v8a1 1 0 0 1-1 1H4.414A2 2 0 0 0 3 11.586l-2 2V2a1 1 0 0 1 1-1h12zM2 0a2 2 0 0 0-2 2v12.793a.5.5 0 0 0 .854.353l2.853-2.853A1 1 0 0 1 4.414 12H14a2 2 0 0 0 2-2V2a2 2 0 0 0-2-2H2z"/><path d="M3 3.5a.5.5 0 0 1 .5-.5h9a.5.5 0 0 1 0 1h-9a.5.5 0 0 1-.5-.5zM3 6a.5.5 0 0 1 .5-.5h9a.5.5 0 0 1 0 1h-9A.5.5 0 0 1 3 6zm0 2.5a.5.5 0 0 1 .5-.5h5a.5.5 0 0 1 0 1h-5a.5.5 0 0 1-.5-.5z"/></svg>') !default;
+
+div.callout-question.callout {
+  border-left-color: #0df0a1;
+}
+
+div.callout-question.callout-style-default>.callout-header {
+  background-color: #bfebce;
+}
+
+.callout-question.icon.callout-style-default div.callout-icon-container {
+  padding-top: 0.1em;
+  padding-right: 0.35em;
+}
+
+div.callout-answer.callout {
+  border-left-color: #0dcaf0;
+}
+
+div.callout-answer.callout-style-default>.callout-header {
+  background-color: #bfe4eb;
+}
+
+.callout-answer.icon .callout-icon {
+  display: unset !important;
+}
+
+div.callout-answer.icon.callout-captioned .callout-icon::before {
+  background-image: $icon;
+}
+
+.callout-answer.icon.callout-style-default div.callout-icon-container {
+  padding-top: 0.1em;
+  padding-right: 0.35em;
+}
+
+
+/* ------------------------------------------------------ */
+/* ------------------------------------------------------ */
+/* ----------------- DEFAULT RULES ---------------------- */
+/* ------------------------------------------------------ */
+/* ------------------------------------------------------ */
+
+code { color: #070707; }
+
+/* Code chunks ------------------------------------------ */
+
+div.sourceCode {
+  background-color: #ffffff00;
+  border: 2px;
+  border-radius: 8px;
+  box-shadow: 0 0 0 0 rgba(0, 0, 0, 0.06), 0 2px 5px 0 rgba(0, 0, 0, 0.06), 0 10px 10px 0 rgba(0, 0, 0, 0.05), 0 22px 13px 0 rgba(0, 0, 0, 0.03), 0 39px 16px 0 rgba(0, 0, 0, 0.01), 0 61px 17px 0 rgba(0, 0, 0, 0);
+}
+
+pre.sourceCode.r, code.sourceCode.r {
+  
+  $border: 2px;
+  color: #070707;
+  background: #FFF;
+  background-clip: padding-box; /* !importanté */
+  border: solid $border transparent; /* !importanté */
+  border-radius: 8px;
+  
+  &:before {
+    content: '';
+    position: absolute;
+    top: 0; right: 0; bottom: 0; left: 0;
+    z-index: -1;
+    margin: -$border; /* !importanté */
+    border-radius: inherit; /* !importanté */
+    background: linear-gradient(to right, #18a603, #0484a9, #0087af);
+  }
+
+}
+
+/* Chapter label ---------------------------------------- */
+
+.chapter-number::before {
+  content: "Chapter ";
+}
+
+.chapter-number::after {
+  content: " –";
+}
+
+/* Cover image ------------------------------------------ */
+
+.quarto-cover-image {
+  max-width: 250px;
+  float: right;
+  margin-left: 30px;
+  margin-top: -30px;
+  margin-right: 10%;
+}
+
+/* Left navbar ------------------------------------------ */
+
+div.sidebar-item-container .active, div.sidebar-item-container .show>.nav-link, div.sidebar-item-container .sidebar-link>code {
+  font-weight: 800;
+}
+
+.sidebar-title {
+  font-weight: 800;
+  background: linear-gradient(to right, #18a603, #0484a9, #0087af);
+  -webkit-background-clip: text;
+  -webkit-text-fill-color: transparent;
+}
+
+.sidebar-tools-main {
+  font-weight: normal;
+  background: white;
+  color: black;
+  -webkit-background-clip: text;
+  -webkit-text-fill-color: black;
+}
+
+.sidebar-navigation li a {
+  text-decoration: underline;
+}
+
+.text-start {
+  text-align: left !important;
+  font-weight: 800;
+}
+
+#quarto-sidebar {
+  transition: width .15s ease-in;
+  padding: 14px 10%;
+  background-color: white;
+}
+
+.sidebar.sidebar-navigation:not(.rollup) {
+  border-right: 0px !important
+}
+
+.sidebar-navigation .sidebar-item {
+  font-size: 1rem;
+  line-height: 2em;
+}
+
+.sidebar-menu-container {
+  border: solid #add2dd 1px;
+  border-radius: 8px;
+  padding: 8px;
+  margin-top: 25px;
+}
+
+/* right navbar ----------------------------------------- */
+
+.sidebar nav[role=doc-toc] ul>li>a.active, .sidebar nav[role=doc-toc] ul>li>ul>li>a.active {
+  border-left: 4px solid #3792ad;
+  font-weight: 800;
+}
+
+#toc-title+ ul > li > .nav-link {
+  font-weight: 800;
+}
+
+/* Headings and text styling --------------------------- */
+
+h1 {
+  font-size: 36px;
+  color: #070707;
+  font-weight: 700;
+  border-image: linear-gradient(to right, #18a603, #0484a9, #0087af) 1;
+  border-bottom-style: solid;
+  border-bottom-width: 4px;
+}
+h2 {
+  margin-top: 3rem;
+  margin-bottom: 1rem;
+  font-size: 32px;
+  border-bottom: 0px;
+}
+h3 { margin-top: 1.5em; font-size: 1.2rem; }
+h4 { margin-top: 1.5em; font-size: 1.1rem; }
+h5 { margin-top: 1.5em; font-size: 1rem; }
+
+h1, .h1, h2, .h2, h3, .h3, h4, .h4, h5, .h5 {
+  line-height: 120%;
+  margin: 0 0 1rem;
+  width: fit-content;
+  padding-top: 0.5rem;
+  color: #070707;
+}
+
+p {
+  margin: 0 0 1rem;
+  font-size: 1rem;
+  color: #070707;
+  line-height: 130%;
+  display: block;
+  margin-block-start: 1em;
+  margin-block-end: 1em;
+  margin-inline-start: 0px;
+  margin-inline-end: 0px;
+}
+
+.quarto-section-identifier {
+  color: #070707;
+  font-weight: normal;
+}
+
+ul li::marker {
+  color: #3792ad; 
+}
+
+/* Underlining links ------------------------------------ */
+
+.citation a, .footnote-ref {
+  text-decoration: underline;
+}
+
+/* Printing --------------------------------------------- */
+
+@media print {
+  :root {
+    font-size: 11pt;
+  }
+  #quarto-sidebar, #TOC, .nav-page {
+    display: none;
+  }
+  .page-columns .content {
+    grid-column-start: page-start;
+  }
+  .fixed-top {
+    position: relative;
+  }
+  .panel-caption, .figure-caption, figcaption {
+    color: #666;
+  }
+}
diff --git a/inst/assets/cover.png b/inst/assets/cover.png
new file mode 100644
index 0000000..0c59537
Binary files /dev/null and b/inst/assets/cover.png differ
diff --git a/inst/assets/favicon.png b/inst/assets/favicon.png
new file mode 100644
index 0000000..d36072a
Binary files /dev/null and b/inst/assets/favicon.png differ
diff --git a/inst/extensions/pandoc-ext/section-bibliographies/_extension.yml b/inst/extensions/pandoc-ext/section-bibliographies/_extension.yml
new file mode 100644
index 0000000..2f3e8c7
--- /dev/null
+++ b/inst/extensions/pandoc-ext/section-bibliographies/_extension.yml
@@ -0,0 +1,6 @@
+name: section-bibliographies
+author: Albert Krewinkel
+version: 0.0.1
+contributes:
+  filters:
+    - section-bibliographies.lua
diff --git a/inst/extensions/pandoc-ext/section-bibliographies/section-bibliographies.lua b/inst/extensions/pandoc-ext/section-bibliographies/section-bibliographies.lua
new file mode 100644
index 0000000..957aad3
--- /dev/null
+++ b/inst/extensions/pandoc-ext/section-bibliographies/section-bibliographies.lua
@@ -0,0 +1,148 @@
+--- section-bibliographies.lua – generate a bibliography for each section
+---
+--- Copyright: © 2018 Jesse Rosenthal, 2020–2022 Albert Krewinkel
+--- License: MIT – see LICENSE for details
+
+-- pandoc.utils.make_sections exists since pandoc 2.8
+PANDOC_VERSION:must_be_at_least {2,8}
+
+local utils = require 'pandoc.utils'
+local run_json_filter = utils.run_json_filter
+
+--- The document's metadata
+local meta
+-- Lowest level at which bibliographies should be generated.
+local section_refs_level
+-- original bibliography value
+local orig_bibliography
+
+--- Truthy (the `number` attribute) iff `div` is a numbered section Div.
+local function is_section_div (div)
+  if div.t ~= 'Div' then return false end
+  if div.classes[1] ~= 'section' then return false end
+  return div.attributes.number
+end
+
+--- First block of a section Div if it is a Header, otherwise nil.
+local function section_header (div)
+  local first = div.content and div.content[1]
+  local ok = is_section_div(div) and first and first.t == 'Header'
+  if ok then return first end
+  return nil
+end
+
+--- Give the bibliography header and refs Div found in a section
+--- section-specific identifiers, suffixed with the section number.
+local function adjust_refs_components (div)
+  local header = section_header(div)
+  if not header then return div end
+  local function with_id (id)
+    return function (b) return b.attr and b.identifier == id end
+  end
+  local number = header.attributes.number
+  local bib_header = div.content:find_if(with_id 'bibliography')
+  local refs = div.content:find_if(with_id 'refs')
+  if bib_header then
+    bib_header.identifier = 'bibliography-' .. number
+    -- nest the bibliography heading one level below its section
+    bib_header.level = header.level + 1
+  end
+  if refs and refs.identifier == 'refs' then
+    refs.identifier = 'refs-' .. number
+  end
+  return div
+end
+
+--- Process citations in `doc` with the citeproc backend selected by the
+--- running pandoc version, and return that backend's result.
+local function run_citeproc (doc)
+  if PANDOC_VERSION >= '2.19.1' then return pandoc.utils.citeproc(doc) end
+  if not (PANDOC_VERSION >= '2.11') then
+    return run_json_filter(doc, 'pandoc-citeproc', {FORMAT, '-q'})
+  end
+  local cli_args = {'--from=json', '--to=json', '--citeproc'}
+  return run_json_filter(doc, 'pandoc', cli_args)
+end
+
+--- Create a bibliography for a single section. Only section divs whose
+--- header level is at or above `section_refs_level` are processed; nil
+--- is returned for every other div.
+local function create_section_bibliography (div)
+  -- nothing to do when the document carries no bibliography data
+  if not (meta.bibliography or meta.references) then
+    return nil
+  end
+  local header = section_header(div)
+  if not header or header.level > section_refs_level then
+    -- not a section, or nested deeper than the configured level
+    return nil
+  end
+  -- `own` blocks are handed to citeproc; `nested` section divs are
+  -- re-appended afterwards without being processed here
+  local own, nested = div.content, pandoc.List:new{}
+  if header.level ~= section_refs_level then
+    own = div.content:filter(function (b)
+        return not is_section_div(b)
+    end)
+    nested = div.content:filter(is_section_div)
+  end
+  -- citeproc runs on a temporary document that shares the metadata
+  local tmp_doc = pandoc.Pandoc(own, meta)
+  local cited = run_citeproc(tmp_doc)
+  div.content = cited.blocks .. nested
+  return adjust_refs_components(div)
+end
+
+--- Unwrap a section div: copy the div's identifier onto its header,
+--- drop the header's `number` attribute and return the bare contents.
+local function flatten_sections (div)
+  local header = section_header(div)
+  if not header then
+    return nil
+  end
+  header.identifier = div.identifier
+  header.attributes.number = nil
+  div.content[1] = header
+  return div.content
+end
+
+--- Filter that deletes the references div and the bibliography header
+--- added by an earlier pandoc-citeproc run.
+local remove_pandoc_citeproc_results = {
+  Header = function (header)
+    -- an empty list removes the element; nil keeps it as is
+    if header.identifier == 'bibliography' then return {} end
+    return nil
+  end,
+  Div = function (div)
+    -- an empty list removes the element; nil keeps it as is
+    if div.identifier == 'refs' then return {} end
+    return nil
+  end
+}
+
+--- Put back the bibliography value saved by setup_document.
+local function restore_bibliography (m)
+  m.bibliography = orig_bibliography; return m
+end
+
+--- Setup the document for further processing by wrapping all
+--- sections in Div elements.
+local function setup_document (doc)
+  -- save meta for other filter functions
+  meta = doc.meta
+  section_refs_level = tonumber(meta["section-bibs-level"]) or 1
+  -- remember the original value; restore_bibliography puts it back
+  orig_bibliography = meta.bibliography
+  meta.bibliography = meta['section-bibs-bibliography'] or meta.bibliography
+  return pandoc.Pandoc(utils.make_sections(true, nil, doc.blocks), doc.meta)
+end
+
+return {
+  -- remove result of previous pandoc-citeproc run (for backwards
+  -- compatibility)
+  remove_pandoc_citeproc_results,
+  {Pandoc = setup_document},            -- wrap sections in Divs
+  {Div = create_section_bibliography},  -- run citeproc per section
+  {Div = flatten_sections, Meta = restore_bibliography}  -- unwrap, restore
+}
diff --git a/inst/extensions/tools-tabset-ext/tools-tabset.lua b/inst/extensions/tools-tabset-ext/tools-tabset.lua
new file mode 100644
index 0000000..6b11e00
--- /dev/null
+++ b/inst/extensions/tools-tabset-ext/tools-tabset.lua
@@ -0,0 +1,39 @@
+
+-- Maps a tab title (as plain text) to the icon file shown next to it.
+local kTabsetIcons = {
+  ["VS Code"] = "vscode-logo.jpg",
+  ["R"] = "rstudio-logo.jpg",
+  ["Terminal"] = "text-editor-logo.jpg"
+}
+
+local injected = false
+--- Inject (once) a script prepending a "Choose your tool" heading to tools-tabset tabs.
+local function injectChooseYourTool()
+  if injected then return end
+  injected = true
+  quarto.doc.include_text('after-body', [[
+      <script type="text/javascript">
+        for (const navTab of document.querySelectorAll(".panel-tabset[data-group='tools-tabset'] ul")) {
+          const choose = document.createElement("h3");
+          choose.classList.add("no-anchor");
+          choose.classList.add("choose-your-tool");
+          choose.innerText = "Choose your tool";
+          navTab.prepend(choose);
+        }       
+      </script>
+    ]])
+end
+
+--- For tabsets in the "tools-tabset" group, prefix known tab titles
+--- with their icon; all other tabsets are returned untouched.
+function Tabset(el)
+  if el.attr.attributes["group"] ~= "tools-tabset" then return el end
+  injectChooseYourTool()
+  for _, tab in ipairs(el.tabs) do
+    local icon = kTabsetIcons[pandoc.utils.stringify(tab.title)]
+    if icon then
+      tab.title.content:insert(1, pandoc.Image("", "/pages/images/" .. icon))
+    end
+  end
+  return el
+end
\ No newline at end of file
diff --git a/index.Rmd b/inst/index.qmd
similarity index 69%
rename from index.Rmd
rename to inst/index.qmd
index fb8478c..eacc643 100644
--- a/index.Rmd
+++ b/inst/index.qmd
@@ -1,25 +1,37 @@
 ---
-title: "R for Mass Spectrometry"
-subtitle: "Applications in Proteomics and Metabolomics"
-author: "Laurent Gatto, Sebastian Gibb, Johannes Rainer"
-date: "`r Sys.Date()`"
-output:
-  msmbstyle::msmb_html_book:
-    highlight: tango
-    toc: TRUE
-    toc_depth: 1
-    split_by: chapter
-    margin_references: TRUE
-    css: style.css
-bibliography: [refs.bib, packages.bib]
-link-citations: yes
+license: "CC BY-SA"
 ---
 
-# Preamble
+```{r "intro"}
+#| echo: false
+## Banner describing the package; any error yields a plain fallback label.
+intro <- tryCatch({
+        description <- packageDescription("R4MS")
+        pkg <- description$Package
+        version <- description$Version
+        authors <- eval(parse(text = description[["Authors@R"]]))  # exact name
+        license <- description$License
+        glue::glue(
+            "**Package:** {pkg}<br/>\n",
+            "**Authors:** {paste(format(authors, include = c('given', 'family', 'role')), collapse = ', ')}<br/>\n",
+            "**Compiled:** {as.character(Sys.Date())}<br/>\n",
+            "**Package version:** {version}<br/>\n",
+            "**R version:** <b style='color:red;'>{R.version.string}</b><br/>\n",
+            "**BioC version:** <b style='color:red;'>{BiocManager::version()}</b><br/>\n",
+            "**Package license:** {license}<br/>\n",
+            "**Book license:** CC BY-SA<br/>"  # matches front matter and licence text
+        )
+    },
+    error = function(e) {"Local preview"}  # e.g. R4MS is not installed
+)
+```
+
+`r intro`
+
+# Welcome {-}
 
 ```{r, echo = FALSE}
 options(bitmapType="cairo")
-
 ```
 
 The aim of the [R for Mass
@@ -32,10 +44,6 @@ development efforts of its core members under the RforMassSpectrometry
 organisation to facilitate dissemination and accessibility of their
 work.
 
-```{r sticker, fig.cap = "The *R for Mass Spectrometry* intiative sticker, designed by Johannes Rainer.", out.width = '50%', fig.margin=TRUE, echo=FALSE}
-knitr::include_graphics("https://github.com/rformassspectrometry/stickers/raw/master/sticker/RforMassSpectrometry.png")
-```
-
 This material introduces participants to the analysis and exploration
 of mass spectrometry (MS) based proteomics data using R and
 Bioconductor. The course will cover all levels of MS data, from raw
@@ -66,12 +74,6 @@ data structures such as data frames, vectors, matrices, ... and their
 manipulation) is required. Familiarity with other Bioconductor omics
 data classes and the tidyverse syntax is useful, but not necessary.
 
-
-```{r bib, include=FALSE}
-# create a bib file for the R packages used in this document
-knitr::write_bib(c('base', 'rmarkdown', 'bookdown', 'msmbstyle'), file = 'skeleton.bib')
-```
-
 ```{r env_0, echo = FALSE, message = FALSE, warning = FALSE}
 suppressPackageStartupMessages(library("BiocStyle"))
 suppressPackageStartupMessages(library("mzR"))
@@ -107,7 +109,6 @@ BiocManager::install("PSMatch")
 BiocManager::install("pheatmap")
 BiocManager::install("limma")
 BiocManager::install("MSnID")
-BiocManager::install("RforMassSpectrometry/SpectraVis")
 ```
 
 Follow the instructions in [this
@@ -116,25 +117,6 @@ to install the packages and download some of the data used in the
 following chapters. All software versions used to generate this
 document are recoded at the end of the book in \@ref(sec-si).
 
-To compile and render the teaching material, you will also need
-the `r BiocStyle::Biocpkg("BiocStyle")` package and the (slighly
-modified) [Modern Statistics for Model Biology (msmb) HTML Book
-Style](https://www-huber.embl.de/users/msmith/msmbstyle/) by Mike
-Smith:
-
-```{r setup2, eval = FALSE}
-BiocManager::install(c("bookdown", "BiocStyle", "lgatto/msmbstyle"))
-```
-
-Run the [installation
-script](https://github.com/rformassspectrometry/docs/blob/main/install_docs_deps.R)
-by executing the line below to install all requirements to compile the
-book:
-
-```{r source, eval = FALSE}
-source("https://raw.githubusercontent.com/rformassspectrometry/docs/main/install_docs_deps.R")
-```
-
 ## Acknowledgments {-}
 
 Thank you to [Charlotte Soneson](https://github.com/csoneson) for
@@ -153,3 +135,57 @@ Attribution-ShareAlike 4.0 International License</a>. You are free to
 and **adapt** (remix, transform, and build upon the material) for any
 purpose, even commercially, as long as you give appropriate credit and
 distribute your contributions under the same license as the original.
+
+
+# Docker image {-}
+
+A `Docker` image built from this repository is available here: 
+
+👉 [ghcr.io/js2264/r4ms](https://ghcr.io/js2264/r4ms) 🐳
+
+::: {.callout-tip icon='true'}
+## Get started now 🎉
+
+You can get access to all the packages used in this book in < 1 minute, 
+using this command in a terminal: 
+
+```{sh "docker", filename="bash"}
+#| eval: false
+docker run -it ghcr.io/js2264/r4ms:devel R
+```
+
+:::
+
+# RStudio Server {-}
+
+An RStudio Server instance can be initiated from the `Docker` image as follows: 
+
+```{sh "rstudio", filename="bash"}
+#| eval: false
+docker run \
+    --volume <local_folder>:<destination_folder> \
+    -e PASSWORD=OHCA \
+    -p 8787:8787 \
+    ghcr.io/js2264/r4ms:devel
+```
+
+The RStudio Server instance will then be available at
+[http://localhost:8787](http://localhost:8787).
+
+# Session info {-}
+
+::: {.callout-note collapse="true"}
+
+## Click to expand 👇
+
+```{r "session info"}
+#| cache: false
+sessioninfo::session_info(
+    installed.packages()[,"Package"], 
+    include_base = TRUE
+)
+```
+
+:::
+
+# References {-}
diff --git a/20-id.Rmd b/inst/pages/20-id.qmd
similarity index 84%
rename from 20-id.Rmd
rename to inst/pages/20-id.qmd
index ac65891..ea37dcb 100644
--- a/20-id.Rmd
+++ b/inst/pages/20-id.qmd
@@ -11,17 +11,18 @@ the exact command.
 The example below illustrates this for 3 mzML files to be searched
 using `MSGFplus`:
 
-```{r msgf, eval = TRUE}
-(mzmls <- paste0("file_", 1:3, ".mzML"))
-(mzids <- sub("mzML", "mzid", mzmls))
+```{r msgf}
+mzmls <- paste0("file_", 1:3, ".mzML")
+mzids <- sub("mzML", "mzid", mzmls)
 
-paste0("java -jar /path/to/MSGFPlus.jar",
+cmds <- paste0("java -jar /path/to/MSGFPlus.jar",
        " -s ", mzmls,
        " -o ", mzids,
        " -d uniprot.fas",
        " -t 20ppm",
        " -m 0",
        " int 1")
+cmds
 ```
 
 ## Identification data.frame
@@ -55,14 +56,12 @@ n_scans <- length(unique(id$spectrumID))
 n_seqs <- length(unique(id$sequence))
 ```
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Verify that this table contains `r n_matches` matches for `r n_scans`
 scans and `r n_seqs` peptides sequences.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 nrow(id) ## number of matches
@@ -70,7 +69,9 @@ length(unique(id$spectrumID)) ## number of scans
 length(unique(id$sequence))   ## number of peptide sequences
 ```
 
-`r msmbstyle::solution_end()`
+:::
+
+:::
 
 The PSM data are read as is, without any filtering. As we can see
 below, we still have all the hits from the forward and reverse (decoy)
@@ -93,7 +94,6 @@ table(table(id$spectrumID))
 Below, we can see how scan 1774 has 4 matches, all to sequence
 `RTRYQAEVR`, which itself matches to 4 different proteins:
 
-
 ```{r}
 i <- which(id$spectrumID == "controllerType=0 controllerNumber=1 scan=1774")
 data.frame(id[i, ])[1:5]
@@ -138,44 +138,36 @@ Here, the `filter()` from the `dplyr` package comes very handy. We
 will thus start by converting the `DataFrame` to a `tibble`.
 
 ```{r, message = FALSE}
-library("dplyr")
+library(dplyr)
 id_tbl <- tidyr::as_tibble(id)
 id_tbl
 ```
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 - Remove decoy hits
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r, message = FALSE}
-id_tbl <- id_tbl %>%
+id_tbl <- id_tbl |> 
     filter(!isDecoy)
 id_tbl
 ```
-`r msmbstyle::solution_end()`
 
-`r msmbstyle::question_begin()`
+:::
 
 - Keep first rank matches
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
-
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
-id_tbl <- id_tbl %>%
+id_tbl <- id_tbl |> 
     filter(rank == 1)
 id_tbl
 ```
-`r msmbstyle::solution_end()`
 
-
-`r msmbstyle::question_begin()`
+:::
 
 - Remove shared peptides. Start by identifying scans that match
   different proteins. For example scan 4884 matches proteins
@@ -183,27 +175,28 @@ id_tbl
   `XXX_ECA4416_2` and `XXX_ECA4416_3`. Then remove the scans that
   match any of these proteins.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 mltm <-
-    id_tbl %>%
-    group_by(spectrumID) %>%
-    mutate(nProts = length(unique(DatabaseAccess))) %>%
-    filter(nProts > 1) %>%
+    id_tbl |> 
+    group_by(spectrumID) |> 
+    mutate(nProts = length(unique(DatabaseAccess))) |> 
+    filter(nProts > 1) |> 
     select(spectrumID, nProts)
 mltm
 ```
+
 ```{r}
 id_tbl <-
-    id_tbl %>%
+    id_tbl |> 
     filter(!spectrumID %in% mltm$spectrumID)
 id_tbl
 ```
-`r msmbstyle::solution_end()`
 
+:::
+
+:::
 
 Which leaves us with `r nrow(id_tbl)` PSMs.
 
@@ -219,7 +212,6 @@ The `describePeptides()` and `describeProteins()` functions from the
 `PSMatch` package provide useful summaries of preptides and proteins
 in a PSM search result.
 
-
 - `describePeptides()` gives the number of unique and shared peptides
   and for the latter, the size of their protein groups:
 
@@ -239,28 +231,28 @@ matrices](https://rformassspectrometry.github.io/PSMatch/articles/AdjacencyMatri
 `PSMatch` vignette provides additional tools to explore how proteins
 were inferred from peptides.
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Compare the distribution of raw identification scores of the decoy and
 non-decoy hits. Interpret the figure.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 library(ggplot2)
-as_tibble(id) %>%
+as_tibble(id) |> 
     ggplot(aes(x = MS.GF.RawScore,
                colour = isDecoy)) +
     geom_density()
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
-`r msmbstyle::question_begin()`
+:::
 
-The `r CRANpkg("tidyverse")`
+::: {.callout-question .icon .callout-note}
+
+The `r BiocStyle::CRANpkg("tidyverse")`
 tools are fit for data wrangling with identification data. Using the
 above identification dataframe, calculate the length of each peptide
 (you can use `nchar` with the peptide sequence `sequence`) and the
@@ -268,27 +260,22 @@ number of peptides for each protein (defined as
 `DatabaseDescription`). Plot the length of the proteins against their
 respective number of peptides.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
-
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r answid1, fig.cap = "Identifcation data wrangling."}
-suppressPackageStartupMessages(library("dplyr"))
-iddf <- as_tibble(id_filtered) %>%
+iddf <- as_tibble(id_filtered) |> 
     mutate(peplen = nchar(sequence))
-npeps <- iddf %>%
-    group_by(DatabaseAccess) %>%
-    tally
+npeps <- iddf |> 
+    group_by(DatabaseAccess) |> 
+    tally()
 iddf <- full_join(iddf, npeps)
 
-library("ggplot2")
 ggplot(iddf, aes(x = n, y = DBseqLength)) + geom_point()
 ```
 
-`r msmbstyle::solution_end()`
-
+:::
 
+:::
 
 If you would like to learn more about how the mzid data are handled by
 `PSMatch` via the `r BiocStyle::Biocpkg("mzR")` and `r BiocStyle::Biocpkg("mzID")`
@@ -300,19 +287,33 @@ packages, check out the \@ref(sec-id2) section in the annex.
 We are going to use the `sp` object created in the previous chapter
 and the `id_filtered` variable generated above.
 
+::: {.callout-tip collapse="true"}
+
+## Generating the `sp` object from scratch  👇
+
+```{r}
+library(rpx)
+library(Spectra)
+fn <- "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML"
+px <- PXDataset("PXD000001")
+mzf <- pxget(px, fn)
+sp <- Spectra(mzf)
+sp
+```
+
+:::
+
 Identification data (as a `DataFrame`) can be merged into raw data (as
 a `Spectra` object) by adding new spectra variables to the appropriate
 MS2 spectra. Scans and peptide-spectrum matches can be matched by
 their spectrum identifiers.
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Identify the spectrum identifier columns in the `sp` and `id_filtered`
 variables.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 In the raw data, it is encoded as `spectrumId`, while in the
 identification data, we have `spectrumID`.
@@ -322,7 +323,9 @@ spectraVariables(sp)
 names(id_filtered)
 ```
 
-`r msmbstyle::solution_end()`
+:::
+
+:::
 
 We still have several PSMs that are matched to a single spectrum
 identifier:
@@ -334,10 +337,9 @@ table(table(id_filtered$spectrumID))
 Let's look at `"controllerType=0 controllerNumber=1 scan=5490"`, which
 has 4 matching PSMs, in detail.
 
-
 ```{r}
 which(table(id_filtered$spectrumID) == 4)
-id_4 <- id_filtered[id_filtered$spectrumID == "controllerType=0 controllerNumber=1 scan=5490", ] %>%
+id_4 <- id_filtered[id_filtered$spectrumID == "controllerType=0 controllerNumber=1 scan=5490", ] |> 
     as.data.frame()
 id_4
 ```
@@ -353,14 +355,11 @@ Let's reduce that PSM table before joining it to the `Spectra` object,
 to make sure we have unique one-to-one matches between the raw spectra
 and the PSMs.
 
-
 ```{r, warning = FALSE}
 id_filtered <- reducePSMs(id_filtered, id_filtered$spectrumID)
 id_filtered
 ```
 
-
-
 These two data can thus simply be joined using:
 
 ```{r}
@@ -370,15 +369,12 @@ sp <- joinSpectraData(sp, id_filtered,
 spectraVariables(sp)
 ```
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Verify that the identification data has been added to the correct
 spectra.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
-
+::: {.callout-answer .icon .callout-note collapse=true}
 
 Let's first verify that no identification data has been added to the
 MS1 scans.
@@ -400,8 +396,10 @@ Let's compare the precursor/peptide mass to charges
 sp_2 <- sp_2[!is.na(sp_2$sequence)]
 summary(sp_2$precursorMz - sp_2$experimentalMassToCharge)
 ```
-`r msmbstyle::solution_end()`
 
+:::
+
+:::
 
 ## An identification-annotated chromatogram
 
@@ -417,7 +415,6 @@ either 1 or 0, depending on the presence of a sequence. For MS1 scans,
 the function will count the number of sequences for the descendant MS2
 scans, i.e. those produced from precursor ions from each MS1 scan.
 
-
 ```{r nSequence, cache = FALSE}
 sp <- countIdentifications(sp)
 ```
@@ -435,11 +432,11 @@ These data can also be visualised on the total ion chromatogram:
 
 ```{r nSequencePlot, fig.fullwidth = TRUE, fig.width = 8, fig.height = 4}
 sp |>
-filterMsLevel(1) |>
-spectraData() |>
-as_tibble() |>
-ggplot(aes(x = rtime,
-           y = totIonCurrent)) +
+    filterMsLevel(1) |>
+    spectraData() |>
+    as_tibble() |>
+    ggplot(aes(x = rtime,
+            y = totIonCurrent)) +
     geom_line(alpha = 0.25) +
     geom_point(aes(colour = ifelse(countIdentifications == 0,
                                    NA, countIdentifications)),
@@ -474,12 +471,10 @@ plotSpectra(sp[i], labels = addFragments,
             labelPos = 3, labelCol = "steelblue")
 ```
 
-
 When a precursor peptide ion is fragmented in a CID cell, it breaks at
 specific bonds, producing sets of peaks (*a*, *b*, *c* and *x*, *y*,
 *z*) that can be predicted.
 
-
 ```{r frag_img, results='markup', fig.margin=FALSE, fig.cap="Peptide fragmentation.", echo=FALSE, out.width = "80%"}
 knitr::include_graphics("img/frag.png")
 ```
@@ -497,34 +492,31 @@ calculateFragments(sp[i]$sequence)
 The `compareSpectra()` function can be used to compare spectra (by default,
 computing the normalised dot product).
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
-1. Create a new `Spectra` object containing the MS2 spectra with
-   sequences `"SQILQQAGTSVLSQANQVPQTVLSLLR"` and
-   `"TKGLNVMQNLLTAHPDVQAVFAQNDEMALGALR"`.
+Create a new `Spectra` object containing the MS2 spectra with
+sequences `"SQILQQAGTSVLSQANQVPQTVLSLLR"` and
+`"TKGLNVMQNLLTAHPDVQAVFAQNDEMALGALR"`.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 k <- which(sp$sequence %in% c("SQILQQAGTSVLSQANQVPQTVLSLLR", "TKGLNVMQNLLTAHPDVQAVFAQNDEMALGALR"))
 sp_k <- sp[k]
 sp_k
 ```
-`r msmbstyle::solution_end()`
-
 
-`r msmbstyle::question_begin()`
+:::
 
-2. Calculate the `r length(sp_k)` by `r length(sp_k)` similarity
-   matrix between all spectra using `compareSpectra`. See the
-   `?Spectra` man page for details. Draw a heatmap of that matrix.
+:::
 
-`r msmbstyle::question_end()`
+::: {.callout-question .icon .callout-note}
 
-`r msmbstyle::solution_begin()`
+Calculate the `r length(sp_k)` by `r length(sp_k)` similarity
+matrix between all spectra using `compareSpectra`. See the
+`?Spectra` man page for details. Draw a heatmap of that matrix.
 
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 mat <- compareSpectra(sp_k)
@@ -533,41 +525,37 @@ mat
 pheatmap::pheatmap(mat)
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
-`r msmbstyle::question_begin()`
+:::
 
-3. Compare the spectra with the plotting function seen previously.
+::: {.callout-question .icon .callout-note}
 
-`r msmbstyle::question_end()`
+Compare the spectra with the plotting function seen previously.
 
-`r msmbstyle::solution_begin()`
-
-
-```{r}
-filterIntensity(sp_k, 1e3) %>% plotSpectra(main = sp_k$sequence)
-```
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
+filterIntensity(sp_k, 1e3) |>  plotSpectra(main = sp_k$sequence)
 par(mfrow = c(3, 1))
 plotSpectraMirror(sp_k[1], sp_k[2], main = "TK...")
 plotSpectraMirror(sp_k[3], sp_k[4], main = "SQ...")
 plotSpectraMirror(sp_k[3], sp_k[4], main = "SQ...")
 ```
-`r msmbstyle::solution_end()`
 
+:::
+
+:::
 
 ## Summary exercise
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Download the 3 first mzML and mzID files from the
 [PXD022816](https://www.ebi.ac.uk/pride/archive/projects/PXD022816)
 project [@Morgenstern:2020].
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 ## Getting data from PX/PRIDE
@@ -586,41 +574,41 @@ pxfiles(PXD022816)
 (mzmls <- pxget(PXD022816, grep("mzML", pxfiles(PXD022816))[1:3]))
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
+:::
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Generate a `Spectra` object and a table of filtered PSMs. Visualise
 the total ion chromatograms and check the quality of the
 identification data by comparing the density of the decoy and target
 PSMs id scores for each file.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r, message = FALSE}
 ## Loading raw data
-library("Spectra")
+library(Spectra)
 sp <- Spectra(mzmls)
 sp
 
 ## number of spectra per file
 table(basename(sp$dataOrigin))
+
 ## all levels are centroided
 table(sp$centroided, sp$msLevel)
 ```
 
 ```{r, message = FALSE, fig.width = 12}
-library("ggplot2")
-library("tidyr")
-library("magrittr")
+library(ggplot2)
+library(tidyr)
+library(magrittr)
 
 ## Chromatograms
-filterMsLevel(sp, 1) %>%
-    spectraData() %>%
-    as_tibble() %>%
+filterMsLevel(sp, 1) |> 
+    spectraData() |> 
+    as_tibble() |> 
     ggplot(aes(x = rtime,
                y = totIonCurrent,
                colour = basename(dataOrigin))) +
@@ -629,13 +617,13 @@ filterMsLevel(sp, 1) %>%
 
 ```{r, message = FALSE, fig.width = 12}
 ## Identification data
-library("PSMatch")
+library(PSMatch)
 id <- PSM(mzids)
 
 ## Number of PSMs per acquisition
 table(id$idFile)
 
-tidyr::as_tibble(id) %>%
+tidyr::as_tibble(id) |> 
     ggplot(aes(x = MetaMorpheus.score,
                colour = isDecoy)) +
     geom_density() +
@@ -647,17 +635,16 @@ id_filtered <- filterPSMs(id)
 max(id_filtered$PSM.level.q.value)
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
+:::
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Join the raw and identification data. Beware though that the joining
 must now be performed by spectrum ids and by files.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 ## primary key for spectra
@@ -683,19 +670,19 @@ sp <- joinSpectraData(sp, id_filtered, by.x = "pkey")
 ## Number of MS2 scans with a PSM
 table(!is.na(filterMsLevel(sp, 2)$sequence))
 ```
-`r msmbstyle::solution_end()`
 
+:::
+
+:::
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Extract the PSMs that have been matched to peptides from protein
 `O43175` and compare and cluster the scans. Hint: once you have
 created the smaller `Spectra` object with the scans of interest,
 switch to an in-memory backend to speed up the calculations.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 sp_O43175 <- sp[which(sp$DatabaseAccess == "O43175")]
@@ -724,20 +711,19 @@ spectraData(sp_O43175[i])$modName
 plotSpectraMirror(sp_O43175[4], sp_O43175[9])
 plotSpectraMirror(sp_O43175[2], sp_O43175[10])
 ```
-`r msmbstyle::solution_end()`
 
+:::
+
+:::
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Generate total ion chromatograms for each acquisition and annotate the
 MS1 scans with the number of PSMs using the `countIdentifications()`
 function, as shown above. The function will automatically perform the
 counts in parallel for each acquisition.
 
-`r msmbstyle::question_end()`
-
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 sp <- countIdentifications(sp)
@@ -746,22 +732,23 @@ table(msLevel(sp), sp$countIdentifications)
 
 ```{r}
 sp |>
- filterMsLevel(1) |>
- spectraData() |>
- as_tibble() |>
- ggplot(aes(x = rtime,
-            y = totIonCurrent)) +
-     geom_line(alpha = 0.25) +
-     geom_point(aes(colour = ifelse(countIdentifications == 0,
-                                    NA, countIdentifications)),
-                size = 0.75,
-                alpha = 0.5) +
-     scale_colour_gradient(low = "orange", high = "red") +
-     facet_grid(sub("^.+_", "", basename(dataOrigin)) ~ .) +
-     labs(colour = "Number of ids")
+    filterMsLevel(1) |>
+    spectraData() |>
+    as_tibble() |>
+    ggplot(aes(x = rtime, y = totIonCurrent)) +
+    geom_line(alpha = 0.25) +
+    geom_point(aes(colour = ifelse(countIdentifications == 0,
+                                NA, countIdentifications)),
+            size = 0.75,
+            alpha = 0.5) +
+    scale_colour_gradient(low = "orange", high = "red") +
+    facet_grid(sub("^.+_", "", basename(dataOrigin)) ~ .) +
+    labs(colour = "Number of ids")
 ```
 
-`r msmbstyle::solution_end()`
+:::
+
+:::
 
 ## Exploration and Assessment of Identifications using `MSnID`
 
@@ -787,7 +774,7 @@ vignette. You can explore more with
 vignette("msnid_vignette", package = "MSnID")
 ```
 
-The `r Biocpkg("MSnID")` package can be used for post-search filtering
+The `r BiocStyle::Biocpkg("MSnID")` package can be used for post-search filtering
 of MS/MS identifications. One starts with the construction of an
 `MSnID` object that is populated with identification results that can
 be imported from a `data.frame` or from `mzIdentML` files. Here, we
@@ -803,7 +790,7 @@ add the identification result from our `mzid` file (there could of
 course be more than one).
 
 ```{r msnid1, warning = FALSE}
-library("MSnID")
+library(MSnID)
 msnid <- MSnID(".")
 msnid <- read_mzIDs(msnid, mzids)
 show(msnid)
@@ -817,7 +804,6 @@ Printing the `MSnID` object returns some basic information such as
 * Number of unique peptide sequences and corresponding FDR.
 * Number of unique proteins or amino acid sequence accessions and corresponding FDR.
 
-
 The package then enables to define, optimise and apply filtering based
 for example on missed cleavages, identification scores, precursor mass
 errors, etc. and assess PSM, peptide and protein FDR levels. To
@@ -836,7 +822,6 @@ names(msnid)
 Here, we summarise a few steps and redirect the reader to the
 package's vignette for more details:
 
-
 ### Analysis of peptide sequences
 
 Cleaning irregular cleavages at the termini of the peptides and
@@ -962,3 +947,5 @@ further processed and analysed using appropriate statistical tests.
 ```{r}
 head(psms(msnid))
 ```
+
+# References {-}
diff --git a/30-quant.Rmd b/inst/pages/30-quant.qmd
similarity index 90%
rename from 30-quant.Rmd
rename to inst/pages/30-quant.qmd
index 40e6502..6b6ce3d 100644
--- a/30-quant.Rmd
+++ b/inst/pages/30-quant.qmd
@@ -55,7 +55,6 @@ that sample.
 knitr::include_graphics("./img/chrompeaks.png")
 ```
 
-
 ### Labelled MS1: SILAC
 
 In SILAC quantitation, samples are grown in a medium that contains
@@ -67,7 +66,6 @@ peptides precursor peaks are systematically shifted compared to the
 light ones, and the ratio between the height of a heavy and light
 peaks can be used to calculate peptide and protein fold-changes.
 
-
 ```{r silab, echo=FALSE, out.width = "75%", fig.cap = "Silac quantitation. Figure credit Wikimedia Commons."}
 knitr::include_graphics("./img/Silac.png")
 ```
@@ -78,20 +76,19 @@ processing, data transformation and normalisation, missing values, and
 different underlying statistical models for the quantitative data
 (count data for spectral counting, continuous data for the others).
 
-
 In terms of raw data quantitation in R/Bioconductor, most efforts have
 been devoted to MS2-level quantitation. Label-free XIC quantitation
 has been addressed in the frame of metabolomics data processing by the
-`r Biocpkg("xcms")` infrastructure.
+`r BiocStyle::Biocpkg("xcms")` infrastructure.
 
 <!-- Below is a list of suggested packages for some common proteomics -->
 <!-- quantitation technologies: -->
 
-<!-- * Isobaric tagging (iTRAQ and TMT): `r Biocpkg("MSnbase")` and `r Biocpkg("isobar")`. -->
-<!-- * Label-free: `r Biocpkg("xcms")` (metabolomics). -->
-<!-- * Counting: `r Biocpkg("MSnbase")` and `r Biocpkg("MSnID")` for -->
+<!-- * Isobaric tagging (iTRAQ and TMT): `r BiocStyle::Biocpkg("MSnbase")` and `r BiocStyle::Biocpkg("isobar")`. -->
+<!-- * Label-free: `r BiocStyle::Biocpkg("xcms")` (metabolomics). -->
+<!-- * Counting: `r BiocStyle::Biocpkg("MSnbase")` and `r BiocStyle::Biocpkg("MSnID")` for -->
 <!--   peptide-spectrum matching confidence assessment. -->
-<!-- * `r Githubpkg("vladpetyuk/N14N15")` for heavy Nitrogen-labelled data. -->
+<!-- * `r BiocStyle::Githubpkg("vladpetyuk/N14N15")` for heavy Nitrogen-labelled data. -->
 
 ## QFeatures {#sec-qf}
 
@@ -124,7 +121,6 @@ biological entity of interest are the protein. As part of the data
 processing, we are thus required to **aggregate** low-level
 quantitative features into higher level data.
 
-
 ```{r featuresplot, fig.cap = "Conceptual representation of a `QFeatures` object and the aggregative relation between different assays.", echo = FALSE}
 par(mar = c(0, 0, 0, 0))
 plot(NA, xlim = c(0, 12), ylim = c(0, 20),
@@ -192,9 +188,8 @@ package. The class is derived from the Bioconductor
 `MultiAssayExperiment` [@MAE] (MAE) class. Let's start by loading the
 `QFeatures` package.
 
-
 ```{r, message = FALSE}
-library("QFeatures")
+library(QFeatures)
 ```
 
 Next, we load the `feat1` test data, which is composed of single
@@ -209,12 +204,21 @@ feat1
 Let's perform some simple operations to familiarise ourselves with the
 `QFeatures` class:
 
+::: {.callout-question .icon .callout-note}
+
 - Extract the sample metadata using the `colData()` accessor (like you
   have previously done with `SummarizedExperiment` objects).
 
+::: {.callout-answer .icon .callout-note collapse=true}
+
 ```{r cd}
 colData(feat1)
 ```
+
+:::
+
+:::
+
 We can also further annotate the experiment by adding columns to the `colData` slot:
 
 ```{r cd2}
@@ -223,21 +227,22 @@ feat1$Y <- c("Y1", "Y2")
 colData(feat1)
 ```
 
+::: {.callout-question .icon .callout-note}
+
 - Extract the first (and only) assay composing this `QFeatures` data
   using the `[[` operator (as you have done to extract elements of a
   list) by using the assay's index or name.
 
+::: {.callout-answer .icon .callout-note collapse=true}
+
 ```{r assay1}
 feat1[[1]]
 feat1[["psms"]]
 ```
 
-- Extract the `psms` assay's row data and quantitative values.
+:::
 
-```{r rd}
-assay(feat1[[1]])
-rowData(feat1[[1]])
-```
+:::
 
 ### Feature aggregation
 
@@ -285,7 +290,6 @@ assay(feat1[[2]])["IAEESNFPFIK", ]
 rowData(feat1[[2]])
 ```
 
-
 We can now aggregate the peptide-level data into a new protein-level
 assay using the `colMedians()` aggregation function.
 
@@ -298,7 +302,6 @@ feat1
 assay(feat1[["proteins"]])
 ```
 
-
 ### Subsetting and filtering
 
 The link between the assays becomes apparent when we now subset the
@@ -311,7 +314,6 @@ PSMs.
 feat1["ProtA", , ]
 ```
 
-
 The `filterFeatures()` function can be used to filter rows of the assays
 composing a `QFeatures` object using the row data variables. We can
 for example retain rows that have a `pval` < 0.05, which would only
@@ -322,20 +324,21 @@ that assay.
 filterFeatures(feat1, ~ pval < 0.05)
 ```
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 As the message above implies, it is also possible to apply a filter to
 only the assays that have the filtering variable by setting the `keep`
 argument.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r ff1b}
 filterFeatures(feat1, ~ pval < 0.05, keep = TRUE)
 ```
-`r msmbstyle::solution_end()`
+
+:::
+
+:::
 
 On the other hand, if we filter assay rows for those that localise to
 the mitochondrion, we retain the relevant protein, peptides and PSMs.
@@ -344,20 +347,20 @@ the mitochondrion, we retain the relevant protein, peptides and PSMs.
 filterFeatures(feat1, ~ location == "Mitochondrion")
 ```
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 As an exercise, let's filter rows that do not localise to the
 mitochondrion.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r ff3}
 filterFeatures(feat1, ~ location != "Mitochondrion")
 ```
 
-`r msmbstyle::solution_end()`
+:::
+
+:::
 
 You can refer to the [*Quantitative features for mass spectrometry
 data*](https://rformassspectrometry.github.io/QFeatures/articles/QFeatures.html)
@@ -365,7 +368,6 @@ vignette and the `QFeatures` [manual
 page](https://rformassspectrometry.github.io/QFeatures/reference/QFeatures-class.html)
 for more details about the class.
 
-
 ## Creating `QFeatures` object
 
 ```{r loaddfr, echo = FALSE}
@@ -383,7 +385,6 @@ from [@Christoforou:2016]. The `ecol` argument specifies that columns
 `psms` in the returned `QFeatures` object, to reflect the nature of
 the data.
 
-
 ```{r readQFeatures}
 data(hlpsms)
 hl <- readQFeatures(hlpsms, ecol = 1:10, name = "psms")
@@ -395,8 +396,6 @@ name. The individual assays are stored as *SummarizedExperiment*
 object and further access its quantitative data and metadata using
 the `assay` and `rowData` functions.
 
-
-
 ```{r subsetassay}
 hl[[1]]
 hl[["psms"]]
@@ -445,15 +444,12 @@ available in the `msdata` package:
 basename(f <- msdata::quant(pattern = "cptac", full.names = TRUE))
 ```
 
-
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Read these data in as either a `SummarizedExperiment` or a `QFeatures`
 object.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 From the names of the columns, we see that the quantitative columns,
 starting with `"Intensity."` (note the dot!) are at positions 56 to
@@ -477,17 +473,17 @@ cptac_se <- readSummarizedExperiment(f, ecol = i,
 cptac_se
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
-`r msmbstyle::question_begin()`
+:::
+
+::: {.callout-question .icon .callout-note}
 
 Before proceeding, we are going to clean up the sample names by
 removing the unnecessary *Intensity* prefix and annotate the
 experiment in the object's `colData`.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r cptac_names}
 colnames(cptac_se) <- sub("I.+\\.", "", colnames(cptac_se))
@@ -495,17 +491,18 @@ cptac_se$condition <- sub("_[7-9]", "", colnames(cptac_se))
 cptac_se$id <- sub("^.+_", "", colnames(cptac_se))
 colData(cptac_se)
 ```
-`r msmbstyle::solution_end()`
 
-`r msmbstyle::question_begin()`
+:::
+
+:::
+
+::: {.callout-question .icon .callout-note}
 
 There are many row variables that aren't useful here. Get rid of all
 of them but `Sequence`, `Proteins`, `Leading.razor.protein`, `PEP`,
 `Score`, `Reverse`, and `Potential.contaminant`.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r vars}
 keep_var <- c("Sequence", "Proteins", "Leading.razor.protein", "PEP",
@@ -514,8 +511,9 @@ keep_var <- c("Sequence", "Proteins", "Leading.razor.protein", "PEP",
 rowData(cptac_se) <- rowData(cptac_se)[, keep_var]
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
+:::
 
 ## Analysis pipeline
 
@@ -532,10 +530,10 @@ above.
 - Downstream analysis
 
 ```{r pkgs, message = FALSE}
-library("tidyverse")
-library("ggplot2")
-library("QFeatures")
-library("limma")
+library(tidyverse)
+library(ggplot2)
+library(QFeatures)
+library(limma)
 ```
 
 ### Missing values
@@ -565,7 +563,6 @@ instead of properly reporting missing values. We can use the
 `cptac_se` object and then explore the missing data patterns across
 columns and rows.
 
-
 ```{r na}
 cptac_se <- zeroIsNA(cptac_se)
 nNA(cptac_se)
@@ -618,7 +615,6 @@ table(nNA(cptac_se)$nNArows$nNA)
 cptac_se <- filterNA(cptac_se, pNA = 4/6)
 ```
 
-
 ### Imputation
 
 Imputation is the technique of replacing missing data with probable
@@ -631,12 +627,11 @@ to be imputed with [different types of imputation
 methods](https://rformassspectrometry.github.io/QFeatures/articles/Processing.html#imputation-1)
 [@Lazar:2016].
 
-
 ```{r miximp, echo = FALSE, fig.cap = "Mixed imputation method. Black cells represent presence of quantitation values and light grey corresponds to missing data. The two groups of interest are depicted in green and blue along the heatmap columns. Two classes of proteins are annotated on the left: yellow are proteins with randomly occurring missing values (if any) while proteins in brown are candidates for non-random missing value imputation."}
 data(se_na2)
 x <- assay(impute(se_na2, "zero"))
 x[x != 0] <- 1
-suppressPackageStartupMessages(library("gplots"))
+suppressPackageStartupMessages(library(gplots))
 heatmap.2(x, col = c("lightgray", "black"),
           scale = "none", dendrogram = "none",
           trace = "none", keysize = 0.5, key = FALSE,
@@ -648,14 +643,14 @@ heatmap.2(x, col = c("lightgray", "black"),
 ```{r lazar, fig.cap = "Effect of the nature of missing values on their imputation. Root-mean-square error (RMSE) observations standard deviation ratio (RSR), KNN and MinDet imputation. Lower (blue) is better.", echo = FALSE, out.width='100%'}
 knitr::include_graphics("./img/imp-sim.png")
 ```
+
 Generally, it is recommended to use **hot deck** methods (nearest
 neighbour (**left**), maximum likelihood, ...) when data are missing
 at random. Conversely, MNAR features should ideally be imputed with a
 **left-censor** (minimum value (**right**), but not zero, ...) method.
 
-
 There are various methods to perform data imputation, as described in
-`?impute`. The `r CRANpkg("imp4p")` package contains additional
+`?impute`. The `r BiocStyle::CRANpkg("imp4p")` package contains additional
 functionality, including some to estimate the randomness of missing
 data.
 
@@ -668,16 +663,14 @@ data(se_na2)
 impute(se_na2, method = "knn")
 ```
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Following the example above, apply a mixed imputation, using knn for
 data missing at random and the zero imputation for data missing not at
 random. Hint: the `randna` variable defines which features are assumed
 to be missing at random.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r naex1, message = FALSE}
 impute(se_na2, "mixed",
@@ -685,9 +678,11 @@ impute(se_na2, "mixed",
        mar = "knn", mnar = "zero")
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
-`r msmbstyle::question_begin()`
+:::
+
+::: {.callout-question .icon .callout-note}
 
 When assessing missing data imputation methods, such as in [Lazar et
 al. (2016)](https://pubs.acs.org/doi/abs/10.1021/acs.jproteome.5b00981),
@@ -697,10 +692,7 @@ method of choice, then quantifies the difference between original
 data, use this strategy to assess the difference between knn and
 Bayesian PCA imputation.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
-
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r naex2, cache = TRUE}
 imp1 <- impute(se_na2, method = "knn")
@@ -708,11 +700,12 @@ imp2 <- impute(se_na2, method = "bpca")
 summary(abs(assay(imp1)[is.na(assay(se_na2))] - assay(imp2)[is.na(assay(se_na2))]))
 summary(as.numeric(na.omit(assay(se_na2))))
 ```
-`r msmbstyle::solution_end()`
 
+:::
 
-`r msmbstyle::question_begin()`
+:::
 
+::: {.callout-question .icon .callout-note}
 
 When assessing the impact of missing value imputation on real data,
 one can't use the strategy above. Another useful approach is to assess
@@ -721,14 +714,11 @@ quantitative data. For instance, here is the intensity distribution of
 the `se_na2` data. Verify the effect of applying `knn`, `zero`,
 `MinDet` and `bpca` on this distribution.
 
-```{r nasetdist, fig.cap = "Intensity disctribution of the `naset` data."}
+```{r nasetdist, fig.cap = "Intensity disctribution of the naset data."}
 plot(density(na.omit(assay(se_na2))))
 ```
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
-
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r naex3, cache = TRUE}
 cls <- c("black", "red", "blue", "steelblue", "orange")
@@ -741,8 +731,9 @@ legend("topright", legend = c("orig", "knn", "zero", "MinDet", "bpca"),
        col = cls, lwd = 2, bty = "n")
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
+:::
 
 **Tip**: When downstream analyses permit, it might be safer not to
 impute data and deal explicitly with missing values. Indeed missing
@@ -753,7 +744,6 @@ recommended to explore missingness as part of the exploratory data
 analysis.], but (generally) not to perform a principal component
 analysis.
 
-
 ### Identification quality control
 
 As discussed in the previous chapter, PSMs are deemed relevant after
@@ -774,36 +764,32 @@ table(rowData(cptac_se)$Potential.contaminant)
 Let's visualise some of the cptac's metadata using standard `ggplot2`
 code:
 
-
-`r msmbstyle::question_begin()`
-
+::: {.callout-question .icon .callout-note}
 
 Visualise the identification score and the posterior error
 probability (PEP) distributions from forward and reverse hits and
 interpret the figure.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
-
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r idqc1}
-rowData(cptac_se) %>%
-    as_tibble() %>%
+rowData(cptac_se) |>
+    as_tibble() |>
     ggplot(aes(x = Score, colour = Reverse)) +
     geom_density()
 
 ```
 
 ```{r idqc2}
-rowData(cptac_se) %>%
-    as_tibble() %>%
+rowData(cptac_se) |>
+    as_tibble() |>
     ggplot(aes(x = PEP, colour = Reverse)) +
     geom_density()
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
+:::
 
 **Note**: it is also possible to compute and visualise protein groups
 as connected components starting from a quantitative dataset such as a
@@ -835,31 +821,29 @@ with the `readQFeatures()` function and the same arguments as the
 used above and below work on single `SummarizedExperiment` objects or
 assays within a `QFeatures` object.
 
-
 ### Filtering out contaminants and reverse hits
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Using the `filterFeatures()` function, filter out the reverse and
 contaminant hits, and also retain those that have a posterior error
 probability smaller than 0.05.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r qcfilter}
 cptac <-
-    cptac %>%
-    filterFeatures(~ Reverse != "+") %>%
-    filterFeatures(~ Potential.contaminant != "+") %>%
+    cptac |>
+    filterFeatures(~ Reverse != "+") |>
+    filterFeatures(~ Potential.contaminant != "+") |>
     filterFeatures(~ PEP < 0.05)
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
-### Log-transformation and normalisation
+:::
 
+### Log-transformation and normalisation
 
 The two code chunks below log-transform and normalise using the assay
 `i` as input and adding a new one named as defined by `name`.
@@ -869,15 +853,12 @@ cptac <- logTransform(cptac, i = "peptides",
                       name = "log_peptides")
 ```
 
-
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Use the `normalize()` method to normalise the data. The syntax is the
 same as `logTransform()`. Use the `center.median` method.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r nrom}
 cptac <- normalize(cptac, i = "log_peptides",
@@ -885,18 +866,18 @@ cptac <- normalize(cptac, i = "log_peptides",
                    method = "center.median")
 ```
 
-`r msmbstyle::solution_end()`
+:::
+
+:::
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Visualise the result of the transformations above. The
 `plotDensities()` function from the `limma` package is very
 convenient, but feel free to use boxplots, violin plots, or any other
 visualisation that you deem useful to assess the transformations.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r plotdens, fig.cap = "Three peptide level assays: raw data, log transformed and normalised.", fig.width = 15, fig.height = 5}
 par(mfrow = c(1, 3))
@@ -904,20 +885,20 @@ limma::plotDensities(assay(cptac[["peptides"]]))
 limma::plotDensities(assay(cptac[["log_peptides"]]))
 limma::plotDensities(assay(cptac[["lognorm_peptides"]]))
 ```
-`r msmbstyle::solution_end()`
 
-### Aggregation
+:::
 
+:::
 
-`r msmbstyle::question_begin()`
+### Aggregation
+
+::: {.callout-question .icon .callout-note}
 
 Use median aggregation to aggregation peptides into protein
 values. This is not necessarily the best choice, as we will see
 later, but a good start.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r qfagg1, message = FALSE}
 cptac <-
@@ -929,7 +910,9 @@ cptac <-
                       na.rm = TRUE)
 ```
 
-`r msmbstyle::solution_end()`
+:::
+
+:::
 
 Looking at the `.n` row variable computed during the aggregation, we
 see that most proteins result from the aggregation of 5 peptides or
@@ -942,29 +925,29 @@ table(rowData(cptac[["proteins_med"]])$.n)
 ### Principal component analysis
 
 ```{r pca, message = FALSE}
-library("factoextra")
+library(factoextra)
 
 pca_pep <-
-    cptac[["lognorm_peptides"]] %>%
-    filterNA() %>%
-    assay() %>%
-    t() %>%
-    prcomp(scale = TRUE, center = TRUE) %>%
+    cptac[["lognorm_peptides"]] |>
+    filterNA() |>
+    assay() |>
+    t() |>
+    prcomp(scale = TRUE, center = TRUE) |>
     fviz_pca_ind(habillage = cptac$condition, title = "Peptides")
 
 pca_prot <-
-    cptac[["proteins_med"]] %>%
-    filterNA() %>%
-    assay() %>%
-    t() %>%
-    prcomp() %>%
+    cptac[["proteins_med"]] |>
+    filterNA() |>
+    assay() |>
+    t() |>
+    prcomp() |>
     fviz_pca_ind(habillage = cptac$condition,
                  title = "Proteins (median aggregation)")
 
 ```
 
 ```{r plotpca, fig.width = 12, fig.height = 6, fig.cap = "Peptide and protein level PCA analyses."}
-library("patchwork")
+library(patchwork)
 pca_pep + pca_prot
 ```
 
@@ -974,12 +957,11 @@ Below, we use the `longFormat()` function to extract the quantitative
 and row data in a long format, that can be directly reused by the
 tidyverse tools.
 
-
 ```{r vis, message = FALSE, warning = FALSE, fig.width = 12, fig.height = 6, fig.cap = "Peptide and protein expression profile."}
 longFormat(cptac["P02787ups|TRFE_HUMAN_UPS", ,
-                 c("lognorm_peptides", "proteins_med")]) %>%
-    as_tibble() %>%
-    mutate(condition = ifelse(grepl("A", colname), "A", "B")) %>%
+                 c("lognorm_peptides", "proteins_med")]) |>
+    as_tibble() |>
+    mutate(condition = ifelse(grepl("A", colname), "A", "B")) |>
     ggplot(aes(x = colname, y = value, colour = rowname, shape = condition)) +
     geom_point(size = 3) +
     geom_line(aes(group = rowname)) +
@@ -994,7 +976,7 @@ their relation.
 plot(cptac)
 ```
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 The example above shows a simple linear relationship between
 assays. Create a more interesting one by applying a different
@@ -1002,10 +984,7 @@ normalisation method on the *log_peptides* assay and aggreate that new
 normalised peptide assay. Visualise the relationship with `plot()`, as
 above.
 
-`r msmbstyle::question_end()`
-
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r plotqf2, message = FALSE}
 normalize(cptac, "log_peptides",
@@ -1020,8 +999,9 @@ normalize(cptac, "log_peptides",
     plot()
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
+:::
 
 ### Statistical analysis
 
@@ -1029,20 +1009,20 @@ R in general and Bioconductor in particular are well suited for the
 statistical analysis of quantitative proteomics data. Several
 packages provide dedicated resources for proteomics data:
 
-- `r Biocpkg("MSstats")` and `r Biocpkg("MSstatsTMT")`: A set of tools
+- `r BiocStyle::Biocpkg("MSstats")` and `r BiocStyle::Biocpkg("MSstatsTMT")`: A set of tools
   for statistical relative protein significance analysis in Data
   dependent (DDA), SRM, Data independent acquisition (DIA) and TMT
   experiments.
 
-- `r Biocpkg("msmsTests")`: Statistical tests for label-free LC-MS/MS
+- `r BiocStyle::Biocpkg("msmsTests")`: Statistical tests for label-free LC-MS/MS
   data by spectral counts, to discover differentially expressed
   proteins between two biological conditions. Three tests are
   available: Poisson GLM regression, quasi-likelihood GLM regression,
-  and the negative binomial of the `r Biocpkg("edgeR")`
+  and the negative binomial of the `r BiocStyle::Biocpkg("edgeR")`
   package. All can be readily applied on `MSnSet` instances produced,
   for example by `MSnID`.
 
-- `r Biocpkg("DEP")` provides an integrated analysis workflow for the
+- `r BiocStyle::Biocpkg("DEP")` provides an integrated analysis workflow for the
   analysis of mass spectrometry proteomics data for differential
   protein expression or differential enrichment.
 
@@ -1068,7 +1048,7 @@ packages provide dedicated resources for proteomics data:
   detect differentially abundant proteins.
 
 Others, while not specfic to proteomics, are also recommended, such as
-the `r Biocpkg("limma")` package. When analysing spectral counting
+the `r BiocStyle::Biocpkg("limma")` package. When analysing spectral counting
 data, methods for high throughput sequencing data are
 applicable. Below, we illustrate how to apply a typical `edgeR` test
 to count data using the `msms.edgeR` function from the `msmsTests`
@@ -1078,7 +1058,6 @@ package.
 Below, we are going to perform our statistical analysis on the protein
 data using `limma`.
 
-
 ```{r protse}
 prots <- getWithColData(cptac, "proteins_med")
 ```
@@ -1098,7 +1077,7 @@ The code chunk below illustrates how to set up the model, fit it, and
 apply the empirical Bayes moderation.
 
 ```{r limma, message = FALSE}
-library("limma")
+library(limma)
 design <- model.matrix(~ prots$condition)
 fit <- lmFit(assay(prots), design)
 fit <- eBayes(fit)
@@ -1109,9 +1088,9 @@ the coefficient of interest.
 
 ```{r res}
 res <-
-    topTable(fit, coef = "prots$condition6B", number = Inf) %>%
-    rownames_to_column("protein") %>%
-    as_tibble() %>%
+    topTable(fit, coef = "prots$condition6B", number = Inf) |>
+    rownames_to_column("protein") |>
+    as_tibble() |>
     mutate(TP = grepl("ups", protein))
 ```
 
@@ -1119,7 +1098,7 @@ Note the warning about partial `NA` coefficients for 23 probes:
 
 ```{r nacoefs}
 na_coefs <-
-    filter(res, is.na(t)) %>%
+    filter(res, is.na(t)) |>
     pull(protein)
 assay(prots[na_coefs, ])
 ```
@@ -1127,7 +1106,7 @@ assay(prots[na_coefs, ])
 We can now visualise the results using a volcano plot:
 
 ```{r vp, fig.cap = "Volcano plot highlighing spiked-in proteins in red."}
-res %>%
+res |>
     ggplot(aes(x = logFC, y = -log10(adj.P.Val))) +
     geom_point(aes(colour = TP)) +
     geom_vline(xintercept = c(-1, 1)) +
@@ -1135,7 +1114,6 @@ res %>%
     scale_color_manual(values = c("black","red"))
 ```
 
-
 Using the pipeline described above, we would would identify a single
 differentially expressed protein at an 5 percent FDR but miss out the
 other `r sum(res$TP) - 1` expected spike-in proteins.
@@ -1147,7 +1125,6 @@ We can assess our results in terms of true/false postitves/negatives:
 - True negatives: `r nrow(filter(res, adj.P.Val > 0.05 & !TP))`
 - False negatives: `r nrow(filter(res, adj.P.Val > 0.05 & TP))`
 
-
 ##  Summary exercice
 
 As shown below, it is possible to substantially improve these results
@@ -1156,7 +1133,6 @@ by aggregating features using a robust summarisation (available as
 M-estimation using Huber weights, as described in section 2.7 in
 [@Sticker:2019].
 
-
 ```{r echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "Aggregation using robust summarisation."}
 knitr::include_graphics("./img/vp2.png")
 ```
@@ -1168,3 +1144,5 @@ knitr::include_graphics("./img/vp2.png")
 
 Repeat and adapt what we have seen here using, for example, the
 `robustSummary()` function.
+
+# References {-}
diff --git a/95-annex.Rmd b/inst/pages/95-annex.qmd
similarity index 92%
rename from 95-annex.Rmd
rename to inst/pages/95-annex.qmd
index 770988c..03b8dfa 100644
--- a/95-annex.Rmd
+++ b/inst/pages/95-annex.qmd
@@ -1,6 +1,5 @@
 # Annex {#sec-anx}
 
-
 ## Raw MS data under the hood: the `mzR` package {#sec-raw2}
 
 The `mzR` package is a direct interface to the
@@ -28,9 +27,8 @@ The three main functions of `mzR` are
 Other functions such as `instrumentInfo`, or `runInfo` can be used to
 gather general information about a run.
 
-
 ```{r rawms}
-library("mzR")
+library(mzR)
 ms <- openMSfile(f2)
 ms
 ```
@@ -46,15 +44,13 @@ head(peaks(ms, 117))
 str(peaks(ms, 1:5))
 ```
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Let's extract the index of the MS2 spectrum with the highest base peak
 intensity and plot its spectrum. Is the data centroided or in profile
 mode?
-`r msmbstyle::question_end()`
-
 
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r ex_raw, echo=TRUE, eval=TRUE, fig.align='center'}
 hd2 <- hd[hd$msLevel == 2, ]
@@ -66,18 +62,16 @@ mz <- hd2[i, "basePeakMZ"]
 plot(pi, type = "h", xlim = c(mz - 0.5, mz + 0.5))
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
+:::
 
-`r msmbstyle::question_begin()`
+::: {.callout-question .icon .callout-note}
 
 Pick an MS1 spectrum and visually check whether it is centroided or in
 profile mode.
 
-`r msmbstyle::question_end()`
-
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r ex_raw2}
 ## Zooming into spectrum 300 (an MS1 spectrum).
@@ -88,8 +82,9 @@ mz <- hd[j, "basePeakMZ"]
 plot(pj, type = "l", xlim = c(mz - 0.5, mz + 0.5))
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
+:::
 
 ## PSM data under the hood {#sec-id2}
 
@@ -121,8 +116,8 @@ The main functions are `mzID` to read the data into a dedicated data
 class and `flatten` to transform it into a `data.frame`.
 
 ```{r mzid1, warning = FALSE}
-idf
-library("mzID")
+library(mzID)
+idf <- msdata::ident(full.names = TRUE)
 id <- mzID(idf)
 id
 ```
@@ -145,7 +140,7 @@ relevant data on demand. It has also accessor functions such as
 to see all available methods.
 
 ```{r idmzr}
-library("mzR")
+library(mzR)
 id2 <- openIDfile(idf)
 id2
 softwareInfo(id2)
diff --git a/99-si.Rmd b/inst/pages/99-si.qmd
similarity index 88%
rename from 99-si.Rmd
rename to inst/pages/99-si.qmd
index 4bdb47a..921a2e8 100644
--- a/99-si.Rmd
+++ b/inst/pages/99-si.qmd
@@ -1,4 +1,4 @@
-# Additional materials and session information {#sec-si}
+# Additional materials and help {#sec-si}
 
 ## Additional materials
 
@@ -52,18 +52,4 @@ For questions about specific software or their usage, please refer to
 the software's github issue page, or use the [Bioconductor support
 site](http://support.bioconductor.org/).
 
-
-## Session information
-
-The following packages have been used to generate this document.
-
-```{r include=FALSE}
-# automatically create a bib database for R packages
-knitr::write_bib(c(
-  .packages(), 'bookdown', 'knitr', 'rmarkdown', 'msmbstyle'
-), 'packages.bib')
-```
-
-```{r si}
-sessionInfo()
-```
+# References {-}
diff --git a/inst/pages/images/rstudio-logo.jpg b/inst/pages/images/rstudio-logo.jpg
new file mode 100644
index 0000000..0177f5c
Binary files /dev/null and b/inst/pages/images/rstudio-logo.jpg differ
diff --git a/inst/pages/images/text-editor-logo.jpg b/inst/pages/images/text-editor-logo.jpg
new file mode 100644
index 0000000..bc9e17b
Binary files /dev/null and b/inst/pages/images/text-editor-logo.jpg differ
diff --git a/inst/pages/images/vscode-logo.jpg b/inst/pages/images/vscode-logo.jpg
new file mode 100644
index 0000000..22c00ce
Binary files /dev/null and b/inst/pages/images/vscode-logo.jpg differ
diff --git a/docs/img/F02-3D-MS1-MS2-scans-100-1200-lattice.png b/inst/pages/img/F02-3D-MS1-MS2-scans-100-1200-lattice.png
similarity index 100%
rename from docs/img/F02-3D-MS1-MS2-scans-100-1200-lattice.png
rename to inst/pages/img/F02-3D-MS1-MS2-scans-100-1200-lattice.png
diff --git a/docs/img/F02-3D-MS1-scans-400-1200-lattice.png b/inst/pages/img/F02-3D-MS1-scans-400-1200-lattice.png
similarity index 100%
rename from docs/img/F02-3D-MS1-scans-400-1200-lattice.png
rename to inst/pages/img/F02-3D-MS1-scans-400-1200-lattice.png
diff --git a/docs/img/MS1-MS2-spectra.png b/inst/pages/img/MS1-MS2-spectra.png
similarity index 100%
rename from docs/img/MS1-MS2-spectra.png
rename to inst/pages/img/MS1-MS2-spectra.png
diff --git a/docs/img/MSGFgui.png b/inst/pages/img/MSGFgui.png
similarity index 100%
rename from docs/img/MSGFgui.png
rename to inst/pages/img/MSGFgui.png
diff --git a/docs/img/SE.png b/inst/pages/img/SE.png
similarity index 100%
rename from docs/img/SE.png
rename to inst/pages/img/SE.png
diff --git a/docs/img/SchematicMS2.png b/inst/pages/img/SchematicMS2.png
similarity index 100%
rename from docs/img/SchematicMS2.png
rename to inst/pages/img/SchematicMS2.png
diff --git a/docs/img/Silac.png b/inst/pages/img/Silac.png
similarity index 100%
rename from docs/img/Silac.png
rename to inst/pages/img/Silac.png
diff --git a/docs/img/chromatogram.png b/inst/pages/img/chromatogram.png
similarity index 100%
rename from docs/img/chromatogram.png
rename to inst/pages/img/chromatogram.png
diff --git a/docs/img/chrompeaks.png b/inst/pages/img/chrompeaks.png
similarity index 100%
rename from docs/img/chrompeaks.png
rename to inst/pages/img/chrompeaks.png
diff --git a/docs/img/cptac.png b/inst/pages/img/cptac.png
similarity index 100%
rename from docs/img/cptac.png
rename to inst/pages/img/cptac.png
diff --git a/img/features.png b/inst/pages/img/features.png
similarity index 100%
rename from img/features.png
rename to inst/pages/img/features.png
diff --git a/docs/img/frag.png b/inst/pages/img/frag.png
similarity index 100%
rename from docs/img/frag.png
rename to inst/pages/img/frag.png
diff --git a/docs/img/imp-sim.png b/inst/pages/img/imp-sim.png
similarity index 100%
rename from docs/img/imp-sim.png
rename to inst/pages/img/imp-sim.png
diff --git a/docs/img/itraq.png b/inst/pages/img/itraq.png
similarity index 100%
rename from docs/img/itraq.png
rename to inst/pages/img/itraq.png
diff --git a/img/msanim1.gif b/inst/pages/img/msanim1.gif
similarity index 100%
rename from img/msanim1.gif
rename to inst/pages/img/msanim1.gif
diff --git a/img/msanim2.gif b/inst/pages/img/msanim2.gif
similarity index 100%
rename from img/msanim2.gif
rename to inst/pages/img/msanim2.gif
diff --git a/img/msnset.png b/inst/pages/img/msnset.png
similarity index 100%
rename from img/msnset.png
rename to inst/pages/img/msnset.png
diff --git a/docs/img/mstut.gif b/inst/pages/img/mstut.gif
similarity index 100%
rename from docs/img/mstut.gif
rename to inst/pages/img/mstut.gif
diff --git a/docs/img/msvisfig.png b/inst/pages/img/msvisfig.png
similarity index 100%
rename from docs/img/msvisfig.png
rename to inst/pages/img/msvisfig.png
diff --git a/docs/img/pbase.png b/inst/pages/img/pbase.png
similarity index 100%
rename from docs/img/pbase.png
rename to inst/pages/img/pbase.png
diff --git a/docs/img/pr0c00313_0002.gif b/inst/pages/img/pr0c00313_0002.gif
similarity index 100%
rename from docs/img/pr0c00313_0002.gif
rename to inst/pages/img/pr0c00313_0002.gif
diff --git a/img/pset.jpg b/inst/pages/img/pset.jpg
similarity index 100%
rename from img/pset.jpg
rename to inst/pages/img/pset.jpg
diff --git a/docs/img/raw.png b/inst/pages/img/raw.png
similarity index 100%
rename from docs/img/raw.png
rename to inst/pages/img/raw.png
diff --git a/docs/img/vp2.png b/inst/pages/img/vp2.png
similarity index 100%
rename from docs/img/vp2.png
rename to inst/pages/img/vp2.png
diff --git a/05-intro.Rmd b/inst/pages/introduction.qmd
similarity index 98%
rename from 05-intro.Rmd
rename to inst/pages/introduction.qmd
index 42796d0..2427776 100644
--- a/05-intro.Rmd
+++ b/inst/pages/introduction.qmd
@@ -1,6 +1,5 @@
 # Introduction {#sec-msintro}
 
-
 ## How does mass spectrometry work?
 
 Mass spectrometry (MS) is a technology that *separates* charged
@@ -9,7 +8,6 @@ often coupled to chromatography (liquid LC, but can also be gas-based
 GC). The time an analyte takes to elute from the chromatography
 column is the *retention time*.
 
-
 ```{r, results='markup', fig.cap="A chromatogram, illustrating the total amount of analytes over the retention time.", echo=FALSE, purl=FALSE, out.width='100%', fig.align='center'}
 knitr::include_graphics("./img/chromatogram.png")
 ```
@@ -51,18 +49,15 @@ is of high enough quality) or using a search engine such as, for
 example Mascot, MSGF+, ..., that will match the observed, experimental
 spectrum to theoretical spectra (see details below).
 
-
 ```{r, results='markup', fig.cap="Schematics of a mass spectrometer and two rounds of MS.", echo=FALSE, purl=FALSE, out.width='100%', fig.align='center'}
 knitr::include_graphics("./img/SchematicMS2.png")
 ```
 
-
 The animation below show how 25 ions different ions (i.e. having
 different M/Z values) are separated throughout the MS analysis and are
 eventually detected (i.e. quantified). The final frame shows the
 hypothetical spectrum.
 
-
 ```{r, results='markup', fig.cap="Separation and detection of ions in a mass spectrometer.", echo=FALSE, purl=FALSE, out.width='100%', fig.align='center'}
 knitr::include_graphics("./img/mstut.gif")
 ```
@@ -94,7 +89,7 @@ knitr::include_graphics("./img/F02-3D-MS1-MS2-scans-100-1200-lattice.png")
 
 ## Accessing data
 
-### From the ProteomeXchange database {-}
+### From the ProteomeXchange database
 
 MS-based proteomics data is disseminated through the
 [ProteomeXchange](http://www.proteomexchange.org/) infrastructure,
@@ -107,11 +102,10 @@ quantitative data, as opposed as the name suggests),
 Reaction Monitoring (SRM, i.e. targeted) data and the
 [MassIVE](http://massive.ucsd.edu/ProteoSAFe/static/massive.jsp)
 resource. These data can be downloaded within R using the
-`r Biocpkg("rpx")` package.
-
+`r BiocStyle::Biocpkg("rpx")` package.
 
 ```{r rpx}
-library("rpx")
+library(rpx)
 ```
 
 Using the unique `PXD000001` identifier, we can retrieve the relevant
@@ -145,7 +139,7 @@ mzf <- pxget(px, fn)
 mzf
 ```
 
-### Data packages {-}
+### Data packages
 
 Some data are also distributed through dedicated packages. The
 `r BiocStyle::Biocexptpkg("msdata")`, for example, provides some
@@ -153,7 +147,7 @@ general raw data files relevant for both proteomics and
 metabolomics.
 
 ```{r msdatafiles, message = FALSE}
-library("msdata")
+library(msdata)
 ## proteomics raw data
 proteomics()
 ## proteomics identification data
diff --git a/10-raw.Rmd b/inst/pages/raw-ms-data.qmd
similarity index 89%
rename from 10-raw.Rmd
rename to inst/pages/raw-ms-data.qmd
index 2b35a67..469a0fe 100644
--- a/10-raw.Rmd
+++ b/inst/pages/raw-ms-data.qmd
@@ -1,16 +1,9 @@
-# Raw MS data  {#sec-raw}
-
+# Raw MS data {#sec-raw}
 
 In this section, we will learn how to read raw data in one of the
 commonly used open formats (`mzML`, `mzXML`, `netCDF` or `mgf`) into
 R.
 
-```{r rwpkgs, echo = FALSE, message = FALSE, warning = FALSE}
-## x <- RforProteomics:::msDataTab()
-## sel <- x[, 1] %in% c("Raw", "Peak lists")
-## knitr::kable(x[sel, ])
-```
-
 ## What is raw data in R
 
 When we manipulate complex data, we need a way to abstract it.
@@ -64,7 +57,7 @@ sp0
 
 #### Exercise {-}
 
-Explore the newly created object using
+Explore the newly created object using:
 
 - `spectraVariables` to extract all the metadata variables. Compare these to the
   spectra variables available from the previous example.
@@ -78,6 +71,10 @@ Let's now create a new object using the mzML data previously
 downloaded and available in the `mzf` file.
 
 ```{r, spectra2}
+library(rpx)
+fn <- "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML"
+px <- PXDataset("PXD000001")
+mzf <- pxget(px, fn)
 mzf
 sp <- Spectra(mzf)
 sp
@@ -154,7 +151,6 @@ sp$rtime_minute <- rtime(sp) / 60
 sp$rtime_minute |> head()
 ```
 
-
 #### Exercise {-}
 
 - Extract a set of spectra variables using the accessor (for example
@@ -180,7 +176,6 @@ following questions:
 * Plot one spectrum of each level. Are they centroided or in profile
   mode?
 
-
 These objects and their manipulations are not limited to single files or
 samples. Below we load data from two mzML files. The MS data from both files in
 the `Spectra` is organized linearly (first all spectra from the first file
@@ -188,7 +183,7 @@ and then from the second). The `dataOrigin` function can be used to identify
 spectra from the different data files.
 
 ```{r, sciex_mzr}
-(fls <- dir(system.file("sciex", package = "msdata"), full.names = TRUE))
+fls <- dir(system.file("sciex", package = "msdata"), full.names = TRUE)
 sp_sciex <- Spectra(fls)
 table(dataOrigin(sp_sciex))
 ```
@@ -203,7 +198,7 @@ example backends but any object extending the base `MsBackend` class could be
 used instead. The default backends are:
 
 - `MsBackendMzR`: this backend keeps only general spectra variables in memory
-  and relies on the `r Biocpkg("mzR")` package to read mass peaks (m/z and
+  and relies on the `r BiocStyle::Biocpkg("mzR")` package to read mass peaks (m/z and
   intensity values) from the original MS files on-demand.
 
 ```{r sciex_mzr_show}
@@ -215,7 +210,6 @@ sp_sciex
   high performance but has also, depending on the number of mass peaks in each
   spectrum, a much higher memory footprint.
 
-
 ```{r sciex_dfr}
 setBackend(sp_sciex, MsBackendMemory())
 ```
@@ -263,7 +257,6 @@ backends](https://jorainer.github.io/SpectraTutorials/articles/Spectra-backends.
 for more information on different backends, their properties and
 advantages/disadvantages.
 
-
 ## Visualisation of raw MS data
 
 The importance of flexible access to specialised data becomes visible
@@ -273,7 +266,6 @@ Not only can we access specific data and understand/visualise them,
 but we can transverse all the data and extract/visualise/understand
 structured slices of data.
 
-
 The figure below shows an illustration of how mass spectrometry
 works:
 
@@ -296,16 +288,11 @@ knitr::include_graphics("./img/msvisfig.png")
 
 We are going to reproduce the figure above through a set of exercices.
 
-`r msmbstyle::question_begin()`
-
-1.  The chromatogram can be created by extracting the `totIonCurrent`
-   and `rtime` variables for all MS1 spectra. Annotate the spectrum of
-   interest.
+- The chromatogram can be created by extracting the `totIonCurrent`
+    and `rtime` variables for all MS1 spectra.
+    Annotate the spectrum of interest.
 
-`r msmbstyle::question_end()`
-
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 with(spectraData(filterMsLevel(sp, 1)),
@@ -313,33 +300,26 @@ with(spectraData(filterMsLevel(sp, 1)),
 abline(v = rtime(sp)[2807], col = "red")
 ```
 
-`r msmbstyle::solution_end()`
-
-`r msmbstyle::question_begin()`
+:::
 
-2. The `filterPrecursorScan()` function can be used to retain a set
-   parent (MS1) and children scans (MS2), as defined by an acquisition
-   number. Use it to extract the MS1 scan of interest and all its MS2
-   children.
+- The `filterPrecursorScan()` function can be used to retain a set of
+    parent (MS1) and children scans (MS2), as defined by an acquisition
+    number. Use it to extract the MS1 scan of interest and all its MS2
+    children.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 ms_2 <- filterPrecursorScan(sp, 2807)
 ms_2
 ```
-`r msmbstyle::solution_end()`
-
-`r msmbstyle::question_begin()`
 
-3. Plot the MS1 spectrum of interest and highlight all the peaks that
-   will be selected for MS2 analysis.
+:::
 
-`r msmbstyle::question_end()`
+- Plot the MS1 spectrum of interest and highlight all the peaks that
+    will be selected for MS2 analysis.
 
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 plotSpectra(sp[2807], xlim = c(400, 1000))
@@ -347,47 +327,36 @@ abline(v = precursorMz(ms_2)[-1], col = "grey")
 abline(v = precursorMz(ms_2)[2], col = "red")
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
+- Zoom in between m/z values 521.1 and 522.5 to reveal the isotopic envelope
+    of that peak.
 
-`r msmbstyle::question_begin()`
-
-4. Zoom in mz values 521.1 and 522.5 to reveal the isotopic envelope
-   of that peak.
-
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 plotSpectra(sp[2807], xlim = c(521.2, 522.5), type = "l")
 abline(v = precursorMz(ms_2)[2], col = "red")
 ```
 
-`r msmbstyle::solution_end()`
-
+:::
 
-`r msmbstyle::question_begin()`
+- The `plotSpectra()` function is used to plot all 10 MS2 spectra in
+    one call.
 
-5. The `plotSpectra()` function is used to plot all 10 MS2 spectra in
-   one call.
+::: {.callout-answer .icon .callout-note collapse=true}
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
-
-```{r, fig.height = 12, fig.width = 8}
+```{r}
 plotSpectra(ms_2[-1])
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
 It is possible to label the peaks with the `plotSpectra()`
 function. The `labels` argument is either a `character` of appropriate
 length (i.e. with a label for each peak) or, as illustrated below, a
 function that computes the labels.
 
-
 ```{r}
 mzLabel <- function(z) {
     z <- peaksData(z)[[1L]]
@@ -406,14 +375,10 @@ plotSpectra(ms_2[7],
 Spectra can also be compared either by overlay or mirror plotting
 using the `plotSpectraOverlay()` and `plotSpectraMirror()` functions.
 
-`r msmbstyle::question_begin()`
-
-Filter MS2 level spectra and find any 2 MS2 spectra that have matching
+- Filter MS2 level spectra and find any 2 MS2 spectra that have matching
 precursor peaks based on the precursor m/z values.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 sp2 <- filterMsLevel(sp, 2L)
@@ -422,16 +387,12 @@ i <- which(precursorMz(sp2) == precursorMz(sp2)[37])
 sp2i <- sp2[i]
 ```
 
-`r msmbstyle::solution_end()`
-
-`r msmbstyle::question_begin()`
+:::
 
-Visualise the matching pair using the `plotSpectraOverlay()` and
+- Visualise the matching pair using the `plotSpectraOverlay()` and
 `plotSpectraMirror()` functions.
 
-`r msmbstyle::question_end()`
-
-`r msmbstyle::solution_begin()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
 ```{r}
 plotSpectraOverlay(sp2i, col = c("red", "steelblue"))
@@ -440,8 +401,8 @@ plotSpectraOverlay(sp2i, col = c("red", "steelblue"))
 ```{r}
 plotSpectraMirror(sp2i[1], sp2i[2])
 ```
-`r msmbstyle::solution_end()`
 
+:::
 
 It is also possible to explore raw data interactively with the
 [`SpectraVis`
@@ -458,15 +419,9 @@ package](https://rformassspectrometry.github.io/SpectraVis/):
   [`plotly`](https://plotly.com/r/) allowing to explore (zooming,
   panning) the spectrum interactively.
 
-
-`r msmbstyle::question_begin()`
-
-Test the `SpectraVis` function on some the `Spectra` objects produce
+- Test the `SpectraVis` functions on some of the `Spectra` objects produced
 above.
 
-`r msmbstyle::question_end()`
-
-
 ## Raw data processing and manipulation
 
 Apart from *classical* subsetting operations such as `[` and `split`, a set of
@@ -521,18 +476,13 @@ within a `Spectra`:
 - `reduceSpectra`: filters individual spectra keeping only the largest peak for
   groups of peaks with similar m/z values.
 
-
-`r msmbstyle::question_begin()`
-
-Using the `sp_sciex` data, select all spectra measured in the second
+- Using the `sp_sciex` data, select all spectra measured in the second
 mzML file and subsequently filter them to retain spectra measured
 between 175 and 189 seconds in the measurement run.
 
-`r msmbstyle::question_end()`
+::: {.callout-answer .icon .callout-note collapse=true}
 
-`r msmbstyle::solution_begin()`
-
-```{r filterfile-filterrt1}
+```{r}
 fls <- unique(dataOrigin(sp_sciex))
 fls
 file_2 <- filterDataOrigin(sp_sciex, dataOrigin = fls[2])
@@ -547,7 +497,7 @@ sp_sciex |>
     filterRt(c(175, 189))
 ```
 
-`r msmbstyle::solution_end()`
+:::
 
 As an example of data processing, we use below the `pickPeaks()`
 function. This function allows to convert *profile mode* MS data to *centroid
@@ -599,7 +549,6 @@ manipulation operations are also possible for read-only backends
 information about the number of such processing steps can be seen
 below (next to Lazy evaluation queue).
 
-
 ```{r}
 min(intensity(sp_sciex[1]))
 sp_sciex <- filterIntensity(sp_sciex, intensity = c(10, Inf))
@@ -621,3 +570,5 @@ More information on this lazy evaluation concept implemented in `Spectra` is
 provided in the [Spectra
 backends](https://jorainer.github.io/SpectraTutorials/articles/Spectra-backends.html)
 vignette.
+
+# References {-}
diff --git a/install_docs_deps.R b/install_docs_deps.R
deleted file mode 100644
index a22f718..0000000
--- a/install_docs_deps.R
+++ /dev/null
@@ -1,11 +0,0 @@
-if (!requireNamespace("BiocManager", quietly = TRUE))
-    install.packages("BiocManager")
-
-deps <- c("dplyr", "factoextra", "ggplot2", "gplots", "limma",
-          "magrittr", "MsCoreUtils", "msdata", "MSnID", "mzID", "mzR",
-          "patchwork", "PSMatch", "QFeatures", "rpx", "Spectra",
-          "tidyr", "tidyverse", "impute", "MSnID")
-BiocManager::install(deps, ask = FALSE, udpate = TRUE)
-
-deps2 <- c("lgatto/msmbstyle", "BiocStyle", "bookdown")
-BiocManager::install(deps2, ask = FALSE, udpate = TRUE)
diff --git a/refs.bib b/refs.bib
deleted file mode 100644
index c37168f..0000000
--- a/refs.bib
+++ /dev/null
@@ -1,432 +0,0 @@
-@Manual{R-base,
-  title = {R: A Language and Environment for Statistical Computing},
-  author = {{R Core Team}},
-  organization = {R Foundation for Statistical Computing},
-  address = {Vienna, Austria},
-  year = {2021},
-  url = {https://www.R-project.org/},
-}
-
-@Manual{R-bookdown,
-  title = {bookdown: Authoring Books and Technical Documents with R Markdown},
-  author = {Yihui Xie},
-  year = {2021},
-  note = {R package version 0.21.6},
-  url = {https://github.com/rstudio/bookdown},
-}
-
-@Manual{R-msmbstyle,
-  title = {msmbstyle: MSMB Styles for R Markdown Documents},
-  author = {Mike Smith},
-  year = {2021},
-  note = {R package version 0.0.18},
-}
-
-@Manual{R-rmarkdown,
-  title = {rmarkdown: Dynamic Documents for R},
-  author = {JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone},
-  year = {2021},
-  note = {R package version 2.7},
-  url = {https://CRAN.R-project.org/package=rmarkdown},
-}
-
-@Book{bookdown2016,
-  title = {bookdown: Authoring Books and Technical Documents with {R} Markdown},
-  author = {Yihui Xie},
-  publisher = {Chapman and Hall/CRC},
-  address = {Boca Raton, Florida},
-  year = {2016},
-  note = {ISBN 978-1138700109},
-  url = {https://github.com/rstudio/bookdown},
-}
-
-@Book{rmarkdown2018,
-  title = {R Markdown: The Definitive Guide},
-  author = {Yihui Xie and J.J. Allaire and Garrett Grolemund},
-  publisher = {Chapman and Hall/CRC},
-  address = {Boca Raton, Florida},
-  year = {2018},
-  note = {ISBN 9781138359338},
-  url = {https://bookdown.org/yihui/rmarkdown},
-}
-
-@Book{rmarkdown2020,
-  title = {R Markdown Cookbook},
-  author = {Yihui Xie and Christophe Dervieux and Emily Riederer},
-  publisher = {Chapman and Hall/CRC},
-  address = {Boca Raton, Florida},
-  year = {2020},
-  note = {ISBN 9780367563837},
-  url = {https://bookdown.org/yihui/rmarkdown-cookbook},
-}
-
-
-@ARTICLE{Gatto:2020,
-  title    = "{MSnbase}, efficient and elegant R-based processing and
-              visualisation of raw mass spectrometry data",
-  author   = "Gatto, Laurent and Gibb, Sebastian and Rainer, Johannes",
-  abstract = "We present version 2 of the MSnbase R/Bioconductor package.
-              MSnbase provides infrastructure for the manipulation, processing
-              and visualisation of mass spectrometry data. We focus on the new
-              on-disk infrastructure, that allows the handling of large raw
-              mass spectrometry experiments on commodity hardware and
-              illustrate how the package is used for elegant data processing,
-              method development and visualisation.",
-  journal  = "J. Proteome Res.",
-  month    =  sep,
-  year     =  2020,
-  language = "en"
-}
-
-@Article{MAE,
-    title = {Software For The Integration Of Multi-Omics Experiments
-                  In Bioconductor},
-    author = {Marcel Ramos and Lucas Schiffer and Angela Re and Rimsha
-                  Azhar and Azfar Basunia and Carmen Rodriguez Cabrera
-                  and Tiffany Chan and Philip Chapman and Sean Davis
-                  and David Gomez-Cabrero and Aedin C. Culhane and
-                  Benjamin Haibe-Kains and Kasper Hansen and Hanish
-                  Kodali and Marie Stephie Louis and Arvind Singh Mer
-                  and Markus Reister and Martin Morgan and Vincent
-                  Carey and Levi Waldron},
-    journal = {Cancer Research},
-    year = {2017},
-    volume = {77(21); e39-42},
-  }
-
-
-@Manual{SE,
-    title = {SummarizedExperiment: SummarizedExperiment container},
-    author = {Martin Morgan and Valerie Obenchain and Jim Hester and Hervé Pagès},
-    year = {2020},
-    note = {R package version 1.21.0},
-    url = {https://bioconductor.org/packages/SummarizedExperiment},
-  }
-
-@Article{Christoforou:2016,
-  author =       {Christoforou, Andy and Mulvey, Claire M and
-                  Breckels, Lisa M and Geladaki, Aikaterini and
-                  Hurrell, Tracey and Hayward, Penelope C and Naake,
-                  Thomas and Gatto, Laurent and Viner, Rosa and
-                  Martinez Arias, Alfonso and Lilley, Kathryn S},
-  title =        {A draft map of the mouse pluripotent stem cell
-                 spatial proteome.},
-  journal =      {Nat Commun},
-  year =         {2016},
-  month =        {},
-  number =       {},
-  volume =       {7},
-  pages =        {8992},
-  doi =          {10.1038/ncomms9992},
-  PMID =         {26754106}}
-
-@article{Sticker:2019,
-        author = {Sticker, Adriaan and Goeminne, Ludger and Martens, Lennart and Clement, Lieven},
-        title = {Robust summarization and inference in proteome-wide label-free quantification},
-        elocation-id = {668863},
-        year = {2019},
-        doi = {10.1101/668863},
-        publisher = {Cold Spring Harbor Laboratory},
-        abstract = {Label-Free Quantitative mass spectrometry based
-                  workflows for differential expression (DE) analysis
-                  of proteins impose important challenges on the data
-                  analysis due to peptide-specific effects and context
-                  dependent missingness of peptide
-                  intensities. Peptide-based workflows, like MSqRob,
-                  test for DE directly from peptide intensities and
-                  outper-form summarization methods which first
-                  aggregate MS1 peptide intensities to protein
-                  intensities before DE analysis. However, these
-                  methods are computationally expensive, often hard to
-                  understand for the non-specialised end-user, and do
-                  not provide protein summaries, which are important
-                  for visualisation or downstream processing. In this
-                  work, we therefore evaluate state-of-the-art
-                  summarization strategies using a benchmark spike-in
-                  dataset and discuss why and when these fail compared
-                  to the state-of-the-art peptide based model,
-                  MSqRob. Based on this evaluation, we propose a novel
-                  summarization strategy, MSqRob-Sum, which estimates
-                  MSqRob{\textquoteright}s model parameters in a
-                  two-stage procedure circumventing the drawbacks of
-                  peptide-based workflows. MSqRobSum maintains
-                  MSqRob{\textquoteright}s superior performance, while
-                  providing useful protein expression summaries for
-                  plotting and downstream analysis. Summarising
-                  peptide to protein intensities considerably reduces
-                  the computational complexity, the memory footprint
-                  and the model complexity, and makes it easier to
-                  disseminate DE inferred on protein
-                  summaries. Moreover, MSqRobSum provides a highly
-                  modular analysis framework, which provides
-                  researchers with full flexibility to develop data
-                  analysis workflows tailored towards their specific
-                  applications.},
-        URL = {https://www.biorxiv.org/content/early/2019/06/13/668863},
-        eprint = {https://www.biorxiv.org/content/early/2019/06/13/668863.full.pdf},
-        journal = {bioRxiv}
-}
-
-@ARTICLE{Paulovich:2010,
-  title    = "Interlaboratory study characterizing a yeast performance standard
-              for benchmarking {LC-MS} platform performance",
-  author   = "Paulovich, Amanda G and Billheimer, Dean and Ham, Amy-Joan L and
-              Vega-Montoto, Lorenzo and Rudnick, Paul A and Tabb, David L and
-              Wang, Pei and Blackman, Ronald K and Bunk, David M and Cardasis,
-              Helene L and Clauser, Karl R and Kinsinger, Christopher R and
-              Schilling, Birgit and Tegeler, Tony J and Variyath, Asokan
-              Mulayath and Wang, Mu and Whiteaker, Jeffrey R and Zimmerman,
-              Lisa J and Fenyo, David and Carr, Steven A and Fisher, Susan J
-              and Gibson, Bradford W and Mesri, Mehdi and Neubert, Thomas A and
-              Regnier, Fred E and Rodriguez, Henry and Spiegelman, Cliff and
-              Stein, Stephen E and Tempst, Paul and Liebler, Daniel C",
-  abstract = "Optimal performance of LC-MS/MS platforms is critical to
-              generating high quality proteomics data. Although individual
-              laboratories have developed quality control samples, there is no
-              widely available performance standard of biological complexity
-              (and associated reference data sets) for benchmarking of platform
-              performance for analysis of complex biological proteomes across
-              different laboratories in the community. Individual preparations
-              of the yeast Saccharomyces cerevisiae proteome have been used
-              extensively by laboratories in the proteomics community to
-              characterize LC-MS platform performance. The yeast proteome is
-              uniquely attractive as a performance standard because it is the
-              most extensively characterized complex biological proteome and
-              the only one associated with several large scale studies
-              estimating the abundance of all detectable proteins. In this
-              study, we describe a standard operating protocol for large scale
-              production of the yeast performance standard and offer aliquots
-              to the community through the National Institute of Standards and
-              Technology where the yeast proteome is under development as a
-              certified reference material to meet the long term needs of the
-              community. Using a series of metrics that characterize LC-MS
-              performance, we provide a reference data set demonstrating
-              typical performance of commonly used ion trap instrument
-              platforms in expert laboratories; the results provide a basis for
-              laboratories to benchmark their own performance, to improve upon
-              current methods, and to evaluate new technologies. Additionally,
-              we demonstrate how the yeast reference, spiked with human
-              proteins, can be used to benchmark the power of proteomics
-              platforms for detection of differentially expressed proteins at
-              different levels of concentration in a complex matrix, thereby
-              providing a metric to evaluate and minimize pre-analytical and
-              analytical variation in comparative proteomics experiments.",
-  journal  = "Mol. Cell. Proteomics",
-  volume   =  9,
-  number   =  2,
-  pages    = "242--254",
-  month    =  feb,
-  year     =  2010,
-  language = "en"
-}
-
-@Article{Lazar:2016,
-  author = {Lazar, C and Gatto, L and Ferro, M and Bruley, C
-                 and Burger, T},
-  title = {Accounting for the Multiple Natures of Missing
-                 Values in Label-Free Quantitative Proteomics Data
-                 Sets to Compare Imputation Strategies.},
-  journal = {J Proteome Res},
-  year = {2016},
-  month = {Apr},
-  number = {4},
-  volume = {15},
-  pages = {1116-25},
-  doi = {10.1021/acs.jproteome.5b00981},
-  PMID = {26906401}
-}
-
-@Article{Cox:2008,
-  author =       {Cox, J and Mann, M},
-  title =        {MaxQuant enables high peptide identification
-                 rates, individualized p.p.b.-range mass accuracies
-                 and proteome-wide protein quantification.},
-  journal =      {Nat Biotechnol},
-  year =         {2008},
-  month =        {Dec},
-  number =       {12},
-  volume =       {26},
-  pages =        {1367-72},
-  doi =          {10.1038/nbt.1511},
-  PMID =         {19029910}}
-
-@article{Morgenstern:2020,
-   author = {Morgenstern, David and Barzilay, Rotem and Levin, Yishai},
-   title = {{RawBeans}: A Simple, Vendor-Independent, Raw-Data Quality-Control Tool},
-   journal = {Journal of Proteome Research},
-   year = {2021},
-   doi = {10.1021/acs.jproteome.0c00956},
-   note ={PMID: 33657803},
-   URL = {https://doi.org/10.1021/acs.jproteome.0c00956},
-   eprint = {https://doi.org/10.1021/acs.jproteome.0c00956}
-}
-
-
-@ARTICLE{Vanderaa:2021,
-  title    = "Replication of single-cell proteomics data reveals important
-              computational challenges",
-  author   = "Vanderaa, Christophe and Gatto, Laurent",
-  abstract = "INTRODUCTION: Mass spectrometry-based proteomics is actively
-              embracing quantitative, single-cell level analyses. Indeed,
-              recent advances in sample preparation and mass spectrometry (MS)
-              have enabled the emergence of quantitative MS-based single-cell
-              proteomics (SCP). While exciting and promising, SCP still has
-              many rough edges. The current analysis workflows are custom and
-              built from scratch. The field is therefore craving for
-              standardized software that promotes principled and reproducible
-              SCP data analyses. AREAS COVERED: This special report is the
-              first step toward the formalization and standardization of SCP
-              data analysis. scp, the software that accompanies this work,
-              successfully replicates one of the landmark SCP studies and is
-              applicable to other experiments and designs. We created a
-              repository containing the replicated workflow with comprehensive
-              documentation in order to favor further dissemination and
-              improvements of SCP data analyses. EXPERT OPINION: Replicating
-              SCP data analyses uncovers important challenges in SCP data
-              analysis. We describe two such challenges in detail: batch
-              correction and data missingness. We provide the current
-              state-of-the-art and illustrate the associated limitations. We
-              also highlight the intimate dependence that exists between batch
-              effects and data missingness and offer avenues for dealing with
-              these exciting challenges.",
-  journal  = "Expert Rev. Proteomics",
-  month    =  oct,
-  year     =  2021,
-  keywords = "Bioconductor; R; batch correction; imputation; mass spectrometry;
-              proteomics; replication; reproducible research; single-cell;
-              software",
-  language = "en"
-}
-
-
-@ARTICLE{Rainer:2022,
-  title     = "A Modular and Expandable Ecosystem for Metabolomics Data
-               Annotation in {R}",
-  author    = "Rainer, Johannes and Vicini, Andrea and Salzer, Liesa and
-               Stanstrup, Jan and Badia, Josep M and Neumann, Steffen and
-               Stravs, Michael A and Verri Hernandes, Vinicius and Gatto,
-               Laurent and Gibb, Sebastian and Witting, Michael",
-  abstract  = "Liquid chromatography-mass spectrometry (LC-MS)-based untargeted
-               metabolomics experiments have become increasingly popular
-               because of the wide range of metabolites that can be analyzed
-               and the possibility to measure novel compounds. LC-MS
-               instrumentation and analysis conditions can differ substantially
-               among laboratories and experiments, thus resulting in
-               non-standardized datasets demanding customized annotation
-               workflows. We present an ecosystem of R packages, centered
-               around the MetaboCoreUtils, MetaboAnnotation and CompoundDb
-               packages that together provide a modular infrastructure for the
-               annotation of untargeted metabolomics data. Initial annotation
-               can be performed based on MS1 properties such as m/z and
-               retention times, followed by an MS2-based annotation in which
-               experimental fragment spectra are compared against a reference
-               library. Such reference databases can be created and managed
-               with the CompoundDb package. The ecosystem supports data from a
-               variety of formats, including, but not limited to, MSP, MGF,
-               mzML, mzXML, netCDF as well as MassBank text files and SQL
-               databases. Through its highly customizable functionality, the
-               presented infrastructure allows to build reproducible annotation
-               workflows tailored for and adapted to most untargeted
-               LC-MS-based datasets. All core functionality, which supports
-               base R data types, is exported, also facilitating its re-use in
-               other R packages. Finally, all packages are thoroughly
-               unit-tested and documented and are available on GitHub and
-               through Bioconductor.",
-  journal   = "Metabolites",
-  publisher = "Multidisciplinary Digital Publishing Institute",
-  volume    =  12,
-  number    =  2,
-  pages     = "173",
-  month     =  feb,
-  year      =  2022,
-  language  = "en"
-}
-
-@article{Sinha:2020,
-    author = {Sinha, Ankit and Mann, Matthias},
-    title = "{A beginner’s guide to mass spectrometry–based proteomics}",
-    journal = {The Biochemist},
-    year = {2020},
-    month = {09},
-    abstract = "{Mass spectrometry (MS)-based proteomics is the most
-                  comprehensive approach for the quantitative
-                  profiling of proteins, their interactions and
-                  modifications. It is a challenging topic as a firm
-                  grasp requires expertise in biochemistry for sample
-                  preparation, analytical chemistry for
-                  instrumentation and computational biology for data
-                  analysis. In this short guide, we highlight the
-                  various components of a mass spectrometer, the
-                  sample preparation process for conversion of
-                  proteins into peptides, and quantification and
-                  analysis strategies. The advancing technology of
-                  MS-based proteomics now opens up opportunities in
-                  clinical applications and single-cell analysis.}",
-    issn = {0954-982X},
-    doi = {10.1042/BIO20200057},
-    url = {https://doi.org/10.1042/BIO20200057},
-    note = {BIO20200057},
-    eprint = {https://portlandpress.com/biochemist/article-pdf/doi/10.1042/BIO20200057/892770/bio20200057.pdf},
-}
-
-@Article{Steen:2004,
-  title    = "The {ABC's} (and {XYZ's}) of peptide sequencing",
-  author   = "Steen, Hanno and Mann, Matthias",
-  abstract = "Proteomics is an increasingly powerful and indispensable
-              technology in molecular cell biology. It can be used to identify
-              the components of small protein complexes and large organelles,
-              to determine post-translational modifications and in
-              sophisticated functional screens. The key - but little understood
-              - technology in mass-spectrometry-based proteomics is peptide
-              sequencing, which we describe and review here in an easily
-              accessible format.",
-  journal  = "Nat. Rev. Mol. Cell Biol.",
-  volume   =  5,
-  number   =  9,
-  pages    = "699--711",
-  month    =  sep,
-  year     =  2004,
-  language = "en"
-}
-
-@ARTICLE{Marcotte:2007,
-  title    = "How do shotgun proteomics algorithms identify proteins?",
-  author   = "Marcotte, Edward M",
-  journal  = "Nat. Biotechnol.",
-  volume   =  25,
-  number   =  7,
-  pages    = "755--757",
-  month    =  jul,
-  year     =  2007,
-  language = "en"
-}
-
-
-@ARTICLE{Shuken:2023,
-  title    = "An Introduction to Mass {Spectrometry-Based} Proteomics",
-  author   = "Shuken, Steven R",
-  abstract = "Mass spectrometry is unmatched in its versatility for studying
-              practically any aspect of the proteome. Because the foundations
-              of mass spectrometry-based proteomics are complex and span
-              multiple scientific fields, proteomics can be perceived as having
-              a high barrier to entry. This tutorial is intended to be an
-              accessible illustrated guide to the technical details of a
-              relatively simple quantitative proteomic experiment. An attempt
-              is made to explain the relevant concepts to those with limited
-              knowledge of mass spectrometry and a basic understanding of
-              proteins. An experimental overview is provided, from the
-              beginning of sample preparation to the analysis of protein group
-              quantities, with explanations of how the data are acquired,
-              processed, and analyzed. A selection of advanced topics is
-              briefly surveyed and works for further reading are cited. To
-              conclude, a brief discussion of the future of proteomics is
-              given, considering next-generation protein sequencing
-              technologies that may complement mass spectrometry to create a
-              fruitful future for proteomics.",
-  journal  = "J. Proteome Res.",
-  month    =  jun,
-  year     =  2023,
-  keywords = "bottom-up; data-dependent acquisition; label-free quantification;
-              mass spectrometry; proteomics; untargeted proteomics",
-  language = "en"
-}
diff --git a/skeleton.bib b/skeleton.bib
deleted file mode 100644
index 77e82d8..0000000
--- a/skeleton.bib
+++ /dev/null
@@ -1,62 +0,0 @@
-@Manual{R-base,
-  title = {R: A Language and Environment for Statistical Computing},
-  author = {{R Core Team}},
-  organization = {R Foundation for Statistical Computing},
-  address = {Vienna, Austria},
-  year = {2023},
-  url = {https://www.R-project.org/},
-}
-
-@Manual{R-bookdown,
-  title = {bookdown: Authoring Books and Technical Documents with R Markdown},
-  author = {Yihui Xie},
-  note = {R package version 0.34.2, https://pkgs.rstudio.com/bookdown/},
-  url = {https://github.com/rstudio/bookdown},
-  year = {2023},
-}
-
-@Manual{R-msmbstyle,
-  title = {msmbstyle: MSMB Styles for R Markdown Documents},
-  author = {Mike Smith},
-  year = {2023},
-  note = {R package version 0.0.19},
-}
-
-@Manual{R-rmarkdown,
-  title = {rmarkdown: Dynamic Documents for R},
-  author = {JJ Allaire and Yihui Xie and Christophe Dervieux and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone},
-  year = {2023},
-  note = {R package version 2.24},
-  url = {https://CRAN.R-project.org/package=rmarkdown},
-}
-
-@Book{bookdown2016,
-  title = {bookdown: Authoring Books and Technical Documents with {R} Markdown},
-  author = {Yihui Xie},
-  publisher = {Chapman and Hall/CRC},
-  address = {Boca Raton, Florida},
-  year = {2016},
-  isbn = {978-1138700109},
-  url = {https://bookdown.org/yihui/bookdown},
-}
-
-@Book{rmarkdown2018,
-  title = {R Markdown: The Definitive Guide},
-  author = {Yihui Xie and J.J. Allaire and Garrett Grolemund},
-  publisher = {Chapman and Hall/CRC},
-  address = {Boca Raton, Florida},
-  year = {2018},
-  isbn = {9781138359338},
-  url = {https://bookdown.org/yihui/rmarkdown},
-}
-
-@Book{rmarkdown2020,
-  title = {R Markdown Cookbook},
-  author = {Yihui Xie and Christophe Dervieux and Emily Riederer},
-  publisher = {Chapman and Hall/CRC},
-  address = {Boca Raton, Florida},
-  year = {2020},
-  isbn = {9780367563837},
-  url = {https://bookdown.org/yihui/rmarkdown-cookbook},
-}
-
diff --git a/style.css b/style.css
deleted file mode 100644
index 4c51529..0000000
--- a/style.css
+++ /dev/null
@@ -1,5 +0,0 @@
-/* original background colour is #1881c2 */
-
-:root {
-  --main-bg-color: #115a88;
-}
diff --git a/vignettes/Makefile b/vignettes/Makefile
new file mode 100644
index 0000000..e9d4040
--- /dev/null
+++ b/vignettes/Makefile
@@ -0,0 +1,13 @@
+# Render the Quarto book and stage the result under ../inst/doc/book.
+.PHONY: all render
+
+all: render
+
+# `quarto render ../inst/` is expected to leave its output in ../inst/docs
+# (that is the directory moved below). Any previous ../inst/doc/book is removed
+# first: `mv` onto an existing directory would nest the new output as
+# ../inst/doc/book/docs and leave the stale book in place.
+render:
+	quarto render ../inst/
+	rm -rf ../inst/doc/book
+	mkdir -p ../inst/doc && mv ../inst/docs ../inst/doc/book
diff --git a/vignettes/stub.Rmd b/vignettes/stub.Rmd
new file mode 100644
index 0000000..aaa1b3c
--- /dev/null
+++ b/vignettes/stub.Rmd
@@ -0,0 +1,19 @@
+---
+vignette: >
+  %\VignetteIndexEntry{Link to book}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, echo=FALSE}
+# Adapted from Aaron Lun's OSCA.* books; the package name is looked up by field, not by position in DESCRIPTION.
+link <- BiocStyle::Biocbook(
+  read.dcf('../DESCRIPTION', fields = "Package")[1],
+  label="link"
+)
+URL <- sub(".*\\((.+)\\)", "\\1", link)  # keep only the text inside the last (...) of `link`
+```
+
+<meta charset="utf-8">
+<meta http-equiv="refresh" content="`r sprintf("0; URL=%s", URL)`">
+<link rel="canonical" href="`r URL`">