Skip to content
This repository has been archived by the owner on Nov 10, 2021. It is now read-only.

Commit

Permalink
Merge pull request #25 from AberystwythSystemsBiology/normalisation
Browse files Browse the repository at this point in the history
New Release
  • Loading branch information
KeironO authored Sep 3, 2020
2 parents ee08170 + fd4ca48 commit 057149e
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 80 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
language: python
python:
- "3.5"
- "3.7"
install:
- pip install .
Expand Down
114 changes: 85 additions & 29 deletions dimepy/spectrum.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,7 @@
# -*- coding: utf-8 -*-
# encoding: utf-8

"""
The class :py:class:`Spectrum` has been designed to load data
from a mzML file and to represetn the data as a python object.
.. note::
This class is still being actively developed and will likely change over time.
"""
import numpy as np
from typing import Tuple, List
from scipy.stats import binned_statistic
from pymzml.run import Reader as pymzmlReader
from .scan import Scan
from .utils import terms, bin_masses_and_intensities
import itertools

# Copyright (c) 2017-2019 Keiron O'Shea
# Copyright (c) 2017-2020 Keiron O'Shea
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
Expand All @@ -33,6 +18,26 @@
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA


import numpy as np
import logging
from typing import Tuple, List
from scipy.stats import binned_statistic
from pymzml.run import Reader as pymzmlReader
from .scan import Scan
from .utils import terms, bin_masses_and_intensities
import itertools
import os
import matplotlib.pyplot as plt

"""
The class :py:class:`Spectrum` has been designed to load data
from a mzML file.
.. note::
This class is still being actively developed and will likely change over time.
"""

class Spectrum(object):

""" Initialise Spectrum object for a given mzML file.
Expand All @@ -41,11 +46,12 @@ class Spectrum(object):

def __init__(self,
filepath: str,
identifier: str,
identifier: str = None,
injection_order: int = None,
stratification: str = None,
snr_estimator: str = False,
peak_type: str = "raw",
is_qc: bool= False,
MS1_precision: float = 5e-6,
MSn_precision: float = 20e-6
):
Expand All @@ -57,7 +63,8 @@ def __init__(self,
Arguments:
filepath (str): Path to the mzML file to parse.
identifier (str): Unique identifier for the Spectrum object.
identifier (str): Unique identifier for the Spectrum object. If none is given,
it will default to the filepath.
injection_order (int): The injection number of the Spectrum object.
Expand All @@ -70,24 +77,31 @@ def __init__(self,
* 'mean'
* 'mad'
peak_type (raw): What peak type to load in.
peak_type (str): What peak type to load in.
Currently supported peak types are:
* raw (default)
* centroided
* reprofiled
MS1_precision (float): Measured precision for the MS level 1.
is_qc (bool): Whether the Sample is QC.
MSn_precision (float): Measured precision for the MS level n.
MS1_precision (float): Measured precision for the MS level 1.
MSn_precision (float): Measured precision for the MS level n.
"""
self.filepath = filepath
self.identifier = identifier

if identifier == None:
self.identifier = os.path.basename(filepath)

self.injection_order = injection_order
self.stratification = stratification
self.snr_estimator = snr_estimator
self.peak_type = peak_type
self.is_qc = is_qc
self.MS1_precision = MS1_precision
self.MSn_precision = MSn_precision

Expand All @@ -99,6 +113,8 @@ def __init__(self,

def _base_load(self) -> Tuple[np.array, np.array]:

logging.info("Loading %s" % (self.filepath))

extraAccessions = [
[[y, ["value"]] for y in terms[x]] for x in terms.keys()
]
Expand All @@ -122,7 +138,7 @@ def _base_load(self) -> Tuple[np.array, np.array]:

return np.array(scans), np.array(to_use)

def limit_polarity(self, polarity: str) -> None:
def limit_polarity(self, polarity: str, verbose: bool = False) -> None:
"""
Limit the Scans found within the mzML file to whatever polarity is given.
This should only be called where fast-polarity switching is used.
Expand All @@ -134,21 +150,30 @@ def limit_polarity(self, polarity: str) -> None:
* 'positive'
* 'negative'
verbose (bool): enable verbose output.
"""

if polarity.upper() not in ["POSITIVE", "NEGATIVE"]:
raise AttributeError("%s not a valid option" % (polarity))

def _determine_polarity(scan) -> str:
scan_polarity = None
for polarity_acc in terms["polarity"]:
if scan.get(polarity_acc) != None:
scan_polarity = terms["polarity"][polarity_acc]

return scan_polarity

for index, scan in enumerate(self._scans):
if _determine_polarity(scan) != polarity.upper():
self._to_use[index] = False
logging.info("Scan %i is not %s polarity" % (index, polarity))

if verbose and self._to_use[index]:
print("Scan %i is %s polarity" % (index, polarity))


def limit_infusion(self, threshold: int = 3) -> None:
def limit_infusion(self, threshold: int = 3, plot = False) -> None:
"""
This method is a slight extension of Manfred Beckmann's (meb@aber.ac.uk)
LCT/Q-ToF scan retrieval method in FIEMSpro in which we use the median absolute
Expand All @@ -161,7 +186,7 @@ def limit_infusion(self, threshold: int = 3) -> None:
/ \
/ \_
____/ \_________________
0 0.5 1 1.5 2 [min]
0 0.5 1 1.5 n [scan number]
|--------| Apex
We are only interested in the scans in which the infusion takes place
Expand All @@ -172,7 +197,9 @@ def limit_infusion(self, threshold: int = 3) -> None:
mad_multiplier (int): The multiplier for the median absolute
deviation method to take the infusion profile from.
plot (bool): Plot the results.
"""

def _calculate_mad(tics: np.array) -> float:
Expand All @@ -190,11 +217,34 @@ def _get_mask(tics: np.array, mad: float) -> np.array:

return modified_z_score >= threshold

def _plot(tics: np.array, mad: float, ini_scan: int, end_scan: int):
plt.figure()
plt.title("Apex Selection Plot \n%s" % (self.identifier))
plt.plot(tics)
plt.xlim(0, len(tics))
plt.ylim(0, max(tics * 1.1))
plt.axvline(ini_scan, ls="--", c="red")
plt.axvline(end_scan, ls="--", c="red")
plt.xlabel("Scan Number")
plt.ylabel("Total Ion Current (TIC)")
plt.tight_layout()
if type(plot) == bool:
plt.show()
else:
plt.savefig(plot)

tics = np.array([scan.TIC for scan in self._scans[self._to_use]])
mad = _calculate_mad(tics)

apex_index = _get_mask(tics, mad)

logging.info("Mean Absolute Deviation Calculated as %f TIC" % (mad))

ini_scan = np.min(np.where(apex_index == True)[0])
end_scan = np.max(np.where(apex_index == True)[0])

if plot:
_plot(tics, mad, ini_scan, end_scan)

to_use = np.where(self._to_use == True)[0]

for i, j in enumerate(to_use):
Expand Down Expand Up @@ -232,8 +282,10 @@ def _load_masses_and_ints_from_scans(self) -> None:
intensities = []

for scan in self.read_scans:
masses.extend(scan.masses)
intensities.extend(scan.intensities)
for m, i in zip(scan.masses, scan.intensities):
if i > 0.0:
masses.append(m)
intensities.append(i)

masses = np.array(masses)
intensities = np.array(intensities)
Expand Down Expand Up @@ -387,6 +439,10 @@ def intensities(self) -> np.array:
return self._intensities
else:
raise ValueError("No intensities generated, run load_scans() first!")

@property
def TIC(self) -> float:
return sum(self.intensities)

@property
def to_use(self) -> List[bool]:
Expand Down
Loading

0 comments on commit 057149e

Please sign in to comment.