Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Api refine #25

Merged
merged 3 commits into from
Dec 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 18 additions & 35 deletions alpharaw/ms_data_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@ class MSData_Base:
The base data structure for MS data; other MSData loaders inherit from it.
"""

# Maps each recognised spectrum column name to the NumPy dtype its values
# are cast to. Only keys listed here are looked up via
# `self.column_dtypes[col]` when raw data is copied into `spectrum_df`.
# "U" is NumPy's type code for a unicode string array.
column_dtypes = {
"rt": np.float64,
"ms_level": np.int8,
"precursor_mz": np.float64,
"isolation_lower_mz": np.float64,
"isolation_upper_mz": np.float64,
"precursor_charge": np.int8,
"nce": np.float32,
"injection_time": np.float32,
"activation": "U",
}

spectrum_df: pd.DataFrame
"""
Spectrum dataframe containing the following columns:
Expand Down Expand Up @@ -152,41 +164,12 @@ def _set_dataframes(self, raw_data:dict):
raw_data['peak_indices'][:-1],
raw_data['peak_indices'][1:],
)
self.spectrum_df["rt"] = raw_data['rt']
self.spectrum_df["ms_level"] = np.array(
raw_data['ms_level'], dtype=np.int8
)
self.spectrum_df["precursor_mz"] = np.array(
raw_data['precursor_mz'], dtype=np.float64
)

self.spectrum_df["charge"] = np.array(
raw_data['precursor_charge'],
dtype=np.int8
)
self.spectrum_df["isolation_lower_mz"] = np.array(
raw_data['isolation_lower_mz'], dtype=np.float64
)
self.spectrum_df["isolation_upper_mz"] = np.array(
raw_data['isolation_upper_mz'], dtype=np.float64
)
if "nce" in raw_data:
self.spectrum_df["nce"] = np.array(
raw_data["nce"],
dtype=np.float32,
)
if "fragmentation" in raw_data:
self.spectrum_df["fragmentation"] = np.array(
raw_data["fragmentation"]
)
if "detector" in raw_data:
self.spectrum_df["detector"] = np.array(
raw_data["detector"]
)
if "injection_time" in raw_data:
self.spectrum_df["injection_time"] = np.array(
raw_data["injection_time"]
)

for col, val in raw_data.items():
if col in self.column_dtypes:
self.spectrum_df[col] = np.array(
val, dtype=self.column_dtypes[col]
)

def _read_creation_time(self, raw_data):
pass
Expand Down
148 changes: 97 additions & 51 deletions alpharaw/raw_access/pythermorawfilereader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import time
import warnings

from collections import defaultdict

# require pythonnet, pip install pythonnet on Windows
import clr
clr.AddReference('System')
Expand Down Expand Up @@ -76,47 +78,54 @@ class RawFileReader(object):
'PDA': 3,
'UV': 4}

massAnalyzerType = {'ITMS': 0,
'TQMS': 1,
'SQMS': 2,
'TOFMS': 3,
'FTMS': 4,
'Sector': 5,
0: 'ITMS',
1: 'TQMS',
2: 'SQMS',
3: 'TOFMS',
4: 'FTMS',
5: 'Sector'}
activationType = {'CID': 0,
'MPD': 1,
'ECD': 2,
'PQD': 3,
'ETD': 4,
'HCD': 5,
'Any activation type': 6,
'SA': 7,
'PTR': 8,
'NETD': 9,
'NPTR': 10,
'UVPD': 11,
'ETHCD': 12, # not Thermo's build-in activation types
'ETCID': 13, # not Thermo's build-in activation types
0: 'CID',
1: 'MPD',
2: 'ECD',
3: 'PQD',
4: 'ETD',
5: 'HCD',
6: 'Any activation type',
7: 'SA',
8: 'PTR',
9: 'NETD',
10: 'NPTR',
11: 'UVPD',
12: 'ETHCD', # not Thermo's build-in activation types
13: 'ETCID', # not Thermo's build-in activation types
}
# Two-way lookup table for mass analyzer types: name -> integer ID and
# integer ID -> name live in the same mapping.
# Because this is a defaultdict, indexing with an unknown key returns '?'
# instead of raising KeyError (and, per defaultdict semantics, stores that
# key in the mapping on first `[]` access).
# NOTE(review): ID 6 is not listed; confirm whether that gap is intentional.
massAnalyzerType = defaultdict(lambda: '?',
{
'ITMS': 0,
'TQMS': 1,
'SQMS': 2,
'TOFMS': 3,
'FTMS': 4,
'Sector': 5,
'Astral': 7,
0: 'ITMS',
1: 'TQMS',
2: 'SQMS',
3: 'TOFMS',
4: 'FTMS',
5: 'Sector',
7: 'Astral',
})
# Two-way lookup table for activation (fragmentation) types: name -> integer
# ID and integer ID -> name live in the same mapping.
# Because this is a defaultdict, indexing with an unknown key returns '?'
# instead of raising KeyError (and, per defaultdict semantics, stores that
# key in the mapping on first `[]` access).
# ETHCD and ETCID are assigned 1001 and 1002, outside the contiguous 0-11
# range used by the other entries.
activationType = defaultdict(lambda: '?',
{
'CID': 0,
'MPD': 1,
'ECD': 2,
'PQD': 3,
'ETD': 4,
'HCD': 5,
'Any activation type': 6,
'SA': 7,
'PTR': 8,
'NETD': 9,
'NPTR': 10,
'UVPD': 11,
'ETHCD': 1001, # not Thermo's built-in activation types
'ETCID': 1002, # not Thermo's built-in activation types
0: 'CID',
1: 'MPD',
2: 'ECD',
3: 'PQD',
4: 'ETD',
5: 'HCD',
6: 'Any activation type',
7: 'SA',
8: 'PTR',
9: 'NETD',
10: 'NPTR',
11: 'UVPD',
1001: 'ETHCD', # not Thermo's built-in activation types
1002: 'ETCID', # not Thermo's built-in activation types
})

detectorType = {'Valid': 0,
'Any': 1,
Expand Down Expand Up @@ -283,34 +292,46 @@ def GetStatusLogForScanNum(self, scan):

def GetNumberOfMassRangesFromScanNum(self, scanNumber):
"""This function gets the number of MassRange data items in the scan."""
return IScanEventBase(self.source.GetScanEventForScanNumber(scanNumber)).MassRangeCount
return IScanEventBase(
self.source.GetScanEventForScanNumber(scanNumber)
).MassRangeCount

def GetMassRangeFromScanNum(self, scanNumber, massRangeIndex):
"""This function retrieves information about the mass range data of a scan (high and low
masses). You can find the count of mass ranges for the scan by calling
GetNumberOfMassRangesFromScanNum()."""
range = IScanEventBase(self.source.GetScanEventForScanNumber(scanNumber)).GetMassRange(massRangeIndex)
range = IScanEventBase(
self.source.GetScanEventForScanNumber(scanNumber)
).GetMassRange(massRangeIndex)
return range.Low, range.High

def GetNumberOfSourceFragmentsFromScanNum(self, scanNumber):
"""This function gets the number of source fragments (or compensation voltages) in the scan."""
return IScanEventBase(self.source.GetScanEventForScanNumber(scanNumber)).SourceFragmentationInfoCount
return IScanEventBase(
self.source.GetScanEventForScanNumber(scanNumber)
).SourceFragmentationInfoCount

def GetSourceFragmentValueFromScanNum(self, scanNumber, sourceFragmentIndex):
"""This function retrieves information about one of the source fragment values of a scan. It is
also the same value as the compensation voltage. You can find the count of source fragments
for the scan by calling GetNumberOfSourceFragmentsFromScanNum ()."""
return IScanEventBase(self.source.GetScanEventForScanNumber(scanNumber)).GetSourceFragmentationInfo(sourceFragmentIndex)
return IScanEventBase(
self.source.GetScanEventForScanNumber(scanNumber)
).GetSourceFragmentationInfo(sourceFragmentIndex)

def GetIsolationWidthForScanNum(self, scanNumber, MSOrder = 0):
"""This function returns the isolation width for the scan specified by scanNumber and the
transition specified by MSOrder (0 for MS1?) from the scan event structure in the raw file."""
return IScanEventBase(self.source.GetScanEventForScanNumber(scanNumber)).GetIsolationWidth(MSOrder)
return IScanEventBase(
self.source.GetScanEventForScanNumber(scanNumber)
).GetIsolationWidth(MSOrder)

def GetCollisionEnergyForScanNum(self, scanNumber, MSOrder = 0):
"""This function returns the collision energy for the scan specified by scanNumber and the
transition specified by MSOrder (0 for MS1?) from the scan event structure in the raw file. """
return IScanEventBase(self.source.GetScanEventForScanNumber(scanNumber)).GetEnergy(MSOrder)
return IScanEventBase(
self.source.GetScanEventForScanNumber(scanNumber)
).GetEnergy(MSOrder)

def GetActivationTypeForScanNum(self, scanNumber, MSOrder = 0):
"""This function returns the activation type for the scan specified by scanNumber and the
Expand All @@ -328,20 +349,42 @@ def GetActivationTypeForScanNum(self, scanNumber, MSOrder = 0):
NETD 9
NPTR 10
UVPD 11"""
return RawFileReader.activationType[IScanEventBase(self.source.GetScanEventForScanNumber(scanNumber)).GetActivation(MSOrder)]
return RawFileReader.activationType[
self.GetActivationIDForScanNum(scanNumber, MSOrder)
]

def GetActivationIDForScanNum(self, scanNumber, MSOrder=0):
    """Return the activation code of a scan as a plain Python int.

    Fetches the scan event for ``scanNumber`` from ``self.source``, asks it
    for the activation at ``MSOrder`` and converts the result with ``int()``.
    """
    scan_event = IScanEventBase(
        self.source.GetScanEventForScanNumber(scanNumber)
    )
    activation = scan_event.GetActivation(MSOrder)
    return int(activation)

def GetMassAnalyzerTypeForScanNum(self, scanNumber):
"""This function returns the mass analyzer type for the scan specified by scanNumber from the
scan event structure in the RAW file. The value of scanNumber must be within the range of
scans or readings for the current controller. The range of scans or readings for the current
controller may be obtained by calling GetFirstSpectrumNumber and
GetLastSpectrumNumber.
return RawFileReader.massAnalyzerType[IScanEventBase(self.source.GetScanEventForScanNumber(scanNumber)).MassAnalyzer]"""
"""
return RawFileReader.massAnalyzerType[
self.GetMassAnalyzerIDForScanNum(scanNumber)
]

def GetMassAnalyzerIDForScanNum(self, scanNumber):
    """Return the mass analyzer code of a scan as a plain Python int.

    Fetches the scan event for ``scanNumber`` from ``self.source`` and
    converts its ``MassAnalyzer`` attribute with ``int()``.
    """
    scan_event = IScanEventBase(
        self.source.GetScanEventForScanNumber(scanNumber)
    )
    analyzer = scan_event.MassAnalyzer
    return int(analyzer)

def GetDetectorTypeForScanNum(self, scanNumber):
"""This function returns the detector type for the scan specified by scanNumber from the scan
event structure in the RAW file."""
return RawFileReader.detectorType[IScanEventBase(self.source.GetScanEventForScanNumber(scanNumber)).Detector]
return RawFileReader.detectorType[
self.GetDetectorIDForScanNum(scanNumber)
]

def GetDetectorIDForScanNum(self, scanNumber):
    """Return the detector code of a scan as a plain Python int.

    Fetches the scan event for ``scanNumber`` from ``self.source`` and
    converts its ``Detector`` attribute with ``int()``.
    """
    scan_event = IScanEventBase(
        self.source.GetScanEventForScanNumber(scanNumber)
    )
    detector = scan_event.Detector
    return int(detector)

def GetNumberOfMassCalibratorsFromScanNum(self, scanNumber):
"""This function gets the number of mass calibrators (each of which is a double) in the scan."""
Expand Down Expand Up @@ -479,7 +522,10 @@ def GetTrailerExtraForScanNum(self, scanNumber):
NOTE : XCALIBUR INTERFACE "View/Scan header", lower part
"""
trailerData = self.source.GetTrailerExtraInformation(scanNumber)
return dict(zip(trailerData.Labels, trailerData.Values))
ret_dict = defaultdict(
lambda: 0, zip(trailerData.Labels, trailerData.Values)
)
return ret_dict

def GetMS2MonoMzAndChargeFromScanNum(self, scanNumber):
trailerData = self.GetTrailerExtraForScanNum(scanNumber)
Expand Down
Loading