Skip to content

Commit

Permalink
Merge pull request #33 from MannLabs/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
jalew188 authored Mar 16, 2024
2 parents 3e682e5 + 34c47a0 commit 31f8eee
Show file tree
Hide file tree
Showing 8 changed files with 272 additions and 212 deletions.
4 changes: 2 additions & 2 deletions alpharaw/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ def run(ctx, **kwargs):
if ctx.invoked_subcommand is None:
click.echo(run.get_help(ctx))

@run.command("parse", help="Convert raw files into alpharaw_hdf format.")
@run.command("parse", help="Convert raw files into alpharaw hdf5 (.hdf) format.")
@click.option(
"--raw_type", type=str, default="thermo_raw",
show_default=True, help=f"Only `thermo_raw` is supported currently.",
show_default=True, help=f"Only `thermo_raw`, `sciex_wiff` is supported currently.",
)
@click.option(
"--raw", multiple=True, default=[],
Expand Down
39 changes: 36 additions & 3 deletions alpharaw/match/psm_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ def _match_ms2_one_raw_numba(self,
psm_idxes = psm_groups[dia_group]
if len(psm_idxes) == 0: continue
psm_idxes = np.array(psm_idxes, dtype=np.int32)
spec_idxes = get_dia_spec_idxes(
spec_idxes = query_dia_spec_idxes_same_window(
group_df.rt.values,
psm_df_one_raw.rt.values[psm_idxes],
max_spec_per_query=self.max_spec_per_query
Expand Down Expand Up @@ -659,7 +659,7 @@ def get_ion_count_scores(
return np.array(scores,np.int32)

@numba.njit
def get_dia_spec_idxes(
def query_dia_spec_idxes_same_window(
spec_rt_values:np.ndarray,
query_rt_values:np.ndarray,
max_spec_per_query:int,
Expand All @@ -682,4 +682,37 @@ def get_dia_spec_idxes(
)
return spec_idxes


@numba.njit
def query_spec_idxes(
    spec_rts:np.ndarray,
    spec_isolation_lower_mzs:np.ndarray,
    spec_isolation_upper_mzs:np.ndarray,
    query_start_rts:np.ndarray,
    query_stop_rts:np.ndarray,
    query_mzs:np.ndarray,
    max_spec_per_query:int,
):
    """
    For each query, collect the indices of spectra that lie inside the
    query RT range and whose isolation window contains the query m/z.

    Parameters
    ----------
    spec_rts : np.ndarray
        RT value of every spectrum. Must be sorted in ascending order
        because it is searched with `np.searchsorted`.
    spec_isolation_lower_mzs : np.ndarray
        Lower isolation m/z bound of every spectrum (same length as `spec_rts`).
    spec_isolation_upper_mzs : np.ndarray
        Upper isolation m/z bound of every spectrum (same length as `spec_rts`).
    query_start_rts : np.ndarray
        Start RT of every query.
    query_stop_rts : np.ndarray
        Stop RT of every query (same length as `query_start_rts`).
    query_mzs : np.ndarray
        m/z value of every query (same length as `query_start_rts`).
    max_spec_per_query : int
        Maximal number of spectrum indices to keep per query.

    Returns
    -------
    np.ndarray
        int32 array with shape (len(query_mzs), max_spec_per_query).
        Row `i` holds the matched spectrum indices of query `i` in
        ascending order, padded with -1. If more than `max_spec_per_query`
        spectra match, only the central `max_spec_per_query` matches are kept.
    """
    n_spec = len(spec_rts)
    rt_start_idxes = np.searchsorted(spec_rts, query_start_rts)
    # +1 also includes the first spectrum at or after the stop RT.
    rt_stop_idxes = np.searchsorted(spec_rts, query_stop_rts)+1

    spec_idxes = np.full(
        (len(query_mzs),max_spec_per_query),
        -1, dtype=np.int32
    )
    for iquery in range(len(rt_start_idxes)):
        start_idx = rt_start_idxes[iquery]
        # Clamp: the +1 above may point one past the last spectrum, and
        # numba does not bounds-check array reads by default.
        stop_idx = min(rt_stop_idxes[iquery], n_spec)
        query_mz = query_mzs[iquery]

        # First pass: count the matching spectra so the kept window
        # can be centered without building a temporary list.
        n_hits = 0
        for ispec in range(start_idx, stop_idx):
            if (
                query_mz>=spec_isolation_lower_mzs[ispec] and
                query_mz<=spec_isolation_upper_mzs[ispec]
            ):
                n_hits += 1

        # Number of leading matches to drop so that exactly
        # `max_spec_per_query` central matches remain (0 if all fit).
        n_skip = max(0, (n_hits-max_spec_per_query)//2)

        # Second pass: write the kept matches into the output row.
        ihit = 0
        for ispec in range(start_idx, stop_idx):
            if (
                query_mz>=spec_isolation_lower_mzs[ispec] and
                query_mz<=spec_isolation_upper_mzs[ispec]
            ):
                icol = ihit-n_skip
                if icol >= max_spec_per_query:
                    break
                if icol >= 0:
                    spec_idxes[iquery,icol] = ispec
                ihit += 1
    return spec_idxes

15 changes: 7 additions & 8 deletions alpharaw/match/psm_match_alphatims.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,16 @@ class PepSpecMatch_AlphaTims(PepSpecMatch):

def get_peak_df(self,
precursor_mz:float,
rt:float,
rt_sec:float,
im:float=0.0,
)->pd.DataFrame:
"""
Parameters
----------
precursor_mz : float
Precursor m/z value
rt : float
RT value in minutes
rt_sec : float
RT value in seconds
im : float, optional
Ion mobility, by default 0.0
Expand All @@ -116,7 +116,6 @@ def get_peak_df(self,
pd.DataFrame
peak_df in alphatims DF format
"""
rt_sec = rt*60
rt_slice = slice(
rt_sec-self.rt_sec_tol_to_slice_ms2,
rt_sec+self.rt_sec_tol_to_slice_ms2,
Expand Down Expand Up @@ -176,16 +175,16 @@ def find_k_nearest(array, val, k=3):
def get_peaks(
self,
precursor_mz:float,
rt:float,
rt_sec:float,
im:float=0.0,
)->tuple:
"""
Parameters
----------
precursor_mz : float
Precursor m/z value
rt : float
RT value in minutes
rt_sec : float
RT value in seconds
im : float, optional
Ion mobility, by default 0.0
Expand All @@ -195,7 +194,7 @@ def get_peaks(
np.ndarray: peak m/z values
np.ndarray: peak intensity values
"""
spec_df = self.get_peak_df(precursor_mz, rt, im)
spec_df = self.get_peak_df(precursor_mz, rt_sec, im)
spec_df = spec_df.sort_values('mz_values').reset_index(drop=True)
return (
spec_df.mz_values.values,
Expand Down
13 changes: 8 additions & 5 deletions alpharaw/ms_data_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def __init__(
self.peak_df:pd.DataFrame = pd.DataFrame()
self._raw_file_path = ''
self.centroided = centroided
self.save_as_hdf = save_as_hdf
self._save_as_hdf = save_as_hdf
self.creation_time = ''
self.file_type = ''
self.instrument = 'none'
Expand Down Expand Up @@ -99,7 +99,7 @@ def import_raw(self, _path:str):
self._set_dataframes(raw_data)
self._check_df()

if self.save_as_hdf:
if self._save_as_hdf:
self.save_hdf(_path+'.hdf')

def load_raw(self, _path:str):
Expand Down Expand Up @@ -167,9 +167,12 @@ def _set_dataframes(self, raw_data:dict):

for col, val in raw_data.items():
if col in self.column_dtypes:
self.spectrum_df[col] = np.array(
val, dtype=self.column_dtypes[col]
)
if self.column_dtypes[col] == "O":
self.spectrum_df[col] = list(val)
else:
self.spectrum_df[col] = np.array(
val, dtype=self.column_dtypes[col]
)

def _read_creation_time(self, raw_data):
pass
Expand Down
5 changes: 5 additions & 0 deletions alpharaw/raw_access/pythermorawfilereader.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,11 @@ def GetStatusLogForRetentionTime(self, rt):
def GetStatusLogForScanNum(self, scan):
    """Look up the status log of a scan via the scan's retention time."""
    # Convert the scan number to its retention time, then reuse the
    # retention-time based lookup.
    scan_rt = self.RTFromScanNum(scan)
    return self.GetStatusLogForRetentionTime(scan_rt)

def GetScanEventForScanNum(self, scanNumber):
    """Fetch the scan event of `scanNumber` from the underlying source and wrap it in an IScanEventBase."""
    scan_event = self.source.GetScanEventForScanNumber(scanNumber)
    return IScanEventBase(scan_event)

def GetNumberOfMassRangesFromScanNum(self, scanNumber):
"""This function gets the number of MassRange data items in the scan."""
return IScanEventBase(
Expand Down
Loading

0 comments on commit 31f8eee

Please sign in to comment.