diff --git a/alpharaw/utils/centroiding.py b/alpharaw/utils/centroiding.py index f9b42f6..4d872a6 100644 --- a/alpharaw/utils/centroiding.py +++ b/alpharaw/utils/centroiding.py @@ -1,4 +1,4 @@ -# Modified from AlphaPept +from typing import Tuple import numpy as np from numba import njit @@ -6,10 +6,28 @@ @njit def naive_centroid( - peak_mzs, - peak_intens, - centroiding_ppm=20.0, -): + peak_mzs: np.ndarray, + peak_intensities: np.ndarray, + centroiding_ppm: float = 20.0, +) -> Tuple[np.ndarray, np.ndarray]: + """ + A naive centroiding algorithm. + + Parameters + ---------- + peak_mzs : np.ndarray + peak m/z values to centroid. + peak_intensities : np.ndarray + peak intensities to centroid. + centroiding_ppm : float, optional + The centroiding ppm, by default 20.0 + + Returns + ------- + Tuple + ndarray: peak m/z array + ndarray: peak intensity array + """ mz_list = [] inten_list = [] start, stop = 0, 1 @@ -17,18 +35,25 @@ def naive_centroid( while start < len(peak_mzs): stop = _find_sister_peaks(peak_mzs, centroiding_peak_tols, start) mz_list.append( - np.average(peak_mzs[start:stop], weights=peak_intens[start:stop]) + np.average(peak_mzs[start:stop], weights=peak_intensities[start:stop]) ) - inten_list.append(np.sum(peak_intens[start:stop])) + inten_list.append(np.sum(peak_intensities[start:stop])) start = stop return ( np.array(mz_list, dtype=peak_mzs.dtype), - np.array(inten_list, dtype=peak_intens.dtype), + np.array(inten_list, dtype=peak_intensities.dtype), ) @njit -def _find_sister_peaks(peak_mzs, centroiding_peak_tols, start): +def _find_sister_peaks( + peak_mzs: np.ndarray, centroiding_peak_tols: np.ndarray, start: int +): + """ + Find sister peak stop idx for the given start idx. + Sister peaks refer to peaks from the same ion in profile mode. + Internal function. 
+ """ stop = start + 1 for i in range(start + 1, len(peak_mzs)): if peak_mzs[i] - peak_mzs[start] <= centroiding_peak_tols[start]: diff --git a/alpharaw/utils/ms_path_utils.py b/alpharaw/utils/ms_path_utils.py index 6884f43..d07a3cd 100644 --- a/alpharaw/utils/ms_path_utils.py +++ b/alpharaw/utils/ms_path_utils.py @@ -1,6 +1,6 @@ import os -_special_ms_exts: list = [ +_SPECIAL_MS_EXTS: list = [ ".ms_data.hdf", # alphapept ".raw.hdf", # alpharaw ".raw.hdf5", # alpharaw @@ -14,10 +14,30 @@ ] -def get_raw_name(ms_file: str, special_ms_exts: list = _special_ms_exts): +def get_raw_name(ms_file: str) -> str: + """ + Get `raw_name` (base name of RAW data file) from the MS file path + by removing the extensions defined in :data:`_SPECIAL_MS_EXTS`. + + Parameters + ---------- + ms_file : str + The absolute or relative path of the RAW file. + + Returns + ------- + str + The `raw_name` without extension. + + Examples + -------- + >>> get_raw_name("/MS/files/your_raw_name.raw") + 'your_raw_name' + + """ raw_name = os.path.basename(ms_file) lower_name = raw_name.lower() - for _ext in special_ms_exts: + for _ext in _SPECIAL_MS_EXTS: if lower_name.endswith(_ext.lower()): raw_name = raw_name[: -len(_ext)] break @@ -27,7 +47,7 @@ def get_raw_name(ms_file: str, special_ms_exts: list = _special_ms_exts): def parse_ms_files_to_dict( - ms_file_list: list, special_ms_exts: list = _special_ms_exts + ms_file_list: list, ) -> dict: """ Parse spectrum file paths into a dict: @@ -49,6 +69,6 @@ def parse_ms_files_to_dict( ms_file_dict = {} for ms_file in ms_file_list: - raw_name = get_raw_name(ms_file, special_ms_exts) + raw_name = get_raw_name(ms_file) ms_file_dict[raw_name] = ms_file return ms_file_dict