diff --git a/alphadia/constants/keys.py b/alphadia/constants/keys.py index 1b3f04da..c947a19f 100644 --- a/alphadia/constants/keys.py +++ b/alphadia/constants/keys.py @@ -32,3 +32,9 @@ class ConfigKeys(metaclass=ConstantsClass): RAW_PATHS = "raw_paths" FASTA_PATHS = "fasta_paths" QUANT_DIRECTORY = "quant_directory" + + +class SearchStepFiles(metaclass=ConstantsClass): + PSM_FILE_NAME = "psm.parquet" + FRAG_FILE_NAME = "frag.parquet" + FRAG_TRANSFER_FILE_NAME = "frag.transfer.parquet" diff --git a/alphadia/outputaccumulator.py b/alphadia/outputaccumulator.py index 76c14cd2..d4ce7e7e 100644 --- a/alphadia/outputaccumulator.py +++ b/alphadia/outputaccumulator.py @@ -34,171 +34,129 @@ from alphabase.spectral_library.flat import SpecLibFlat from tqdm import tqdm -logger = logging.getLogger() - +from alphadia.constants.keys import SearchStepFiles -class SpecLibFlatFromOutput(SpecLibFlat): - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def _calculate_fragment_position(self): - """ - Calculate the position of the fragments based on the type and number of the fragment. 
- """ - # Fragtypes from ascii to char - available_frag_types = self._fragment_df["type"].unique() - self.frag_types_as_char = {i: chr(i) for i in available_frag_types} - - mapped_frag_types = self._fragment_df["type"].map(self.frag_types_as_char) - a_b_c_fragments = mapped_frag_types.isin(["a", "b", "c"]) - x_y_z_fragments = mapped_frag_types.isin(["x", "y", "z"]) - - precursor_idx_to_nAA = ( - self._precursor_df[["precursor_idx", "nAA"]] - .set_index("precursor_idx") - .to_dict()["nAA"] - ) - # For X,Y,Z frags calculate the position as being the nAA of the precursor - number of the fragment - x_y_z_number = ( - self._fragment_df.loc[x_y_z_fragments, "precursor_idx"].map( - precursor_idx_to_nAA - ) - - self._fragment_df.loc[x_y_z_fragments, "number"] - ) - self._fragment_df.loc[x_y_z_fragments, "position"] = x_y_z_number - 1 +logger = logging.getLogger() - # For A,B,C frags calculate the position as being the number of the fragment - self._fragment_df.loc[a_b_c_fragments, "position"] = ( - self._fragment_df.loc[a_b_c_fragments, "number"] - 1 - ) - # Change position to int - self._fragment_df["position"] = self._fragment_df["position"].astype(int) +def build_speclibflat_from_quant( + folder: str, + mandatory_precursor_columns: list[str] | None = None, + optional_precursor_columns: list[str] | None = None, + charged_frag_types: list[str] | None = None, +) -> SpecLibFlat: + """ + Build a SpecLibFlat object from quantification output data stored in a folder for transfer learning. - def parse_output_folder( - self, - folder: str, - mandatory_precursor_columns: list[str] | None = None, - optional_precursor_columns: list[str] | None = None, - ) -> tuple[pd.DataFrame, pd.DataFrame]: - """ - Parse the output folder to get a precursor and fragment dataframe in the flat format. - Parameters - ---------- - folder : str - The output folder to be parsed. 
- mandatory_precursor_columns : list, optional - The columns to be selected from the precursor dataframe, by default ['precursor_idx', 'sequence', 'flat_frag_start_idx', 'flat_frag_stop_idx', 'charge', 'rt_library', 'mobility_library', 'mz_library', 'proteins', 'genes', 'mods', 'mod_sites', 'proba'] + Parameters + ---------- + folder : str + The output folder to be parsed. + mandatory_precursor_columns : list[str], optional + The columns to be selected from the precursor dataframe + optional_precursor_columns : list[str], optional + Additional optional columns to include if present - Returns - ------- - pd.DataFrame - The precursor dataframe. - pd.DataFrame - The fragment dataframe. + Returns + ------- + SpecLibFlat + A spectral library object containing the parsed data + """ + speclib = SpecLibFlat() + + if mandatory_precursor_columns is None: + mandatory_precursor_columns = [ + "precursor_idx", + "sequence", + "flat_frag_start_idx", + "flat_frag_stop_idx", + "charge", + "rt_library", + "rt_observed", + "mobility_library", + "mobility_observed", + "mz_library", + "mz_observed", + "proteins", + "genes", + "mods", + "mod_sites", + "proba", + "decoy", + ] + if optional_precursor_columns is None: + optional_precursor_columns = [ + "rt_calibrated", + "mz_calibrated", + ] - """ - if mandatory_precursor_columns is None: - mandatory_precursor_columns = [ - "precursor_idx", - "sequence", - "flat_frag_start_idx", - "flat_frag_stop_idx", - "charge", - "rt_library", - "rt_observed", - "mobility_library", - "mobility_observed", - "mz_library", - "mz_observed", - "proteins", - "genes", - "mods", - "mod_sites", - "proba", - "decoy", - ] - - if optional_precursor_columns is None: - optional_precursor_columns = [ - "rt_calibrated", - "mz_calibrated", - ] - - psm_df = pd.read_parquet(os.path.join(folder, "psm.parquet")) - frag_df = pd.read_parquet(os.path.join(folder, "frag.parquet")) - - if not set(mandatory_precursor_columns).issubset(psm_df.columns): - raise ValueError( - 
f"mandatory_precursor_columns must be a subset of psm_df.columns didnt find {set(mandatory_precursor_columns) - set(psm_df.columns)}" - ) + psm_df = pd.read_parquet(os.path.join(folder, SearchStepFiles.PSM_FILE_NAME)) + frag_df = pd.read_parquet( + os.path.join(folder, SearchStepFiles.FRAG_TRANSFER_FILE_NAME) + ) - available_columns = sorted( - list( - set(mandatory_precursor_columns) - | (set(optional_precursor_columns) & set(psm_df.columns)) - ) + if not set(mandatory_precursor_columns).issubset(psm_df.columns): + raise ValueError( + f"mandatory_precursor_columns must be a subset of psm_df.columns didnt find {set(mandatory_precursor_columns) - set(psm_df.columns)}" ) - psm_df = psm_df[available_columns] - # get foldername of the output folder - foldername = os.path.basename(folder) - psm_df["raw_name"] = foldername + available_columns = sorted( + list( + set(mandatory_precursor_columns) + | (set(optional_precursor_columns) & set(psm_df.columns)) + ) + ) + psm_df = psm_df[available_columns] - # remove decoy precursors - # assert that decoy is int - psm_df["decoy"] = psm_df["decoy"].astype(int) - psm_df = psm_df[psm_df["decoy"] == 0].reset_index(drop=True) + psm_df["raw_name"] = os.path.basename(folder) - self._precursor_df = pd.DataFrame() - for col in psm_df.columns: - self._precursor_df[col] = psm_df[col] + psm_df["decoy"] = psm_df["decoy"].astype(int) + psm_df = psm_df[psm_df["decoy"] == 0].reset_index(drop=True) - # self._precursor_df.set_index('precursor_idx', inplace=True) - # Change the data type of the mods column to string - self._precursor_df["mods"] = self._precursor_df["mods"].astype(str) + speclib._precursor_df = psm_df.copy() - self._precursor_df["mod_sites"] = self._precursor_df["mod_sites"].astype(str) + speclib._precursor_df["mods"] = speclib._precursor_df["mods"].astype(str) + speclib._precursor_df["mod_sites"] = speclib._precursor_df["mod_sites"].astype(str) + speclib._precursor_df["mods"] = speclib._precursor_df["mods"].replace("nan", "") + 
speclib._precursor_df["mod_sites"] = speclib._precursor_df["mod_sites"].replace( + "nan", "" + ) - # Replace nan with empty string - self._precursor_df["mods"] = self._precursor_df["mods"].replace("nan", "") - self._precursor_df["mod_sites"] = self._precursor_df["mod_sites"].replace( - "nan", "" - ) + speclib.calc_precursor_mz() - self.calc_precursor_mz() - - for col in ["rt", "mz", "mobility"]: - if f"{col}_observed" in psm_df.columns: - values = psm_df[f"{col}_observed"] - elif "{col}_calibrated" in psm_df.columns: - values = psm_df["{col}_calibrated"] - else: - values = psm_df[f"{col}_library"] - self._precursor_df[col] = values - - # ----------------- Fragment ----------------- - # Filer fragments that are not used in the precursors - frag_df = frag_df[ - frag_df["precursor_idx"].isin(self._precursor_df["precursor_idx"]) + for col in ["rt", "mz", "mobility"]: + if f"{col}_observed" in psm_df.columns: + values = psm_df[f"{col}_observed"] + elif f"{col}_calibrated" in psm_df.columns: + values = psm_df[f"{col}_calibrated"] + else: + values = psm_df[f"{col}_library"] + speclib._precursor_df[col] = values + + frag_df = frag_df[ + frag_df["precursor_idx"].isin(speclib._precursor_df["precursor_idx"]) + ] + speclib._fragment_df = frag_df[ + [ + "mz", + "intensity", + "precursor_idx", + "frag_idx", + "correlation", + "number", + "type", + "charge", + "loss_type", + "position", ] - self._fragment_df = frag_df[ - ["mz", "intensity", "precursor_idx", "frag_idx", "correlation"] - ].copy() + ].copy() - for col in ["number", "type", "charge"]: - if col in self.custom_fragment_df_columns: - self._fragment_df.loc[:, col] = frag_df.loc[:, col] - - if "position" in self.custom_fragment_df_columns: - if "position" in frag_df.columns: - self._fragment_df.loc[:, "position"] = frag_df.loc[:, "position"] - else: - self._calculate_fragment_position() - - return self._precursor_df, self._fragment_df + return speclib.to_speclib_base( + charged_frag_types=charged_frag_types, + 
flat_columns=["intensity", "correlation"], + ) class BaseAccumulator: @@ -226,34 +184,6 @@ def post_process(self) -> None: raise NotImplementedError("Subclasses must implement the post_process method") -def process_folder(folder): - """ - Process a folder and return the speclibase object. - It does so by parsing the output folderto get SpecLibFlat object and then converting it to SpecLibBase object. - And for now it assumes that the loss_type is 0 for all the fragments. - - Parameters - ---------- - folder : str - The folder to be processed. - - Returns - ------- - SpecLibBase - The SpecLibBase object obtained from the output folder. - """ - speclibflat_object = SpecLibFlatFromOutput() - psm, frag_df = speclibflat_object.parse_output_folder(folder) - speclibflat_object._fragment_df["loss_type"] = 0 - speclibase = speclibflat_object.to_SpecLibBase() - # sort columns - for dense_df_name in speclibase.available_dense_fragment_dfs(): - df = getattr(speclibase, dense_df_name) - setattr(speclibase, dense_df_name, df[df.columns.sort_values()]) - - return speclibase - - def error_callback(e): logger.error(e, exc_info=True) @@ -264,11 +194,14 @@ class AccumulationBroadcaster: And broadcasts the output of each folder to the subscribers. 
""" - def __init__(self, folders: list, number_of_processes: int): - self._folders = folders + def __init__( + self, folder_list: list, number_of_processes: int, processing_kwargs: dict + ): + self._folder_list = folder_list self._number_of_processes = number_of_processes self._subscribers = [] self._lock = threading.Lock() # Lock to prevent two processes trying to update the same subscriber at the same time + self._processing_kwargs = processing_kwargs def subscribe(self, subscriber: BaseAccumulator): self._subscribers.append(subscriber) @@ -290,10 +223,11 @@ def _post_process(self): def run(self): with multiprocessing.Pool(processes=self._number_of_processes) as pool: - for folder in self._folders: + for folder in self._folder_list: _ = pool.apply_async( - process_folder, + build_speclibflat_from_quant, (folder,), + self._processing_kwargs, callback=self._broadcast, error_callback=error_callback, ) diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py index f42b9a41..1077726a 100644 --- a/alphadia/outputtransform.py +++ b/alphadia/outputtransform.py @@ -11,7 +11,7 @@ import directlfq.utils as lfqutils import numpy as np import pandas as pd -from alphabase.peptide import precursor +from alphabase.peptide import fragment, precursor from alphabase.spectral_library import base from alphabase.spectral_library.base import SpecLibBase from sklearn.model_selection import train_test_split @@ -294,6 +294,7 @@ def prepare_df(df, psm_df, column="intensity"): df["number"].values, df["type"].values, df["charge"].values, + df["loss_type"].values, ) return df[["precursor_idx", "ion", column, "correlation"]] @@ -485,8 +486,16 @@ def build_transfer_library( ], ) accumulationBroadcaster = AccumulationBroadcaster( - folder_list, number_of_processes + folder_list=folder_list, + number_of_processes=number_of_processes, + processing_kwargs={ + "charged_frag_types": fragment.get_charged_frag_types( + self.config["transfer_library"]["fragment_types"], + 
self.config["transfer_library"]["max_charge"], + ) + }, ) + accumulationBroadcaster.subscribe(transferAccumulator) accumulationBroadcaster.run() logger.info( diff --git a/alphadia/plexscoring.py b/alphadia/plexscoring.py index 4ae4e674..4b157ab2 100644 --- a/alphadia/plexscoring.py +++ b/alphadia/plexscoring.py @@ -767,6 +767,9 @@ def process( psm_proto_df.fragment_charge[self.output_idx, : len(fragments.charge)] = ( fragments.charge ) + psm_proto_df.fragment_loss_type[ + self.output_idx, : len(fragments.loss_type) + ] = fragments.loss_type # ============= FRAGMENT MOBILITY CORRELATIONS ============= # will be skipped if no mobility dimension is present @@ -1294,6 +1297,7 @@ class OuptutPsmDF: fragment_number: nb.uint8[:, ::1] fragment_type: nb.uint8[:, ::1] fragment_charge: nb.uint8[:, ::1] + fragment_loss_type: nb.uint8[:, ::1] def __init__(self, n_psm, top_k_fragments): self.valid = np.zeros(n_psm, dtype=np.bool_) @@ -1321,6 +1325,7 @@ def __init__(self, n_psm, top_k_fragments): self.fragment_number = np.zeros((n_psm, top_k_fragments), dtype=np.uint8) self.fragment_type = np.zeros((n_psm, top_k_fragments), dtype=np.uint8) self.fragment_charge = np.zeros((n_psm, top_k_fragments), dtype=np.uint8) + self.fragment_loss_type = np.zeros((n_psm, top_k_fragments), dtype=np.uint8) def to_fragment_df(self): mask = self.fragment_mz_library.flatten() > 0 @@ -1339,6 +1344,7 @@ def to_fragment_df(self): self.fragment_number.flatten()[mask], self.fragment_type.flatten()[mask], self.fragment_charge.flatten()[mask], + self.fragment_loss_type.flatten()[mask], ) def to_precursor_df(self): @@ -1823,6 +1829,7 @@ def collect_fragments( "number", "type", "charge", + "loss_type", ] df = pd.DataFrame( { diff --git a/alphadia/search_step.py b/alphadia/search_step.py index 7c39491f..107b3159 100644 --- a/alphadia/search_step.py +++ b/alphadia/search_step.py @@ -9,7 +9,7 @@ from alphabase.spectral_library.flat import SpecLibFlat from alphadia import libtransform, outputtransform -from 
alphadia.constants.keys import ConfigKeys +from alphadia.constants.keys import ConfigKeys, SearchStepFiles from alphadia.exceptions import CustomError, NoLibraryAvailableError from alphadia.workflow import peptidecentric, reporting from alphadia.workflow.base import WorkflowBase @@ -336,11 +336,18 @@ def _process_raw_file( ) # check if the raw file is already processed - psm_location = os.path.join(workflow.path, "psm.parquet") - frag_location = os.path.join(workflow.path, "frag.parquet") + psm_location = os.path.join(workflow.path, SearchStepFiles.PSM_FILE_NAME) + frag_location = os.path.join(workflow.path, SearchStepFiles.FRAG_FILE_NAME) + frag_transfer_location = os.path.join( + workflow.path, SearchStepFiles.FRAG_TRANSFER_FILE_NAME + ) if self.config["general"]["reuse_quant"]: - if os.path.exists(psm_location) and os.path.exists(frag_location): + files_exist = os.path.exists(psm_location) and os.path.exists(frag_location) + if self.config["transfer_library"]["enabled"]: + files_exist = files_exist and os.path.exists(frag_transfer_location) + + if files_exist: logger.info( f"reuse_quant: found existing quantification for {raw_name}, skipping processing .." 
) @@ -356,7 +363,10 @@ def _process_raw_file( workflow.timing_manager.set_end_time("optimization") workflow.timing_manager.set_start_time("extraction") + psm_df, frag_df = workflow.extraction() + frag_df.to_parquet(frag_location, index=False) + workflow.timing_manager.set_end_time("extraction") workflow.timing_manager.save() @@ -367,11 +377,11 @@ def _process_raw_file( psm_df = psm_df[psm_df["qval"] <= self.config["fdr"]["fdr"]] if self.config["transfer_library"]["enabled"]: - psm_df, frag_df = workflow.requantify_fragments(psm_df) + psm_df, frag_transfer_df = workflow.requantify_fragments(psm_df) + frag_transfer_df.to_parquet(frag_transfer_location, index=False) psm_df["run"] = raw_name psm_df.to_parquet(psm_location, index=False) - frag_df.to_parquet(frag_location, index=False) return workflow diff --git a/alphadia/utils.py b/alphadia/utils.py index b1576a8d..bfdbefde 100644 --- a/alphadia/utils.py +++ b/alphadia/utils.py @@ -52,14 +52,20 @@ def candidate_hash(precursor_idx, rank): @nb.njit -def ion_hash(precursor_idx, number, type, charge): +def ion_hash(precursor_idx, number, type, charge, loss_type): # create a 64 bit hash from the precursor_idx, number and type # the precursor_idx is the lower 32 bits # the number is the next 8 bits # the type is the next 8 bits - # the last 8 bits are used to distinguish between different charges of the same precursor - # this is necessary because I forgot to save the charge in the frag.tsv file :D - return precursor_idx + (number << 32) + (type << 40) + (charge << 48) + # the charge is the next 8 bits + # the loss_type is the last 8 bits + return ( + precursor_idx + + (number << 32) + + (type << 40) + + (charge << 48) + + (loss_type << 56) + ) @nb.njit diff --git a/alphadia/workflow/peptidecentric.py b/alphadia/workflow/peptidecentric.py index 71747ec3..a720c11d 100644 --- a/alphadia/workflow/peptidecentric.py +++ b/alphadia/workflow/peptidecentric.py @@ -1212,7 +1212,7 @@ def requantify_fragments( config = 
plexscoring.CandidateConfig() config.update( { - "top_k_fragments": 1000, # Use all fragments ever expected, needs to be larger than charged_frag_types(8)*max_sequence_len(100?) + "top_k_fragments": 9999, # Use all fragments ever expected, needs to be larger than charged_frag_types(8)*max_sequence_len(100?) "precursor_mz_tolerance": self.config["search"]["target_ms1_tolerance"], "fragment_mz_tolerance": self.config["search"]["target_ms2_tolerance"], } diff --git a/nbs/debug/debug_lvl1.ipynb b/nbs/debug/debug_lvl1.ipynb index 5c5d2b97..fea8f8ee 100644 --- a/nbs/debug/debug_lvl1.ipynb +++ b/nbs/debug/debug_lvl1.ipynb @@ -10,8 +10,10 @@ ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2\n", @@ -28,13 +30,13 @@ "os.environ[\"NUMBA_BOUNDSCHECK\"] = \"1\"\n", "os.environ[\"NUMBA_DEVELOPER_MODE\"] = \"1\"\n", "os.environ[\"NUMBA_FULL_TRACEBACKS\"] = \"1\"" - ], - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import tempfile\n", "\n", @@ -65,13 +67,13 @@ "\n", "library_path = DataShareDownloader(library_url,test_folder).download()\n", "raw_data_path_list = [DataShareDownloader(url,test_folder).download() for url in raw_data_url_list]" - ], - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "config = {\n", " \"general\": {\n", @@ -90,23 +92,23 @@ "}\n", "output_folder = test_folder + \"/output\"\n", "step = search_step.SearchStep(output_folder, config=config)" - ], - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "for raw_name, dia_path, speclib in step.get_run_data():\n", " pass" - ], - "outputs": [], - "execution_count": 
null + ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "workflow = peptidecentric.PeptideCentricWorkflow(\n", " raw_name,\n", @@ -114,31 +116,31 @@ ")\n", "workflow.load(dia_path, speclib)\n", "workflow.search_parameter_optimization()" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "workflow.extraction()" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "test_df = workflow.spectral_library.precursor_df.sample(1000)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "from alphadia.peakgroup import search\n", "\n", @@ -167,13 +169,13 @@ " fwhm_mobility=workflow.optimization_manager.fwhm_mobility,\n", ")\n", "candidates_df = extraction(thread_count=workflow.config[\"general\"][\"thread_count\"])" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "from alphadia import plexscoring\n", "\n", @@ -203,18 +205,20 @@ " thread_count=workflow.config[\"general\"][\"thread_count\"],\n", " debug=True,\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "## Multistep Search" + "metadata": {}, + "source": [ + "## Multistep Search" + ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from alphadia.search_plan import SearchPlan\n", "\n", @@ -225,16 +229,14 @@ "\n", "plan = SearchPlan(output_folder, config=config)\n", "plan.run_plan()" - ], - "outputs": [], - "execution_count": null + ] } ], "metadata": { "kernelspec": { - "display_name": "alphadia", + "display_name": "alpha", 
"language": "python", - "name": "alphadia" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -246,7 +248,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.7" }, "orig_nbformat": 4 }, diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 6a165d05..45c97d4d 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -5,7 +5,7 @@ numba==0.59.1 argparse==1.4.0 alpharaw==0.4.5 alphatims==1.0.8 -alphabase==1.4.2 # test: tolerate_version +alphabase==1.5.0 # test: tolerate_version peptdeep==1.3.0 # test: tolerate_version progressbar==2.5 neptune==1.10.4 diff --git a/requirements/requirements_loose.txt b/requirements/requirements_loose.txt index 76d82a41..786515d5 100644 --- a/requirements/requirements_loose.txt +++ b/requirements/requirements_loose.txt @@ -5,7 +5,7 @@ argparse alpharaw>=0.3.1 # test: tolerate_version alphatims # TODO remove once compatible with alphabase>=1.5.0 -alphabase>=1.4.0,<1.5.0 # test: tolerate_version +alphabase>=1.5.0 # test: tolerate_version peptdeep>=1.3.0 # test: tolerate_version dask==2024.11.2 # test: tolerate_version progressbar diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py index e6c8ecf3..b59249eb 100644 --- a/tests/unit_tests/conftest.py +++ b/tests/unit_tests/conftest.py @@ -151,8 +151,10 @@ def mock_fragment_df(n_fragments: int = 10, n_precursor: int = 20): fragment_intensity = ( 10 ** (precursor_intensity * 3) * np.random.rand(n_fragments) ).flatten() + fragment_correlation = np.random.rand(n_precursor * n_fragments).flatten() + fragment_loss_type = np.ones(n_precursor * n_fragments).astype(np.uint8).flatten() return pd.DataFrame( { "precursor_idx": fragment_precursor_idx, @@ -164,6 +166,7 @@ def mock_fragment_df(n_fragments: int = 10, n_precursor: int = 20): "height": fragment_height, "intensity": fragment_intensity, "correlation": fragment_correlation, + "loss_type": 
fragment_loss_type, } ) diff --git a/tests/unit_tests/test_outputaccumulator.py b/tests/unit_tests/test_outputaccumulator.py index 912579d5..a29cdee3 100644 --- a/tests/unit_tests/test_outputaccumulator.py +++ b/tests/unit_tests/test_outputaccumulator.py @@ -8,6 +8,7 @@ from conftest import mock_fragment_df, mock_precursor_df from alphadia import outputtransform +from alphadia.constants.keys import SearchStepFiles from alphadia.workflow.base import QUANT_FOLDER_NAME @@ -102,9 +103,12 @@ def prepare_input_data(): for i, raw_folder in enumerate(raw_folders): os.makedirs(raw_folder, exist_ok=True) - psm_dfs[i].to_parquet(os.path.join(raw_folder, "psm.parquet"), index=False) + psm_dfs[i].to_parquet( + os.path.join(raw_folder, SearchStepFiles.PSM_FILE_NAME), index=False + ) fragment_dfs[i].to_parquet( - os.path.join(raw_folder, "frag.parquet"), index=False + os.path.join(raw_folder, SearchStepFiles.FRAG_TRANSFER_FILE_NAME), + index=False, ) return config, temp_folder, raw_folders, psm_dfs, fragment_dfs