From 1fb223812d8284223d6c3c2cba4f620d32c050f7 Mon Sep 17 00:00:00 2001 From: Vincenth Brennsteiner Date: Mon, 11 Mar 2024 00:09:58 +0100 Subject: [PATCH 01/48] open notebook to compare grouping between search engines --- nbs/debug/dev_grouping_comparison.ipynb | 92 +++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 nbs/debug/dev_grouping_comparison.ipynb diff --git a/nbs/debug/dev_grouping_comparison.ipynb b/nbs/debug/dev_grouping_comparison.ipynb new file mode 100644 index 00000000..de8d1f51 --- /dev/null +++ b/nbs/debug/dev_grouping_comparison.ipynb @@ -0,0 +1,92 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Main Notebook Aim" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The aim of this notebook is to delineate differences in protein inference between AlphaDIA, Spectronaut and DIANN. Heuristic and Strict Parsimony grouping of either of these engines shall be evaluated on a HeLa QC dataset (Orbitrap Astral). Aims are the following:\n", + "\n", + "- Explain apparent differences in protein inference between AlphaDIA - DIANN: Strict Parsimony\n", + "- Explain apparent differences in protein inference between AlphaDIA - Spectronaut: Strict Parsimony\n", + "- Explain apparent differences in protein inference between AlphaDIA - DIANN: Highly Heuristic\n", + "- Explain apparent differences in protein inference between AlphaDIA - Spectronaut: Highly Heuristic\n", + "\n", + "Special focus lies on edge cases (subsumable, circular, non-deterministic) grouping situations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Inspect QC file and generate an overview of the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load AlphaDIA results for both grouping methods" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load Spectronaut results for both grouping methods" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load DIA-NN results for both grouping methods" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize differences between the datasets for each method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Closer investigation of divergent grouping results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 9147995b6e2b07d12580d0223e51515adb1897ce Mon Sep 17 00:00:00 2001 From: Vincenth Brennsteiner Date: Thu, 4 Apr 2024 16:08:59 +0200 Subject: [PATCH 02/48] formulate approach to compare grouping between different search engines --- nbs/debug/dev_grouping_comparison.ipynb | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/nbs/debug/dev_grouping_comparison.ipynb b/nbs/debug/dev_grouping_comparison.ipynb index de8d1f51..f29b54ee 100644 --- a/nbs/debug/dev_grouping_comparison.ipynb +++ b/nbs/debug/dev_grouping_comparison.ipynb @@ -18,7 +18,14 @@ "- Explain apparent differences in protein inference between AlphaDIA - DIANN: Highly Heuristic\n", "- Explain apparent differences in protein inference between AlphaDIA - Spectronaut: Highly 
Heuristic\n", "\n", - "Special focus lies on edge cases (subsumable, circular, non-deterministic) grouping situations." + "Special focus lies on edge cases (subsumable, circular, non-deterministic) grouping situations.\n", + "\n", + "Algorithmically, this is a challenging task since the different search engines operate with different, closed source codebases. Instead, opt for a post-hoc approach:\n", + "\n", + "1. From each search engine, obtain a peptide and protein-group level output file for the same rawfiles processed with the same fasta/spectral library\n", + "2. Parse outputs such that each precursor is associated with its genes, with fanned out rows for peptides associating with more than one gene\n", + "3. Iterate over each gene in each result table and select those with exactly identical precursor sets. Mark these genes as \"shared\", with the definition: \"A shared gene is a gene whose associated precursors are the same from each search engine\"\n", + "4. Then, select groups that consist only of shared genes. While the gene-precursor association is clearly determined by the fasta, the manner in which genes are combined into groups may differ between search engines. Using these cases, we can examine how different search engines handle grouping starting from exactly the same precursors." 
# Inspect QC files and generate an overview of the data
# NOTE: the definitions below use `os` and `pandas as pd` from this cell's import statements.

# Utility functions
# to be sure and to enable parsing without user input, infer cohort from file itself
def infer_engine(
    headers : list,
    alphadia_pe_columns : list = ['base_width_mobility', 'base_width_rt', 'mono_ms1_intensity'],
    alphadia_pg_columns : list = ['pg'],
    diann_pe_columns : list = ['File.Name', 'Run', 'Protein.Group', 'Protein.Ids'],
    diann_pg_columns : list = ['First.Protein.Description'],
    spectronaut_pe_columns : list = ['PG.ProteinGroups', 'PG.ProteinAccessions', 'PG.Genes', 'PG.UniProtIds'],
    spectronaut_pg_columns : list = ['PG.NrOfStrippedSequencesIdentified (Experiment-wide)', 'PG.NrOfPrecursorsIdentified (Experiment-wide)'],
) -> "tuple[str, str]":
    """Infer the search engine and table level from the column names of a results table.

    Each ``*_columns`` argument is a signature: a table matches it when every
    listed column is present in ``headers``. Signatures are tested in a fixed
    order and the first match wins.

    Parameters:
    ----------

    headers : list
        Column names of the results table.

    alphadia_pe_columns, alphadia_pg_columns : list
        Signature columns of the AlphaDIA peptide / protein group table.

    diann_pe_columns, diann_pg_columns : list
        Signature columns of the DIA-NN peptide / protein group table.

    spectronaut_pe_columns, spectronaut_pg_columns : list
        Signature columns of the Spectronaut peptide / protein group table.

    Returns:
    -------

    engine : str
        One of 'Alphadia', 'DiaNN', 'Spectronaut'.

    level : str
        Either 'peptide' or 'protein_group'.

    Raises:
    ------

    ValueError
        If no signature is fully contained in ``headers``.

    """

    # Order matters: the default AlphaDIA protein group signature (['pg']) is also
    # satisfied by AlphaDIA peptide tables, so the peptide signature is tested first.
    signatures = (
        ('Alphadia', 'peptide', alphadia_pe_columns),
        ('Alphadia', 'protein_group', alphadia_pg_columns),
        ('DiaNN', 'peptide', diann_pe_columns),
        ('DiaNN', 'protein_group', diann_pg_columns),
        ('Spectronaut', 'peptide', spectronaut_pe_columns),
        ('Spectronaut', 'protein_group', spectronaut_pg_columns),
    )

    for engine, level, required_columns in signatures:
        if set(required_columns).issubset(headers):
            return engine, level

    raise ValueError('Could not infer search engine from column names')

# parse result files to usable format
# All six parsers are currently placeholders that hand the table back unchanged.

def parse_alphadia_pe_table(
    pe_table : pd.DataFrame,
):
    """Parse an AlphaDIA peptide level table (placeholder: returns the input unchanged)."""
    return pe_table

def parse_alphadia_pg_table(
    pg_table : pd.DataFrame,
):
    """Parse an AlphaDIA protein group level table (placeholder: returns the input unchanged)."""
    return pg_table

def parse_diann_pe_table(
    pe_table : pd.DataFrame,
):
    """Parse a DIA-NN peptide level table (placeholder: returns the input unchanged)."""
    return pe_table

def parse_diann_pg_table(
    pg_table : pd.DataFrame,
):
    """Parse a DIA-NN protein group level table (placeholder: returns the input unchanged)."""
    return pg_table

def parse_spectronaut_pe_table(
    pe_table : pd.DataFrame,
):
    """Parse a Spectronaut peptide level table (placeholder: returns the input unchanged)."""
    return pe_table

def parse_spectronaut_pg_table(
    pg_table : pd.DataFrame,
):
    """Parse a Spectronaut protein group level table (placeholder: returns the input unchanged)."""
    return pg_table

# (engine, level) -> parser; keys are exactly the pairs infer_engine can return
_TABLE_PARSERS = {
    ('Alphadia', 'peptide'): parse_alphadia_pe_table,
    ('Alphadia', 'protein_group'): parse_alphadia_pg_table,
    ('DiaNN', 'peptide'): parse_diann_pe_table,
    ('DiaNN', 'protein_group'): parse_diann_pg_table,
    ('Spectronaut', 'peptide'): parse_spectronaut_pe_table,
    ('Spectronaut', 'protein_group'): parse_spectronaut_pg_table,
}

# Higher level wrapper to read and parse peptide and protein group level results table

def read_and_parse_peptide_table(
    input_table_path : str,
    input_table_filename : str,
) -> "tuple[pd.DataFrame, str]":
    """Read and parse results table from respective search engine output. First step in analysing QC data.

    Despite its name, the function handles both peptide and protein group
    level tables: the engine and level are inferred from the column names
    and the matching parser is applied.

    Parameters:
    ----------

    input_table_path : str
        Path to the directory containing the results table

    input_table_filename : str
        Filename of the tab-separated results table

    Returns:
    -------

    out_table : pd.DataFrame
        DataFrame containing the parsed results

    engine : str
        Name of the search engine inferred from the column names

    Raises:
    ------

    ValueError
        If the search engine cannot be inferred from the column names, or
        if no parser is registered for the inferred engine / level pair.

    """

    # read results table (tab-separated)
    input_table = pd.read_csv(
        os.path.join(input_table_path, input_table_filename),
        sep = '\t'
    )

    engine, level = infer_engine(input_table.columns.tolist())

    # explicit failure instead of returning an unbound variable
    parser = _TABLE_PARSERS.get((engine, level))
    if parser is None:
        raise ValueError(f'No parser available for {engine} {level} tables')

    return parser(input_table), engine
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
base_width_mobilitybase_width_rtrt_observedmobility_observedmono_ms1_intensitytop_ms1_intensitysum_ms1_intensityweighted_ms1_intensityweighted_mass_deviationweighted_mass_error..._candidate_idxvalidcandidate_idxrunmod_seq_hashmod_seq_charge_hashpg_masterpgpg_qvalintensity
00.010.728760297.969730.0000015833644.505833644.5013318241.04630847.00.0460640.046064...3810240True381024020240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...66848771819252965816684877181925296583O43237O432370.01.027229e+06
10.010.723328288.757050.00000113404818.0013404818.0033200900.011027869.0-0.1877660.187766...6107025True610702520240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...66768610521064218436676861052106421845Q04323Q043230.01.041206e+06
20.016.940490432.817630.0000011033554.441033554.443246347.81095333.5-0.8502230.850223...4568806True456880620240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...26774315748927070692677431574892707071O60264O602640.04.600186e+06
30.010.726410425.166930.0000012843770.002843770.007460814.02434136.2-0.7933790.793379...4384664True438466420240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...46198710397994671504619871039799467152Q8IWE2Q8IWE20.02.069930e+05
40.038.312042481.918100.0000014392369.004392369.0014179063.04668329.50.0747340.074734...3785744True378574420240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...1452062050981500841814520620509815008420Q14978Q149780.03.119783e+06
\n", + "

5 rows × 91 columns

\n", + "
" + ], + "text/plain": [ + " base_width_mobility base_width_rt rt_observed mobility_observed \\\n", + "0 0.0 10.728760 297.96973 0.000001 \n", + "1 0.0 10.723328 288.75705 0.000001 \n", + "2 0.0 16.940490 432.81763 0.000001 \n", + "3 0.0 10.726410 425.16693 0.000001 \n", + "4 0.0 38.312042 481.91810 0.000001 \n", + "\n", + " mono_ms1_intensity top_ms1_intensity sum_ms1_intensity \\\n", + "0 5833644.50 5833644.50 13318241.0 \n", + "1 13404818.00 13404818.00 33200900.0 \n", + "2 1033554.44 1033554.44 3246347.8 \n", + "3 2843770.00 2843770.00 7460814.0 \n", + "4 4392369.00 4392369.00 14179063.0 \n", + "\n", + " weighted_ms1_intensity weighted_mass_deviation weighted_mass_error ... \\\n", + "0 4630847.0 0.046064 0.046064 ... \n", + "1 11027869.0 -0.187766 0.187766 ... \n", + "2 1095333.5 -0.850223 0.850223 ... \n", + "3 2434136.2 -0.793379 0.793379 ... \n", + "4 4668329.5 0.074734 0.074734 ... \n", + "\n", + " _candidate_idx valid candidate_idx \\\n", + "0 3810240 True 3810240 \n", + "1 6107025 True 6107025 \n", + "2 4568806 True 4568806 \n", + "3 4384664 True 4384664 \n", + "4 3785744 True 3785744 \n", + "\n", + " run mod_seq_hash \\\n", + "0 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng... 6684877181925296581 \n", + "1 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng... 6676861052106421843 \n", + "2 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng... 2677431574892707069 \n", + "3 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng... 4619871039799467150 \n", + "4 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng... 
14520620509815008418 \n", + "\n", + " mod_seq_charge_hash pg_master pg pg_qval intensity \n", + "0 6684877181925296583 O43237 O43237 0.0 1.027229e+06 \n", + "1 6676861052106421845 Q04323 Q04323 0.0 1.041206e+06 \n", + "2 2677431574892707071 O60264 O60264 0.0 4.600186e+06 \n", + "3 4619871039799467152 Q8IWE2 Q8IWE2 0.0 2.069930e+05 \n", + "4 14520620509815008420 Q14978 Q14978 0.0 3.119783e+06 \n", + "\n", + "[5 rows x 91 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pg20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_1420240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_1520240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_1620240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17
0A0A024RBG127294.38091328892.14094127849.40303729332.977053
1A0A096LP490.00000013362.56574818468.7611550.000000
2A0A0B4J2D5304680.336728334097.520531294064.475715265216.624922
3A0A0B4J2F0267612.886812298607.689343320374.321906326374.101157
4A0A0B4J2F276495.97714382719.92904963831.15899980179.393809
\n", + "
" + ], + "text/plain": [ + " pg 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14 \\\n", + "0 A0A024RBG1 27294.380913 \n", + "1 A0A096LP49 0.000000 \n", + "2 A0A0B4J2D5 304680.336728 \n", + "3 A0A0B4J2F0 267612.886812 \n", + "4 A0A0B4J2F2 76495.977143 \n", + "\n", + " 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15 \\\n", + "0 28892.140941 \n", + "1 13362.565748 \n", + "2 334097.520531 \n", + "3 298607.689343 \n", + "4 82719.929049 \n", + "\n", + " 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16 \\\n", + "0 27849.403037 \n", + "1 18468.761155 \n", + "2 294064.475715 \n", + "3 320374.321906 \n", + "4 63831.158999 \n", + "\n", + " 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17 \n", + "0 29332.977053 \n", + "1 0.000000 \n", + "2 265216.624922 \n", + "3 326374.101157 \n", + "4 80179.393809 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
File.NameRunProtein.GroupProtein.IdsProtein.NamesGenesPG.QuantityPG.NormalisedPG.MaxLFQGenes.Quantity...Decoy.EvidenceDecoy.CScoreFragment.Quant.RawFragment.Quant.CorrectedFragment.CorrelationsMS2.ScanIMiIMPredicted.IMPredicted.iIM
0Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...P37108P37108SRP14_HUMANSRP142458760.02407790.02372520.02458760.0...1.049391.144500e-010;4799.62;0;0;2219.65;401.484;2770;0;0;3364.71...0;4799.62;0;0;2219.65;401.484;2770;0;0;3364.71...0;0.85516;0;0;0.930818;0.0392701;0.629045;0;0;...1333040000
1Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...P37108P37108SRP14_HUMANSRP142429120.02332930.02412170.02429120.0...0.00000-1.000000e+071517.72;1104.9;1813.46;0;3090.83;0;1185.63;0;0...1517.72;1104.9;1813.46;0;3090.83;0;1185.63;0;0...0.512347;0.511267;0.755782;0;0.635567;0;0.3431...1330040000
2Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...P37108P37108SRP14_HUMANSRP142255580.02329290.02182340.02255580.0...0.00000-1.000000e+072287.07;458.234;1840.52;0;757.653;0;1665.65;27...2287.07;458.234;1840.52;0;757.653;0;1665.65;27...0.232198;0.258243;0.633966;0;0.942524;0;0.6651...1339040000
3Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...P37108P37108SRP14_HUMANSRP142217790.02293100.02402530.02217790.0...0.00000-1.000000e+07814.006;2186.53;236.486;0;3221.79;559.551;2718...814.006;2186.53;236.486;0;3221.79;559.551;2718...0.290409;0.931915;0.699088;0;0.42669;0.18216;0...1339040000
4Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...P37108P37108SRP14_HUMANSRP142458760.02407790.02372520.02458760.0...1.049392.612290e-013107.44;2308.01;1727.22;10473;228.697;330.737;...3107.44;2308.01;1727.22;10473;228.697;330.737;...0.848145;0.850794;0.679977;0.591514;0.403484;0...1329550000
\n", + "

5 rows × 57 columns

\n", + "
" + ], + "text/plain": [ + " File.Name \\\n", + "0 Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240... \n", + "1 Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240... \n", + "2 Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240... \n", + "3 Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240... \n", + "4 Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240... \n", + "\n", + " Run Protein.Group \\\n", + "0 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng... P37108 \n", + "1 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng... P37108 \n", + "2 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng... P37108 \n", + "3 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng... P37108 \n", + "4 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng... P37108 \n", + "\n", + " Protein.Ids Protein.Names Genes PG.Quantity PG.Normalised PG.MaxLFQ \\\n", + "0 P37108 SRP14_HUMAN SRP14 2458760.0 2407790.0 2372520.0 \n", + "1 P37108 SRP14_HUMAN SRP14 2429120.0 2332930.0 2412170.0 \n", + "2 P37108 SRP14_HUMAN SRP14 2255580.0 2329290.0 2182340.0 \n", + "3 P37108 SRP14_HUMAN SRP14 2217790.0 2293100.0 2402530.0 \n", + "4 P37108 SRP14_HUMAN SRP14 2458760.0 2407790.0 2372520.0 \n", + "\n", + " Genes.Quantity ... Decoy.Evidence Decoy.CScore \\\n", + "0 2458760.0 ... 1.04939 1.144500e-01 \n", + "1 2429120.0 ... 0.00000 -1.000000e+07 \n", + "2 2255580.0 ... 0.00000 -1.000000e+07 \n", + "3 2217790.0 ... 0.00000 -1.000000e+07 \n", + "4 2458760.0 ... 1.04939 2.612290e-01 \n", + "\n", + " Fragment.Quant.Raw \\\n", + "0 0;4799.62;0;0;2219.65;401.484;2770;0;0;3364.71... \n", + "1 1517.72;1104.9;1813.46;0;3090.83;0;1185.63;0;0... \n", + "2 2287.07;458.234;1840.52;0;757.653;0;1665.65;27... \n", + "3 814.006;2186.53;236.486;0;3221.79;559.551;2718... \n", + "4 3107.44;2308.01;1727.22;10473;228.697;330.737;... \n", + "\n", + " Fragment.Quant.Corrected \\\n", + "0 0;4799.62;0;0;2219.65;401.484;2770;0;0;3364.71... \n", + "1 1517.72;1104.9;1813.46;0;3090.83;0;1185.63;0;0... 
\n", + "2 2287.07;458.234;1840.52;0;757.653;0;1665.65;27... \n", + "3 814.006;2186.53;236.486;0;3221.79;559.551;2718... \n", + "4 3107.44;2308.01;1727.22;10473;228.697;330.737;... \n", + "\n", + " Fragment.Correlations MS2.Scan IM iIM \\\n", + "0 0;0.85516;0;0;0.930818;0.0392701;0.629045;0;0;... 133304 0 0 \n", + "1 0.512347;0.511267;0.755782;0;0.635567;0;0.3431... 133004 0 0 \n", + "2 0.232198;0.258243;0.633966;0;0.942524;0;0.6651... 133904 0 0 \n", + "3 0.290409;0.931915;0.699088;0;0.42669;0.18216;0... 133904 0 0 \n", + "4 0.848145;0.850794;0.679977;0.591514;0.403484;0... 132955 0 0 \n", + "\n", + " Predicted.IM Predicted.iIM \n", + "0 0 0 \n", + "1 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + "\n", + "[5 rows x 57 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Protein.GroupProtein.IdsProtein.NamesGenesFirst.Protein.DescriptionY:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14.mzMLY:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15.mzMLY:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16.mzMLY:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17.mzML
0A0A024R1R8;Q9Y2S6Q9Y2S6;A0A024R1R8TMA7B_HUMAN;TMA7_HUMANTMA7;TMA7BNaN2824960.002864450.02950430.02851180.00
1A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q3BBV...Q3BBV0;P0DPF3;Q5TAG4;Q8N660;A0A087WUL8;B4DH59;...NBPF8_HUMAN;NBPF9_HUMAN;NBPFA_HUMAN;NBPFE_HUMA...NBPF10;NBPF14;NBPF19;NBPF20;NBPF26;NBPF8;NBPF9NaNNaN14483.418018.212017.10
2A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TI2...P0DPF3;Q5TAG4;Q86T75;Q8N660;A0A087WUL8;B4DH59;...NBPF9_HUMAN;NBPFA_HUMAN;NBPFE_HUMAN;NBPFJ_HUMA...NBPF10;NBPF14;NBPF19;NBPF20;NBPF26;NBPF9NaN6904.08NaN16465.611101.30
3A0A096LP01A0A096LP01SIM26_HUMANSMIM26NaN4283.0810357.36473.18930.01
4A0A096LP49;A0A096LP49-2A0A096LP49;A0A096LP49-2CC187_HUMANCCDC187NaN25736.9025175.727476.636951.80
\n", + "
" + ], + "text/plain": [ + " Protein.Group \\\n", + "0 A0A024R1R8;Q9Y2S6 \n", + "1 A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q3BBV... \n", + "2 A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TI2... \n", + "3 A0A096LP01 \n", + "4 A0A096LP49;A0A096LP49-2 \n", + "\n", + " Protein.Ids \\\n", + "0 Q9Y2S6;A0A024R1R8 \n", + "1 Q3BBV0;P0DPF3;Q5TAG4;Q8N660;A0A087WUL8;B4DH59;... \n", + "2 P0DPF3;Q5TAG4;Q86T75;Q8N660;A0A087WUL8;B4DH59;... \n", + "3 A0A096LP01 \n", + "4 A0A096LP49;A0A096LP49-2 \n", + "\n", + " Protein.Names \\\n", + "0 TMA7B_HUMAN;TMA7_HUMAN \n", + "1 NBPF8_HUMAN;NBPF9_HUMAN;NBPFA_HUMAN;NBPFE_HUMA... \n", + "2 NBPF9_HUMAN;NBPFA_HUMAN;NBPFE_HUMAN;NBPFJ_HUMA... \n", + "3 SIM26_HUMAN \n", + "4 CC187_HUMAN \n", + "\n", + " Genes First.Protein.Description \\\n", + "0 TMA7;TMA7B NaN \n", + "1 NBPF10;NBPF14;NBPF19;NBPF20;NBPF26;NBPF8;NBPF9 NaN \n", + "2 NBPF10;NBPF14;NBPF19;NBPF20;NBPF26;NBPF9 NaN \n", + "3 SMIM26 NaN \n", + "4 CCDC187 NaN \n", + "\n", + " Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14.mzML \\\n", + "0 2824960.00 \n", + "1 NaN \n", + "2 6904.08 \n", + "3 4283.08 \n", + "4 25736.90 \n", + "\n", + " Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15.mzML \\\n", + "0 2864450.0 \n", + "1 14483.4 \n", + "2 NaN \n", + "3 10357.3 \n", + "4 25175.7 \n", + "\n", + " Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16.mzML \\\n", + "0 2950430.0 \n", + "1 18018.2 \n", + "2 16465.6 \n", + "3 6473.1 \n", + "4 27476.6 \n", + "\n", + " Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17.mzML \n", + "0 2851180.00 \n", + "1 12017.10 \n", + "2 11101.30 \n", + "3 8930.01 \n", + "4 36951.80 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PG.ProteinGroupsPG.ProteinAccessionsPG.GenesPG.UniProtIdsPG.ProteinNamesPG.IsCandidatePG.CompletenessPG.ProteinLabelPEP.GroupingKeyPEP.GroupingKeyType...PEP.IsProteotypicPEP.PeptidePositionPEP.IsProteinGroupSpecificPEP.IsGeneSpecificPEP.AllOccurringProteinAccessionsEG.PrecursorId[1] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14.raw.EG.TotalQuantity (Settings)[2] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15.raw.EG.TotalQuantity (Settings)[3] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16.raw.EG.TotalQuantity (Settings)[4] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17.raw.EG.TotalQuantity (Settings)
0A0A024R1R8;Q9Y2S6A0A024R1R8;Q9Y2S6TMA7B;TMA7A0A024R1R8;Q9Y2S6TMA7B_HUMAN;TMA7_HUMANFalse100A0A024R1R8;Q9Y2S6GPLATGGIKStripped Sequence...Unknown51;51UnknownUnknownNaN_GPLATGGIK_.291116.195312587792.8437595424.960937592398.78125
1A0A024RBG1;Q9NZJ9-2A0A024RBG1;Q9NZJ9-2NUDT4B;NUDT4A0A024RBG1;Q9NZJ9-2NUD4B_HUMAN;NUDT4_HUMANFalse100A0A024RBG1;Q9NZJ9-2LLGIFEQNQDRStripped Sequence...Unknown80;80UnknownUnknownNaN_LLGIFEQNQDR_.21104.52001953125926.77227783203121012.0153808593751105.1077880859375
2A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...NBPF19;NBPF26;NBPF20;NBPF9;NBPF9;NBPF12;NBPF14...A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...NBPFJ_HUMAN;NBPFP_HUMAN;NBPFK_HUMAN;NBPF9_HUMA...False100A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...SAFYVLEQQRStripped Sequence...Unknown387;318;43,287,531,775,1019,1263,1507,1751,199...UnknownUnknownNaN_SAFYVLEQQR_.2128.3846282958984488.06173706054688259.7120361328125145.58016967773438
3A0A096LP49;A0A096LP49-2A0A096LP49;A0A096LP49-2CCDC187A0A096LP49;A0A096LP49-2CC187_HUMANFalse100A0A096LP49;A0A096LP49-2QAQLQALETTAKStripped Sequence...Unknown821;723UnknownUnknownNaN_QAQLQALETTAK_.21Filtered331.3834533691406154.49806213378906
4A0A096LP49;A0A096LP49-2A0A096LP49;A0A096LP49-2CCDC187A0A096LP49;A0A096LP49-2CC187_HUMANFalse100A0A096LP49;A0A096LP49-2EAEHLGTSSSLHLRStripped Sequence...Unknown805;707UnknownUnknownNaN_EAEHLGTSSSLHLR_.456.88742828369140656.4841918945312588.43661499023438108.95454406738281
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " PG.ProteinGroups \\\n", + "0 A0A024R1R8;Q9Y2S6 \n", + "1 A0A024RBG1;Q9NZJ9-2 \n", + "2 A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG... \n", + "3 A0A096LP49;A0A096LP49-2 \n", + "4 A0A096LP49;A0A096LP49-2 \n", + "\n", + " PG.ProteinAccessions \\\n", + "0 A0A024R1R8;Q9Y2S6 \n", + "1 A0A024RBG1;Q9NZJ9-2 \n", + "2 A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG... \n", + "3 A0A096LP49;A0A096LP49-2 \n", + "4 A0A096LP49;A0A096LP49-2 \n", + "\n", + " PG.Genes \\\n", + "0 TMA7B;TMA7 \n", + "1 NUDT4B;NUDT4 \n", + "2 NBPF19;NBPF26;NBPF20;NBPF9;NBPF9;NBPF12;NBPF14... \n", + "3 CCDC187 \n", + "4 CCDC187 \n", + "\n", + " PG.UniProtIds \\\n", + "0 A0A024R1R8;Q9Y2S6 \n", + "1 A0A024RBG1;Q9NZJ9-2 \n", + "2 A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG... \n", + "3 A0A096LP49;A0A096LP49-2 \n", + "4 A0A096LP49;A0A096LP49-2 \n", + "\n", + " PG.ProteinNames PG.IsCandidate \\\n", + "0 TMA7B_HUMAN;TMA7_HUMAN False \n", + "1 NUD4B_HUMAN;NUDT4_HUMAN False \n", + "2 NBPFJ_HUMAN;NBPFP_HUMAN;NBPFK_HUMAN;NBPF9_HUMA... False \n", + "3 CC187_HUMAN False \n", + "4 CC187_HUMAN False \n", + "\n", + " PG.Completeness PG.ProteinLabel \\\n", + "0 100 A0A024R1R8;Q9Y2S6 \n", + "1 100 A0A024RBG1;Q9NZJ9-2 \n", + "2 100 A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG... \n", + "3 100 A0A096LP49;A0A096LP49-2 \n", + "4 100 A0A096LP49;A0A096LP49-2 \n", + "\n", + " PEP.GroupingKey PEP.GroupingKeyType ... PEP.IsProteotypic \\\n", + "0 GPLATGGIK Stripped Sequence ... Unknown \n", + "1 LLGIFEQNQDR Stripped Sequence ... Unknown \n", + "2 SAFYVLEQQR Stripped Sequence ... Unknown \n", + "3 QAQLQALETTAK Stripped Sequence ... Unknown \n", + "4 EAEHLGTSSSLHLR Stripped Sequence ... Unknown \n", + "\n", + " PEP.PeptidePosition \\\n", + "0 51;51 \n", + "1 80;80 \n", + "2 387;318;43,287,531,775,1019,1263,1507,1751,199... 
\n", + "3 821;723 \n", + "4 805;707 \n", + "\n", + " PEP.IsProteinGroupSpecific PEP.IsGeneSpecific \\\n", + "0 Unknown Unknown \n", + "1 Unknown Unknown \n", + "2 Unknown Unknown \n", + "3 Unknown Unknown \n", + "4 Unknown Unknown \n", + "\n", + " PEP.AllOccurringProteinAccessions EG.PrecursorId \\\n", + "0 NaN _GPLATGGIK_.2 \n", + "1 NaN _LLGIFEQNQDR_.2 \n", + "2 NaN _SAFYVLEQQR_.2 \n", + "3 NaN _QAQLQALETTAK_.2 \n", + "4 NaN _EAEHLGTSSSLHLR_.4 \n", + "\n", + " [1] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14.raw.EG.TotalQuantity (Settings) \\\n", + "0 91116.1953125 \n", + "1 1104.52001953125 \n", + "2 128.38462829589844 \n", + "3 1 \n", + "4 56.887428283691406 \n", + "\n", + " [2] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15.raw.EG.TotalQuantity (Settings) \\\n", + "0 87792.84375 \n", + "1 926.7722778320312 \n", + "2 88.06173706054688 \n", + "3 Filtered \n", + "4 56.48419189453125 \n", + "\n", + " [3] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16.raw.EG.TotalQuantity (Settings) \\\n", + "0 95424.9609375 \n", + "1 1012.015380859375 \n", + "2 259.7120361328125 \n", + "3 331.3834533691406 \n", + "4 88.43661499023438 \n", + "\n", + " [4] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17.raw.EG.TotalQuantity (Settings) \n", + "0 92398.78125 \n", + "1 1105.1077880859375 \n", + "2 145.58016967773438 \n", + "3 154.49806213378906 \n", + "4 108.95454406738281 \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PG.ProteinGroupsPG.GroupLabelPG.ProteinAccessionsPG.GenesPG.UniProtIdsPG.ProteinNamesPG.NrOfStrippedSequencesIdentified (Experiment-wide)PG.NrOfPrecursorsIdentified (Experiment-wide)PG.Sequence VersionPG.FASTAName[1] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14.raw.PG.Quantity[2] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15.raw.PG.Quantity[3] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16.raw.PG.Quantity[4] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17.raw.PG.Quantity
0A0A024R1R8;Q9Y2S6A0A024R1R8;Q9Y2S6A0A024R1R8;Q9Y2S6TMA7B;TMA7A0A024R1R8;Q9Y2S6TMA7B_HUMAN;TMA7_HUMAN111vb_uniprotkb_human_AND_reviewed_true_AND_m_202...91116.19531287792.84375095424.96093892398.781250
1A0A024RBG1;Q9NZJ9-2A0A024RBG1;Q9NZJ9-2A0A024RBG1;Q9NZJ9-2NUDT4B;NUDT4A0A024RBG1;Q9NZJ9-2NUD4B_HUMAN;NUDT4_HUMAN111;vb_uniprotkb_human_AND_reviewed_true_AND_m_202...1104.520020926.7722781012.0153811105.107788
2A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...NBPF19;NBPF26;NBPF20;NBPF9;NBPF9;NBPF12;NBPF14...A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...NBPFJ_HUMAN;NBPFP_HUMAN;NBPFK_HUMAN;NBPF9_HUMA...111;1;1;1;;3;2;3;3;2vb_uniprotkb_human_AND_reviewed_true_AND_m_202...128.38462888.061737259.712036145.580170
3A0A096LP49;A0A096LP49-2A0A096LP49;A0A096LP49-2A0A096LP49;A0A096LP49-2CCDC187A0A096LP49;A0A096LP49-2CC187_HUMAN231;vb_uniprotkb_human_AND_reviewed_true_AND_m_202...47.60421490.19089544.83037678.878120
4A0A0B4J2D5;P0DPI2A0A0B4J2D5;P0DPI2A0A0B4J2D5;P0DPI2GATD3B;GATD3A0A0B4J2D5;P0DPI2GAL3B_HUMAN;GAL3A_HUMAN6121vb_uniprotkb_human_AND_reviewed_true_AND_m_202...3114.3417973309.5410163181.6582032948.111816
\n", + "
" + ], + "text/plain": [ + " PG.ProteinGroups \\\n", + "0 A0A024R1R8;Q9Y2S6 \n", + "1 A0A024RBG1;Q9NZJ9-2 \n", + "2 A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG... \n", + "3 A0A096LP49;A0A096LP49-2 \n", + "4 A0A0B4J2D5;P0DPI2 \n", + "\n", + " PG.GroupLabel \\\n", + "0 A0A024R1R8;Q9Y2S6 \n", + "1 A0A024RBG1;Q9NZJ9-2 \n", + "2 A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG... \n", + "3 A0A096LP49;A0A096LP49-2 \n", + "4 A0A0B4J2D5;P0DPI2 \n", + "\n", + " PG.ProteinAccessions \\\n", + "0 A0A024R1R8;Q9Y2S6 \n", + "1 A0A024RBG1;Q9NZJ9-2 \n", + "2 A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG... \n", + "3 A0A096LP49;A0A096LP49-2 \n", + "4 A0A0B4J2D5;P0DPI2 \n", + "\n", + " PG.Genes \\\n", + "0 TMA7B;TMA7 \n", + "1 NUDT4B;NUDT4 \n", + "2 NBPF19;NBPF26;NBPF20;NBPF9;NBPF9;NBPF12;NBPF14... \n", + "3 CCDC187 \n", + "4 GATD3B;GATD3 \n", + "\n", + " PG.UniProtIds \\\n", + "0 A0A024R1R8;Q9Y2S6 \n", + "1 A0A024RBG1;Q9NZJ9-2 \n", + "2 A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG... \n", + "3 A0A096LP49;A0A096LP49-2 \n", + "4 A0A0B4J2D5;P0DPI2 \n", + "\n", + " PG.ProteinNames \\\n", + "0 TMA7B_HUMAN;TMA7_HUMAN \n", + "1 NUD4B_HUMAN;NUDT4_HUMAN \n", + "2 NBPFJ_HUMAN;NBPFP_HUMAN;NBPFK_HUMAN;NBPF9_HUMA... \n", + "3 CC187_HUMAN \n", + "4 GAL3B_HUMAN;GAL3A_HUMAN \n", + "\n", + " PG.NrOfStrippedSequencesIdentified (Experiment-wide) \\\n", + "0 1 \n", + "1 1 \n", + "2 1 \n", + "3 2 \n", + "4 6 \n", + "\n", + " PG.NrOfPrecursorsIdentified (Experiment-wide) PG.Sequence Version \\\n", + "0 1 1 \n", + "1 1 1; \n", + "2 1 1;1;1;1;;3;2;3;3;2 \n", + "3 3 1; \n", + "4 12 1 \n", + "\n", + " PG.FASTAName \\\n", + "0 vb_uniprotkb_human_AND_reviewed_true_AND_m_202... \n", + "1 vb_uniprotkb_human_AND_reviewed_true_AND_m_202... \n", + "2 vb_uniprotkb_human_AND_reviewed_true_AND_m_202... \n", + "3 vb_uniprotkb_human_AND_reviewed_true_AND_m_202... \n", + "4 vb_uniprotkb_human_AND_reviewed_true_AND_m_202... 
\n", + "\n", + " [1] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14.raw.PG.Quantity \\\n", + "0 91116.195312 \n", + "1 1104.520020 \n", + "2 128.384628 \n", + "3 47.604214 \n", + "4 3114.341797 \n", + "\n", + " [2] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15.raw.PG.Quantity \\\n", + "0 87792.843750 \n", + "1 926.772278 \n", + "2 88.061737 \n", + "3 90.190895 \n", + "4 3309.541016 \n", + "\n", + " [3] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16.raw.PG.Quantity \\\n", + "0 95424.960938 \n", + "1 1012.015381 \n", + "2 259.712036 \n", + "3 44.830376 \n", + "4 3181.658203 \n", + "\n", + " [4] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17.raw.PG.Quantity \n", + "0 92398.781250 \n", + "1 1105.107788 \n", + "2 145.580170 \n", + "3 78.878120 \n", + "4 2948.111816 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# test alphadia PE table\n", + "pe_table_path = \"./dev_grouping_comparison_data/alphadia/precursor_level\"\n", + "pe_table_filename = \"precursors.tsv\"\n", + "pe_table, engine = read_and_parse_peptide_table(pe_table_path, pe_table_filename)\n", + "assert engine == 'Alphadia'\n", + "display(pe_table.head())\n", + "\n", + "# test alphadia PG table\n", + "pg_table_path = \"./dev_grouping_comparison_data/alphadia/group_level\"\n", + "pg_table_filename = \"pg.matrix.tsv\"\n", + "pg_table, engine = read_and_parse_peptide_table(pg_table_path, pg_table_filename)\n", + "assert engine == 'Alphadia'\n", + "display(pg_table.head())\n", + "\n", + "# test diann PE table\n", + "pe_table_path = \"./dev_grouping_comparison_data/diann/precursor_level\"\n", + "pe_table_filename = \"report.tsv\"\n", + "pe_table, engine = read_and_parse_peptide_table(pe_table_path, pe_table_filename)\n", + "assert engine == 'DiaNN'\n", + "display(pe_table.head())\n", + "\n", + "# test diann PG table\n", + "pg_table_path = \"./dev_grouping_comparison_data/diann/group_level\"\n", + "pg_table_filename = 
\"report.pg_matrix.tsv\"\n", + "pg_table, engine = read_and_parse_peptide_table(pg_table_path, pg_table_filename)\n", + "assert engine == 'DiaNN'\n", + "display(pg_table.head())\n", + " \n", + "# test spectronaut PE table\n", + "pe_table_path = \"./dev_grouping_comparison_data/spectronaut/precursor_level\"\n", + "pe_table_filename = \"HeLa_QC_PE_20240409_140530_20240321_Report.tsv\"\n", + "pe_table, engine = read_and_parse_peptide_table(pe_table_path, pe_table_filename)\n", + "assert engine == 'Spectronaut'\n", + "display(pe_table.head())\n", + "\n", + "# test spectronaut PG table\n", + "pg_table_path = \"./dev_grouping_comparison_data/spectronaut/group_level\"\n", + "pg_table_filename = \"HeLa_QC_PG_20240409_140824_20240321_Report.tsv\"\n", + "pg_table, engine = read_and_parse_peptide_table(pg_table_path, pg_table_filename)\n", + "assert engine == 'Spectronaut'\n", + "display(pg_table.head())" ] }, { @@ -89,8 +1479,22 @@ } ], "metadata": { + "kernelspec": { + "display_name": "alphaverse", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" }, "orig_nbformat": 4 }, From 4b2bf92410b62b28bfe9ba62c40132fa4a7d891f Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Sat, 27 Apr 2024 17:20:25 +0200 Subject: [PATCH 04/48] update plotting --- alphadia/plotting/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/alphadia/plotting/utils.py b/alphadia/plotting/utils.py index 5fbe49d4..eba62983 100644 --- a/alphadia/plotting/utils.py +++ b/alphadia/plotting/utils.py @@ -45,6 +45,7 @@ def density_scatter( x: typing.Union[np.ndarray, pd.Series, pd.DataFrame], y: typing.Union[np.ndarray, pd.Series, pd.DataFrame], axis: plt.Axes = None, + bw_method=None, s: float = 1, **kwargs, ): @@ -100,7 +101,7 @@ 
def density_scatter( # Calculate the point density xy = np.vstack([x, y]) - z = gaussian_kde(xy)(xy) + z = gaussian_kde(xy, bw_method=bw_method)(xy) # Sort the points by density, so that the densest points are plotted last idx = z.argsort() From 07d98c26dbbb56b7e877fb6d3675cb23660ce6ac Mon Sep 17 00:00:00 2001 From: Vincenth Brennsteiner Date: Wed, 22 May 2024 14:39:16 +0200 Subject: [PATCH 05/48] MicroCommit: update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 83dfd498..172f9b21 100644 --- a/.gitignore +++ b/.gitignore @@ -139,6 +139,7 @@ dmypy.json # Data testdata/ +nbs/debug/dev_grouping_comparison_data/ ###################### # OS generated files # From ddbfe4d8e40b8bc812bbb9cd099fcd8911376ac3 Mon Sep 17 00:00:00 2001 From: Vincenth Brennsteiner Date: Wed, 22 May 2024 19:50:28 +0200 Subject: [PATCH 06/48] Refactor function docstrings in grouping.py, add check for equal return_dict keys and initial precursor_idx list. --- alphadia/grouping.py | 51 +++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/alphadia/grouping.py b/alphadia/grouping.py index e6966a8c..67458ee7 100644 --- a/alphadia/grouping.py +++ b/alphadia/grouping.py @@ -15,26 +15,19 @@ def group_and_parsimony( precursor_idx: NDArray[np.int64], precursor_ids: NDArray[Any], ): - """ - Function to group ids based on precursor indices and return groups & master ids as lists - - Parameters - ---------- - - precursor_idx : np.array[int] - array containing unique integer indices corresponding to each peptide precursor + """Function to group ids based on precursor indices and return groups & master ids as lists - precursor_ids : np.array[str] - array of variable length semicolon separated str belonging to a given peptide precursor id + Args: + precursor_idx (np.array[int]): array containing unique integer indices corresponding + to each peptide precursor + precursor_ids (np.array[str]): array of 
variable length semicolon separated str belonging + to a given peptide precursor id Returns - ------- - - ids : list[str] - list of ids linked to a given peptide precursor, such that each precursor only belongs to one id. This list is ordered by precursor_idx. - - groups : list[str] - list of semicolon separated ids belonging to a given peptide precursor, such that each precursor only belongs to one group. This list is ordered by precursor_idx. + ids (list[str]): list of ids linked to a given peptide precursor, such that each + precursor only belongs to one id. This list is ordered by precursor_idx. + groups (list[str]): list of semicolon separated ids belonging to a given peptide precursor, + such that each precursor only belongs to one group. This list is ordered by precursor_idx. """ @@ -53,11 +46,12 @@ def group_and_parsimony( # loop bounds max iterations for _ in range(len(id_dict)): - # remove longest set from dict as query & remove query peptided from all other sets + # remove longest set from dict as query & remove query peptide from all other sets query_id = max(id_dict.keys(), key=lambda x: len(id_dict[x])) query_peptides = id_dict.pop(query_id) query_group = [query_id] + # break if query is empty. Sorting step means that all remaining sets are empty if len(query_peptides) == 0: break @@ -67,6 +61,8 @@ def group_and_parsimony( continue new_subject_set = subject_peptides - query_peptides id_dict[subject_protein] = new_subject_set + # With the following lines commented out, the query will only eliminate peptides from + # respective subject proteins, but we will not add them to the query group # if len(new_subject_set) == 0: # query_group.append(subject_protein) @@ -90,13 +86,18 @@ def group_and_parsimony( f"Not all precursors were found in the output of the grouping function. {len(return_dict)} precursors were found, but {len(precursor_idx)} were expected." ) - # order by precursor index + # check that all return_dict keys are unique. 
Assume same length and unique keys constitutes match to precursor_idx + if len(return_dict) != len(set(return_dict.keys())): + raise ValueError( + "Not all precursors were found in the output of the grouping function. Duplicate precursors were found." + ) + + # order by precursor index and return as lists return_dict_ordered = {key: return_dict[key] for key in precursor_idx} ids, groups = zip(*return_dict_ordered.values()) return ids, groups - def perform_grouping( psm: pd.DataFrame, genes_or_proteins: str = "proteins", @@ -105,8 +106,14 @@ def perform_grouping( ): """Highest level function for grouping proteins in precursor table - Parameters: - gene_or_protein (str, optional): Column to group proteins by. Defaults to "proteins". + Args: + psm (pd.DataFrame) : Precursor table with columns "precursor_idx" and protein & decoy columns. + gene_or_protein (str, optional) : Column to group proteins by. Defaults to "proteins". + decoy_column (str, optional) : Column to use for decoy annotation. Defaults to "decoy". + group (bool, optional) : Whether to group proteins. Defaults to True. + + Returns: + pd.DataFrame: Precursor table with grouped proteins """ From dccb69bf52ca187fffba8d8720115f402cbcbf1a Mon Sep 17 00:00:00 2001 From: Vincenth Brennsteiner Date: Fri, 24 May 2024 11:10:46 +0200 Subject: [PATCH 07/48] adapt grouping tutorial notebook to heuristic grouping, add comments --- alphadia/grouping.py | 23 +- .../protein_grouping_tutorial.ipynb | 342 +++++++++--------- tests/unit_tests/test_grouping.py | 44 ++- 3 files changed, 223 insertions(+), 186 deletions(-) diff --git a/alphadia/grouping.py b/alphadia/grouping.py index 67458ee7..f0ba1588 100644 --- a/alphadia/grouping.py +++ b/alphadia/grouping.py @@ -75,6 +75,7 @@ def group_and_parsimony( id_group = [";".join(x) for x in id_group] # reshape output data and align with precursor dataframe input. 
Use dictionary for efficient ordering + # TODO consider iterating over precursor_idx directly return_dict = {} for i, peptide_set in enumerate(precursor_set): for key in peptide_set: @@ -83,16 +84,18 @@ def group_and_parsimony( # check that all precursors are found again if len(return_dict) != len(precursor_idx): raise ValueError( - f"Not all precursors were found in the output of the grouping function. {len(return_dict)} precursors were found, but {len(precursor_idx)} were expected." + f"""Not all precursors were found in the output of the grouping function. {len(return_dict)} precursors were found, but {len(precursor_idx)} were expected.""" ) # check that all return_dict keys are unique. Assume same length and unique keys constitutes match to precursor_idx if len(return_dict) != len(set(return_dict.keys())): raise ValueError( - "Not all precursors were found in the output of the grouping function. Duplicate precursors were found." + """Not all precursors were found in the output of the grouping function. + Duplicate precursors were found.""" ) # order by precursor index and return as lists + # TODO look above, order by precursor_idx directly? return_dict_ordered = {key: return_dict[key] for key in precursor_idx} ids, groups = zip(*return_dict_ordered.values()) @@ -122,17 +125,20 @@ def perform_grouping( # create non-duplicated view of precursor table duplicate_mask = ~psm.duplicated(subset=["precursor_idx"], keep="first") - # make sure column is string + + # make sure column is string and subset to relevant columns psm[genes_or_proteins] = psm[genes_or_proteins].astype(str) upsm = psm.loc[duplicate_mask, ["precursor_idx", genes_or_proteins, decoy_column]] # check if duplicate precursors exist + # TODO: consider removing check for duplicates since duplicate masking is implemented above if upsm.duplicated(subset=["precursor_idx"]).any(): raise ValueError( - "The same precursor was found annotated to different proteins. 
Please make sure all precursors were searched with the same library." + """The same precursor was found annotated to different proteins. + Please make sure all precursors were searched with the same library.""" ) - # handle case with only one decoy class: + # greedy set cover on all proteins if there is only one decoy class unique_decoys = upsm[decoy_column].unique() if len(unique_decoys) == 1: upsm[decoy_column] = -1 @@ -141,15 +147,19 @@ def perform_grouping( ) upsm = upsm[["precursor_idx", "pg_master", "pg", genes_or_proteins]] else: + # handle case with multiple decoy classes target_mask = upsm[decoy_column] == 0 decoy_mask = upsm[decoy_column] == 1 + # greedy set cover on targets t_df = upsm[target_mask].copy() + # TODO: consider directly assigning to t_df["pg_master"], t_df["pg"] = group_and_parsimony(...) new_columns = group_and_parsimony( t_df.precursor_idx.values, t_df[genes_or_proteins].values ) t_df["pg_master"], t_df["pg"] = new_columns + # greedy set cover on decoys d_df = upsm[decoy_mask].copy() new_columns = group_and_parsimony( d_df.precursor_idx.values, d_df[genes_or_proteins].values @@ -160,7 +170,10 @@ def perform_grouping( ["precursor_idx", "pg_master", "pg", genes_or_proteins] ] + # heuristic grouping: from each initial precursor's protein ID set, filter out proteins that + # are never master proteins if group: + # select all master protein groups allowed_pg = upsm["pg"].str.split(";", expand=True)[0].unique() allowed_set_pg = set(allowed_pg) diff --git a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb index 0b36cfe2..fd07c12b 100644 --- a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb +++ b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ 
{ @@ -64,7 +64,7 @@ " precursor_idx\n", " proteins\n", " genes\n", - " _decoy\n", + " decoy\n", " pg_master\n", " pg\n", " \n", @@ -77,7 +77,7 @@ " P1;P2;P3;P4\n", " 0\n", " P1\n", - " P1;P2;P3\n", + " P1;P4\n", " \n", " \n", " 1\n", @@ -86,7 +86,7 @@ " P1;P2;P3;P4\n", " 0\n", " P1\n", - " P1;P2;P3\n", + " P1;P4\n", " \n", " \n", " 2\n", @@ -95,7 +95,7 @@ " P1;P2\n", " 0\n", " P1\n", - " P1;P2;P3\n", + " P1\n", " \n", " \n", " 3\n", @@ -104,7 +104,7 @@ " P1;P2\n", " 0\n", " P1\n", - " P1;P2;P3\n", + " P1\n", " \n", " \n", " 4\n", @@ -131,7 +131,7 @@ " P4;P5\n", " 1\n", " P4\n", - " P4;P5\n", + " P4\n", " \n", " \n", " 7\n", @@ -140,7 +140,7 @@ " P4;P5\n", " 1\n", " P4\n", - " P4;P5\n", + " P4\n", " \n", " \n", " 8\n", @@ -165,20 +165,20 @@ "" ], "text/plain": [ - " precursor_idx proteins genes _decoy pg_master pg\n", - "0 0 P1;P2;P3;P4 P1;P2;P3;P4 0 P1 P1;P2;P3\n", - "1 0 P1;P2;P3;P4 P1;P2;P3;P4 0 P1 P1;P2;P3\n", - "2 1 P1;P2 P1;P2 0 P1 P1;P2;P3\n", - "3 1 P1;P2 P1;P2 0 P1 P1;P2;P3\n", - "4 2 P4 P4 0 P4 P4\n", - "5 2 P4 P4 1 P4 P4\n", - "6 3 P4;P5 P4;P5 1 P4 P4;P5\n", - "7 3 P4;P5 P4;P5 1 P4 P4;P5\n", - "8 4 P6 P6 1 P6 P6\n", - "9 4 P6 P6 1 P6 P6" + " precursor_idx proteins genes decoy pg_master pg\n", + "0 0 P1;P2;P3;P4 P1;P2;P3;P4 0 P1 P1;P4\n", + "1 0 P1;P2;P3;P4 P1;P2;P3;P4 0 P1 P1;P4\n", + "2 1 P1;P2 P1;P2 0 P1 P1\n", + "3 1 P1;P2 P1;P2 0 P1 P1\n", + "4 2 P4 P4 0 P4 P4\n", + "5 2 P4 P4 1 P4 P4\n", + "6 3 P4;P5 P4;P5 1 P4 P4\n", + "7 3 P4;P5 P4;P5 1 P4 P4\n", + "8 4 P6 P6 1 P6 P6\n", + "9 4 P6 P6 1 P6 P6" ] }, - "execution_count": 2, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -216,7 +216,7 @@ " \"precursor_idx\": precursor_idx,\n", " \"proteins\": proteins,\n", " \"genes\": genes,\n", - " \"_decoy\": decoy,\n", + " \"decoy\": decoy,\n", " }\n", ")\n", "\n", @@ -251,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -284,7 +284,7 @@ " \n", " precursor_idx\n", 
" proteins\n", - " _decoy\n", + " decoy\n", " \n", " \n", " \n", @@ -317,11 +317,11 @@ "" ], "text/plain": [ - " precursor_idx proteins _decoy\n", - "0 1 A 0\n", - "1 2 A 0\n", - "2 3 B 0\n", - "3 4 B 0" + " precursor_idx proteins decoy\n", + "0 1 A 0\n", + "1 2 A 0\n", + "2 3 B 0\n", + "3 4 B 0" ] }, "metadata": {}, @@ -350,7 +350,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " pg_master\n", " pg\n", " \n", @@ -393,11 +393,11 @@ "" ], "text/plain": [ - " precursor_idx proteins _decoy pg_master pg\n", - "0 1 A 0 A A\n", - "1 2 A 0 A A\n", - "2 3 B 0 B B\n", - "3 4 B 0 B B" + " precursor_idx proteins decoy pg_master pg\n", + "0 1 A 0 A A\n", + "1 2 A 0 A A\n", + "2 3 B 0 B B\n", + "3 4 B 0 B B" ] }, "metadata": {}, @@ -433,7 +433,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " \n", " \n", " \n", @@ -466,11 +466,11 @@ "" ], "text/plain": [ - " precursor_idx proteins _decoy\n", - "0 1 A 0\n", - "1 2 A;B 0\n", - "2 3 A;B 0\n", - "3 4 B 0" + " precursor_idx proteins decoy\n", + "0 1 A 0\n", + "1 2 A;B 0\n", + "2 3 A;B 0\n", + "3 4 B 0" ] }, "metadata": {}, @@ -499,7 +499,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " pg_master\n", " pg\n", " \n", @@ -519,7 +519,7 @@ " A;B\n", " 0\n", " A\n", - " A\n", + " A;B\n", " \n", " \n", " 2\n", @@ -527,7 +527,7 @@ " A;B\n", " 0\n", " A\n", - " A\n", + " A;B\n", " \n", " \n", " 3\n", @@ -542,11 +542,11 @@ "" ], "text/plain": [ - " precursor_idx proteins _decoy pg_master pg\n", - "0 1 A 0 A A\n", - "1 2 A;B 0 A A\n", - "2 3 A;B 0 A A\n", - "3 4 B 0 B B" + " precursor_idx proteins decoy pg_master pg\n", + "0 1 A 0 A A\n", + "1 2 A;B 0 A A;B\n", + "2 3 A;B 0 A A;B\n", + "3 4 B 0 B B" ] }, "metadata": {}, @@ -582,7 +582,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " \n", " \n", " \n", @@ -615,11 +615,11 @@ "" ], "text/plain": [ - " precursor_idx proteins _decoy\n", - "0 1 A;B 0\n", - "1 2 A;B 0\n", - "2 3 A;B 0\n", 
- "3 4 A;B 0" + " precursor_idx proteins decoy\n", + "0 1 A;B 0\n", + "1 2 A;B 0\n", + "2 3 A;B 0\n", + "3 4 A;B 0" ] }, "metadata": {}, @@ -648,7 +648,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " pg_master\n", " pg\n", " \n", @@ -660,7 +660,7 @@ " A;B\n", " 0\n", " A\n", - " A;B\n", + " A\n", " \n", " \n", " 1\n", @@ -668,7 +668,7 @@ " A;B\n", " 0\n", " A\n", - " A;B\n", + " A\n", " \n", " \n", " 2\n", @@ -676,7 +676,7 @@ " A;B\n", " 0\n", " A\n", - " A;B\n", + " A\n", " \n", " \n", " 3\n", @@ -684,18 +684,18 @@ " A;B\n", " 0\n", " A\n", - " A;B\n", + " A\n", " \n", " \n", "\n", "" ], "text/plain": [ - " precursor_idx proteins _decoy pg_master pg\n", - "0 1 A;B 0 A A;B\n", - "1 2 A;B 0 A A;B\n", - "2 3 A;B 0 A A;B\n", - "3 4 A;B 0 A A;B" + " precursor_idx proteins decoy pg_master pg\n", + "0 1 A;B 0 A A\n", + "1 2 A;B 0 A A\n", + "2 3 A;B 0 A A\n", + "3 4 A;B 0 A A" ] }, "metadata": {}, @@ -731,7 +731,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " \n", " \n", " \n", @@ -764,11 +764,11 @@ "" ], "text/plain": [ - " precursor_idx proteins _decoy\n", - "0 1 A 0\n", - "1 2 A;B 0\n", - "2 3 A;B 0\n", - "3 4 A;B 0" + " precursor_idx proteins decoy\n", + "0 1 A 0\n", + "1 2 A;B 0\n", + "2 3 A;B 0\n", + "3 4 A;B 0" ] }, "metadata": {}, @@ -797,7 +797,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " pg_master\n", " pg\n", " \n", @@ -809,7 +809,7 @@ " A\n", " 0\n", " A\n", - " A;B\n", + " A\n", " \n", " \n", " 1\n", @@ -817,7 +817,7 @@ " A;B\n", " 0\n", " A\n", - " A;B\n", + " A\n", " \n", " \n", " 2\n", @@ -825,7 +825,7 @@ " A;B\n", " 0\n", " A\n", - " A;B\n", + " A\n", " \n", " \n", " 3\n", @@ -833,18 +833,18 @@ " A;B\n", " 0\n", " A\n", - " A;B\n", + " A\n", " \n", " \n", "\n", "" ], "text/plain": [ - " precursor_idx proteins _decoy pg_master pg\n", - "0 1 A 0 A A;B\n", - "1 2 A;B 0 A A;B\n", - "2 3 A;B 0 A A;B\n", - "3 4 A;B 0 A A;B" + " precursor_idx proteins decoy pg_master 
pg\n", + "0 1 A 0 A A\n", + "1 2 A;B 0 A A\n", + "2 3 A;B 0 A A\n", + "3 4 A;B 0 A A" ] }, "metadata": {}, @@ -880,7 +880,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " \n", " \n", " \n", @@ -913,11 +913,11 @@ "" ], "text/plain": [ - " precursor_idx proteins _decoy\n", - "0 1 A 0\n", - "1 2 A;B 0\n", - "2 3 B;C 0\n", - "3 4 C 0" + " precursor_idx proteins decoy\n", + "0 1 A 0\n", + "1 2 A;B 0\n", + "2 3 B;C 0\n", + "3 4 C 0" ] }, "metadata": {}, @@ -946,7 +946,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " pg_master\n", " pg\n", " \n", @@ -974,7 +974,7 @@ " B;C\n", " 0\n", " C\n", - " C;B\n", + " C\n", " \n", " \n", " 3\n", @@ -982,18 +982,18 @@ " C\n", " 0\n", " C\n", - " C;B\n", + " C\n", " \n", " \n", "\n", "" ], "text/plain": [ - " precursor_idx proteins _decoy pg_master pg\n", - "0 1 A 0 A A\n", - "1 2 A;B 0 A A\n", - "2 3 B;C 0 C C;B\n", - "3 4 C 0 C C;B" + " precursor_idx proteins decoy pg_master pg\n", + "0 1 A 0 A A\n", + "1 2 A;B 0 A A\n", + "2 3 B;C 0 C C\n", + "3 4 C 0 C C" ] }, "metadata": {}, @@ -1029,7 +1029,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " \n", " \n", " \n", @@ -1062,11 +1062,11 @@ "" ], "text/plain": [ - " precursor_idx proteins _decoy\n", - "0 1 A;B 0\n", - "1 2 A;B;C 0\n", - "2 3 A;B;C 0\n", - "3 4 A;C 0" + " precursor_idx proteins decoy\n", + "0 1 A;B 0\n", + "1 2 A;B;C 0\n", + "2 3 A;B;C 0\n", + "3 4 A;C 0" ] }, "metadata": {}, @@ -1095,7 +1095,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " pg_master\n", " pg\n", " \n", @@ -1107,7 +1107,7 @@ " A;B\n", " 0\n", " A\n", - " A;B;C\n", + " A\n", " \n", " \n", " 1\n", @@ -1115,7 +1115,7 @@ " A;B;C\n", " 0\n", " A\n", - " A;B;C\n", + " A\n", " \n", " \n", " 2\n", @@ -1123,7 +1123,7 @@ " A;B;C\n", " 0\n", " A\n", - " A;B;C\n", + " A\n", " \n", " \n", " 3\n", @@ -1131,18 +1131,18 @@ " A;C\n", " 0\n", " A\n", - " A;B;C\n", + " A\n", " \n", " \n", "\n", "" ], 
"text/plain": [ - " precursor_idx proteins _decoy pg_master pg\n", - "0 1 A;B 0 A A;B;C\n", - "1 2 A;B;C 0 A A;B;C\n", - "2 3 A;B;C 0 A A;B;C\n", - "3 4 A;C 0 A A;B;C" + " precursor_idx proteins decoy pg_master pg\n", + "0 1 A;B 0 A A\n", + "1 2 A;B;C 0 A A\n", + "2 3 A;B;C 0 A A\n", + "3 4 A;C 0 A A" ] }, "metadata": {}, @@ -1178,7 +1178,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " \n", " \n", " \n", @@ -1211,11 +1211,11 @@ "" ], "text/plain": [ - " precursor_idx proteins _decoy\n", - "0 1 A;B;C 0\n", - "1 2 B;C;D 0\n", - "2 3 C;D;E 0\n", - "3 4 D;E;A 0" + " precursor_idx proteins decoy\n", + "0 1 A;B;C 0\n", + "1 2 B;C;D 0\n", + "2 3 C;D;E 0\n", + "3 4 D;E;A 0" ] }, "metadata": {}, @@ -1244,7 +1244,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " pg_master\n", " pg\n", " \n", @@ -1256,7 +1256,7 @@ " A;B;C\n", " 0\n", " C\n", - " C;B\n", + " A;C\n", " \n", " \n", " 1\n", @@ -1264,7 +1264,7 @@ " B;C;D\n", " 0\n", " C\n", - " C;B\n", + " C\n", " \n", " \n", " 2\n", @@ -1272,7 +1272,7 @@ " C;D;E\n", " 0\n", " C\n", - " C;B\n", + " C\n", " \n", " \n", " 3\n", @@ -1280,18 +1280,18 @@ " D;E;A\n", " 0\n", " A\n", - " A;D;E\n", + " A\n", " \n", " \n", "\n", "" ], "text/plain": [ - " precursor_idx proteins _decoy pg_master pg\n", - "0 1 A;B;C 0 C C;B\n", - "1 2 B;C;D 0 C C;B\n", - "2 3 C;D;E 0 C C;B\n", - "3 4 D;E;A 0 A A;D;E" + " precursor_idx proteins decoy pg_master pg\n", + "0 1 A;B;C 0 C A;C\n", + "1 2 B;C;D 0 C C\n", + "2 3 C;D;E 0 C C\n", + "3 4 D;E;A 0 A A" ] }, "metadata": {}, @@ -1327,7 +1327,7 @@ " \n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " \n", " \n", " \n", @@ -1360,11 +1360,11 @@ "" ], "text/plain": [ - " precursor_idx proteins _decoy\n", - "0 0 P1;P2;P3;P4 0\n", - "1 1 P1;P4 0\n", - "2 2 P2 0\n", - "3 3 P2;P5 0" + " precursor_idx proteins decoy\n", + "0 0 P1;P2;P3;P4 0\n", + "1 1 P1;P4 0\n", + "2 2 P2 0\n", + "3 3 P2;P5 0" ] }, "metadata": {}, @@ -1393,7 +1393,7 @@ " 
\n", " precursor_idx\n", " proteins\n", - " _decoy\n", + " decoy\n", " pg_master\n", " pg\n", " \n", @@ -1405,7 +1405,7 @@ " P1;P2;P3;P4\n", " 0\n", " P2\n", - " P2;P3;P5\n", + " P1;P2\n", " \n", " \n", " 1\n", @@ -1413,7 +1413,7 @@ " P1;P4\n", " 0\n", " P1\n", - " P1;P4\n", + " P1\n", " \n", " \n", " 2\n", @@ -1421,7 +1421,7 @@ " P2\n", " 0\n", " P2\n", - " P2;P3;P5\n", + " P2\n", " \n", " \n", " 3\n", @@ -1429,18 +1429,18 @@ " P2;P5\n", " 0\n", " P2\n", - " P2;P3;P5\n", + " P2\n", " \n", " \n", "\n", "" ], "text/plain": [ - " precursor_idx proteins _decoy pg_master pg\n", - "0 0 P1;P2;P3;P4 0 P2 P2;P3;P5\n", - "1 1 P1;P4 0 P1 P1;P4\n", - "2 2 P2 0 P2 P2;P3;P5\n", - "3 3 P2;P5 0 P2 P2;P3;P5" + " precursor_idx proteins decoy pg_master pg\n", + "0 0 P1;P2;P3;P4 0 P2 P1;P2\n", + "1 1 P1;P4 0 P1 P1\n", + "2 2 P2 0 P2 P2\n", + "3 3 P2;P5 0 P2 P2" ] }, "metadata": {}, @@ -1455,7 +1455,7 @@ " {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A\", \"A\", \"B\", \"B\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " }\n", " )\n", " print(\"distinct proteins\")\n", @@ -1466,9 +1466,10 @@ " ) == {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A\", \"A\", \"B\", \"B\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"A\", \"A\", \"B\", \"B\"],\n", - " \"pg\": [\"A\", \"A\", \"B\", \"B\"],\n", + " # \"pg\": [\"A\", \"A\", \"B\", \"B\"], # parsimonious grouping\n", + " \"pg\" : [\"A\", \"A\", \"B\", \"B\"]\n", " }\n", "\n", " # 2. 
differentiable proteins: shared peptides go to one protein\n", @@ -1476,7 +1477,7 @@ " {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A\", \"A;B\", \"A;B\", \"B\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " }\n", " )\n", " print(\"differentiable proteins\")\n", @@ -1487,9 +1488,10 @@ " ).to_dict(orient=\"list\") == {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A\", \"A;B\", \"A;B\", \"B\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"A\", \"A\", \"A\", \"B\"],\n", - " \"pg\": [\"A\", \"A\", \"A\", \"B\"],\n", + " # \"pg\": [\"A\", \"A\", \"A\", \"B\"], # parsimonious grouping\n", + " \"pg\" : [\"A\", \"A;B\", \"A;B\", \"B\"]\n", " }\n", "\n", " # 3. indistinguishable proteins: one is totally removed\n", @@ -1497,7 +1499,7 @@ " {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " }\n", " )\n", " print(\"indistinguishable proteins\")\n", @@ -1508,9 +1510,10 @@ " ).to_dict(orient=\"list\") == {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"A\", \"A\", \"A\", \"A\"],\n", - " \"pg\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"],\n", + " # \"pg\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"], # parsimonious grouping\n", + " \"pg\" : [\"A\", \"A\", \"A\", \"A\"]\n", " }\n", "\n", " # 4. 
subset protein: subsetted protein is removed\n", @@ -1518,7 +1521,7 @@ " {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A\", \"A;B\", \"A;B\", \"A;B\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " }\n", " )\n", " print(\"subset proteins\")\n", @@ -1529,9 +1532,10 @@ " ) == {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A\", \"A;B\", \"A;B\", \"A;B\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"A\", \"A\", \"A\", \"A\"],\n", - " \"pg\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"],\n", + " # \"pg\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"], # parsimonious grouping\n", + " \"pg\" : [\"A\", \"A\", \"A\", \"A\"]\n", " }\n", "\n", " # 5. subsumable proteins --> there are two possible outcomes depending on whether one starts with the middle or either end of the chain\n", @@ -1539,7 +1543,7 @@ " {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A\", \"A;B\", \"B;C\", \"C\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " }\n", " )\n", " print(\"subsumable proteins\")\n", @@ -1550,9 +1554,10 @@ " ) == {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A\", \"A;B\", \"B;C\", \"C\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"A\", \"A\", \"C\", \"C\"],\n", - " \"pg\": [\"A\", \"A\", \"C;B\", \"C;B\"],\n", + " # \"pg\": [\"A\", \"A\", \"C;B\", \"C;B\"], # parsimonious grouping\n", + " \"pg\" : [\"A\", \"A\", \"C\", \"C\"]\n", " }\n", "\n", " # 6. 
a group of proteins identified by shared peptides only\n", @@ -1560,7 +1565,7 @@ " {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A;B\", \"A;B;C\", \"A;B;C\", \"A;C\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " }\n", " )\n", " print(\"shared only\")\n", @@ -1571,9 +1576,10 @@ " ) == {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A;B\", \"A;B;C\", \"A;B;C\", \"A;C\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"A\", \"A\", \"A\", \"A\"],\n", - " \"pg\": [\"A;B;C\", \"A;B;C\", \"A;B;C\", \"A;B;C\"],\n", + " # \"pg\": [\"A;B;C\", \"A;B;C\", \"A;B;C\", \"A;B;C\"], # parsimonious grouping\n", + " \"pg\" : [\"A\", \"A\", \"A\", \"A\"]\n", " }\n", "\n", " # 7. circular proteins\n", @@ -1581,7 +1587,7 @@ " {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A;B;C\", \"B;C;D\", \"C;D;E\", \"D;E;A\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " }\n", " )\n", " print(\"circular\")\n", @@ -1592,9 +1598,10 @@ " ) == {\n", " \"precursor_idx\": [1, 2, 3, 4],\n", " \"proteins\": [\"A;B;C\", \"B;C;D\", \"C;D;E\", \"D;E;A\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"C\", \"C\", \"C\", \"A\"],\n", - " \"pg\": [\"C;B\", \"C;B\", \"C;B\", \"A;D;E\"],\n", + " # \"pg\": [\"C;B\", \"C;B\", \"C;B\", \"A;D;E\"], # parsimonious grouping\n", + " \"pg\" : [\"A;C\", \"C\", \"C\", \"A\"]\n", " }\n", "\n", " # 8. 
Complex example --> depending on which of the equivalent proteins P1 and P4 is chosen first, the grouping will be different\n", @@ -1602,7 +1609,7 @@ " {\n", " \"precursor_idx\": [0, 1, 2, 3],\n", " \"proteins\": [\"P1;P2;P3;P4\", \"P1;P4\", \"P2\", \"P2;P5\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " }\n", " )\n", " print(\"complex example\")\n", @@ -1613,9 +1620,10 @@ " ) == {\n", " \"precursor_idx\": [0, 1, 2, 3],\n", " \"proteins\": [\"P1;P2;P3;P4\", \"P1;P4\", \"P2\", \"P2;P5\"],\n", - " \"_decoy\": [0, 0, 0, 0],\n", + " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"P2\", \"P1\", \"P2\", \"P2\"],\n", - " \"pg\": [\"P2;P3;P5\", \"P1;P4\", \"P2;P3;P5\", \"P2;P3;P5\"],\n", + " # \"pg\": [\"P2;P3;P5\", \"P1;P4\", \"P2;P3;P5\", \"P2;P3;P5\"], # parsimonious grouping\n", + " \"pg\" : [\"P1;P2\", \"P1\", \"P2\", \"P2\"]\n", " }\n", "\n", "\n", diff --git a/tests/unit_tests/test_grouping.py b/tests/unit_tests/test_grouping.py index cbb63180..cd7c9c4b 100644 --- a/tests/unit_tests/test_grouping.py +++ b/tests/unit_tests/test_grouping.py @@ -20,7 +20,7 @@ def construct_test_cases(): "proteins": ["A", "A", "B", "B"], "decoy": [0, 0, 0, 0], "pg_master": ["A", "A", "B", "B"], - "pg": ["A", "A", "B", "B"], + "pg": ["A", "A", "B", "B"], # heuristic grouping } differentiable_proteins_input = { @@ -33,7 +33,7 @@ def construct_test_cases(): "proteins": ["A", "A;B", "A;B", "B"], "decoy": [0, 0, 0, 0], "pg_master": ["A", "A", "A", "B"], - "pg": ["A", "A;B", "A;B", "B"], + "pg": ["A", "A;B", "A;B", "B"], # heuristic grouping } indistinguishable_proteins_input = { @@ -46,7 +46,7 @@ def construct_test_cases(): "proteins": ["A;B", "A;B", "A;B", "A;B"], "decoy": [0, 0, 0, 0], "pg_master": ["A", "A", "A", "A"], - "pg": ["A", "A", "A", "A"], + "pg": ["A", "A", "A", "A"], # heuristic grouping } subset_proteins_input = { @@ -59,33 +59,33 @@ def construct_test_cases(): "proteins": ["A", "A;B", "A;B", "A;B"], "decoy": [0, 0, 0, 0], "pg_master": ["A", "A", 
"A", "A"], - "pg": ["A", "A", "A", "A"], + "pg": ["A", "A", "A", "A"], # heuristic grouping } subsumable_proteins_input = { "precursor_idx": [1, 2, 3, 4], "proteins": ["A", "A;B", "B;C", "C"], - "decoy": [0, 0, 0, 0], + "decoy": [0, 0, 0, 0], # heuristic grouping } subsumable_proteins_expected = { "precursor_idx": [1, 2, 3, 4], "proteins": ["A", "A;B", "B;C", "C"], "decoy": [0, 0, 0, 0], "pg_master": ["A", "A", "C", "C"], - "pg": ["A", "A", "C", "C"], + "pg": ["A", "A", "C", "C"], # heuristic grouping } shared_only_proteins_input = { "precursor_idx": [1, 2, 3, 4], "proteins": ["A;B", "A;B;C", "A;B;C", "A;C"], - "decoy": [0, 0, 0, 0], + "decoy": [0, 0, 0, 0], # heuristic grouping } shared_only_proteins_expected = { "precursor_idx": [1, 2, 3, 4], "proteins": ["A;B", "A;B;C", "A;B;C", "A;C"], "decoy": [0, 0, 0, 0], "pg_master": ["A", "A", "A", "A"], - "pg": ["A", "A", "A", "A"], + "pg": ["A", "A", "A", "A"], # heuristic grouping } circular_proteins_input = { @@ -98,7 +98,7 @@ def construct_test_cases(): "proteins": ["A;B;C", "B;C;D", "C;D;E", "D;E;A"], "decoy": [0, 0, 0, 0], "pg_master": ["C", "C", "C", "A"], - "pg": ["A;C", "C", "C", "A"], + "pg": ["A;C", "C", "C", "A"], # heuristic grouping } complex_example_proteins_input = { @@ -111,11 +111,15 @@ def construct_test_cases(): "proteins": ["P1;P2;P3;P4", "P1;P4", "P2", "P2;P5"], "decoy": [0, 0, 0, 0], "pg_master": ["P2", "P1", "P2", "P2"], - "pg": ["P1;P2", "P1", "P2", "P2"], + "pg": ["P1;P2", "P1", "P2", "P2"], # heuristic grouping } test_cases = [ - ("distinct_proteins", distinct_proteins_input, distinct_proteins_expected), + ( + "distinct_proteins", + distinct_proteins_input, + distinct_proteins_expected + ), ( "differentiable proteins", differentiable_proteins_input, @@ -126,14 +130,26 @@ def construct_test_cases(): indistinguishable_proteins_input, indistinguishable_proteins_expected, ), - ("subset proteins", subset_proteins_input, subset_proteins_expected), + ( + "subset proteins", + subset_proteins_input, + 
subset_proteins_expected + ), ( "subsumable proteins", subsumable_proteins_input, subsumable_proteins_expected, ), - ("shared only", shared_only_proteins_input, shared_only_proteins_expected), - ("circular", circular_proteins_input, circular_proteins_expected), + ( + "shared only", + shared_only_proteins_input, + shared_only_proteins_expected + ), + ( + "circular", + circular_proteins_input, + circular_proteins_expected + ), ( "complex example", complex_example_proteins_input, From b60f877540789021a29c655050daae14b87d3371 Mon Sep 17 00:00:00 2001 From: Vincenth Brennsteiner Date: Fri, 24 May 2024 11:37:55 +0200 Subject: [PATCH 08/48] add 'return_parsimony_groups' option to perform_grouping and 'return_groups' option to group_and_parsimony in order to obtain maximum parsimony derived protein groups. This does not affect the current output of the maximum_parsimony and heuristic mode. --- alphadia/grouping.py | 14 ++++++++------ nbs/tutorial_nbs/protein_grouping_tutorial.ipynb | 11 +++++++---- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/alphadia/grouping.py b/alphadia/grouping.py index f0ba1588..b832776e 100644 --- a/alphadia/grouping.py +++ b/alphadia/grouping.py @@ -14,6 +14,7 @@ def group_and_parsimony( precursor_idx: NDArray[np.int64], precursor_ids: NDArray[Any], + return_groups: bool = False, ): """Function to group ids based on precursor indices and return groups & master ids as lists @@ -63,8 +64,8 @@ def group_and_parsimony( id_dict[subject_protein] = new_subject_set # With the following lines commented out, the query will only eliminate peptides from # respective subject proteins, but we will not add them to the query group - # if len(new_subject_set) == 0: - # query_group.append(subject_protein) + if return_groups and len(new_subject_set) == 0: + query_group.append(subject_protein) # save query to output lists id_group.append(query_group) @@ -106,6 +107,7 @@ def perform_grouping( genes_or_proteins: str = "proteins", decoy_column: str = 
"decoy", group: bool = True, + return_parsimony_groups: bool = False, ): """Highest level function for grouping proteins in precursor table @@ -143,7 +145,7 @@ def perform_grouping( if len(unique_decoys) == 1: upsm[decoy_column] = -1 upsm["pg_master"], upsm["pg"] = group_and_parsimony( - upsm.precursor_idx.values, upsm[genes_or_proteins].values + upsm.precursor_idx.values, upsm[genes_or_proteins].values, return_parsimony_groups ) upsm = upsm[["precursor_idx", "pg_master", "pg", genes_or_proteins]] else: @@ -155,14 +157,14 @@ def perform_grouping( t_df = upsm[target_mask].copy() # TODO: consider directly assigning to t_df["pg_master"], t_df["pg"] = group_and_parsimony(...) new_columns = group_and_parsimony( - t_df.precursor_idx.values, t_df[genes_or_proteins].values + t_df.precursor_idx.values, t_df[genes_or_proteins].values, return_parsimony_groups ) t_df["pg_master"], t_df["pg"] = new_columns # greedy set cover on decoys d_df = upsm[decoy_mask].copy() new_columns = group_and_parsimony( - d_df.precursor_idx.values, d_df[genes_or_proteins].values + d_df.precursor_idx.values, d_df[genes_or_proteins].values, return_parsimony_groups ) d_df["pg_master"], d_df["pg"] = new_columns @@ -173,7 +175,7 @@ def perform_grouping( # heuristic grouping: from each initial precursor's protein ID set, filter out proteins that # are never master proteins if group: - # select all master protein groups + # select all master protein groups, which are the first in the semicolon separated list allowed_pg = upsm["pg"].str.split(";", expand=True)[0].unique() allowed_set_pg = set(allowed_pg) diff --git a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb index fd07c12b..12d80446 100644 --- a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb +++ b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb @@ -25,10 +25,13 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ + "%load_ext 
autoreload\n", + "%autoreload 2\n", + "\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", @@ -37,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -178,7 +181,7 @@ "9 4 P6 P6 1 P6 P6" ] }, - "execution_count": 4, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -251,7 +254,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 3, "metadata": {}, "outputs": [ { From d9a21b6ff223ad04b3038cc28a9bea57a8fc3818 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 24 May 2024 09:54:45 +0000 Subject: [PATCH 09/48] Apply Black formatting --- alphadia/grouping.py | 29 ++-- nbs/debug/dev_grouping_comparison.ipynb | 125 ++++++++++-------- .../protein_grouping_tutorial.ipynb | 16 +-- tests/unit_tests/test_grouping.py | 44 ++---- 4 files changed, 113 insertions(+), 101 deletions(-) diff --git a/alphadia/grouping.py b/alphadia/grouping.py index b832776e..5d64d499 100644 --- a/alphadia/grouping.py +++ b/alphadia/grouping.py @@ -19,15 +19,15 @@ def group_and_parsimony( """Function to group ids based on precursor indices and return groups & master ids as lists Args: - precursor_idx (np.array[int]): array containing unique integer indices corresponding + precursor_idx (np.array[int]): array containing unique integer indices corresponding to each peptide precursor - precursor_ids (np.array[str]): array of variable length semicolon separated str belonging + precursor_ids (np.array[str]): array of variable length semicolon separated str belonging to a given peptide precursor id Returns - ids (list[str]): list of ids linked to a given peptide precursor, such that each + ids (list[str]): list of ids linked to a given peptide precursor, such that each precursor only belongs to one id. This list is ordered by precursor_idx. 
- groups (list[str]): list of semicolon separated ids belonging to a given peptide precursor, + groups (list[str]): list of semicolon separated ids belonging to a given peptide precursor, such that each precursor only belongs to one group. This list is ordered by precursor_idx. """ @@ -62,10 +62,10 @@ def group_and_parsimony( continue new_subject_set = subject_peptides - query_peptides id_dict[subject_protein] = new_subject_set - # With the following lines commented out, the query will only eliminate peptides from + # With the following lines commented out, the query will only eliminate peptides from # respective subject proteins, but we will not add them to the query group if return_groups and len(new_subject_set) == 0: - query_group.append(subject_protein) + query_group.append(subject_protein) # save query to output lists id_group.append(query_group) @@ -102,6 +102,7 @@ def group_and_parsimony( return ids, groups + def perform_grouping( psm: pd.DataFrame, genes_or_proteins: str = "proteins", @@ -127,7 +128,7 @@ def perform_grouping( # create non-duplicated view of precursor table duplicate_mask = ~psm.duplicated(subset=["precursor_idx"], keep="first") - + # make sure column is string and subset to relevant columns psm[genes_or_proteins] = psm[genes_or_proteins].astype(str) upsm = psm.loc[duplicate_mask, ["precursor_idx", genes_or_proteins, decoy_column]] @@ -145,7 +146,9 @@ def perform_grouping( if len(unique_decoys) == 1: upsm[decoy_column] = -1 upsm["pg_master"], upsm["pg"] = group_and_parsimony( - upsm.precursor_idx.values, upsm[genes_or_proteins].values, return_parsimony_groups + upsm.precursor_idx.values, + upsm[genes_or_proteins].values, + return_parsimony_groups, ) upsm = upsm[["precursor_idx", "pg_master", "pg", genes_or_proteins]] else: @@ -157,14 +160,18 @@ def perform_grouping( t_df = upsm[target_mask].copy() # TODO: consider directly assigning to t_df["pg_master"], t_df["pg"] = group_and_parsimony(...) 
new_columns = group_and_parsimony( - t_df.precursor_idx.values, t_df[genes_or_proteins].values, return_parsimony_groups + t_df.precursor_idx.values, + t_df[genes_or_proteins].values, + return_parsimony_groups, ) t_df["pg_master"], t_df["pg"] = new_columns # greedy set cover on decoys d_df = upsm[decoy_mask].copy() new_columns = group_and_parsimony( - d_df.precursor_idx.values, d_df[genes_or_proteins].values, return_parsimony_groups + d_df.precursor_idx.values, + d_df[genes_or_proteins].values, + return_parsimony_groups, ) d_df["pg_master"], d_df["pg"] = new_columns @@ -172,7 +179,7 @@ def perform_grouping( ["precursor_idx", "pg_master", "pg", genes_or_proteins] ] - # heuristic grouping: from each initial precursor's protein ID set, filter out proteins that + # heuristic grouping: from each initial precursor's protein ID set, filter out proteins that # are never master proteins if group: # select all master protein groups, which are the first in the semicolon separated list diff --git a/nbs/debug/dev_grouping_comparison.ipynb b/nbs/debug/dev_grouping_comparison.ipynb index fa381c51..0d9c0c37 100644 --- a/nbs/debug/dev_grouping_comparison.ipynb +++ b/nbs/debug/dev_grouping_comparison.ipynb @@ -34,92 +34,114 @@ "metadata": {}, "outputs": [], "source": [ - "# Inspect QC files and generate an overview of the data \n", + "# Inspect QC files and generate an overview of the data\n", "\n", - "import os \n", + "import os\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", + "\n", "# Utility functions\n", "# to be sure and to enable parsing without user input, infer cohort from file itself\n", "def infer_engine(\n", - " headers : list,\n", - " alphadia_pe_columns : list = ['base_width_mobility', 'base_width_rt', 'mono_ms1_intensity'],\n", - " alphadia_pg_columns : list = ['pg'],\n", - " diann_pe_columns : list = ['File.Name', 'Run', 'Protein.Group', 'Protein.Ids'],\n", - " diann_pg_columns : list = ['First.Protein.Description'],\n", 
- " spectronaut_pe_columns : list = ['PG.ProteinGroups', 'PG.ProteinAccessions', 'PG.Genes', 'PG.UniProtIds'],\n", - " spectronaut_pg_columns : list = ['PG.NrOfStrippedSequencesIdentified (Experiment-wide)', 'PG.NrOfPrecursorsIdentified (Experiment-wide)'],\n", + " headers: list,\n", + " alphadia_pe_columns: list = [\n", + " \"base_width_mobility\",\n", + " \"base_width_rt\",\n", + " \"mono_ms1_intensity\",\n", + " ],\n", + " alphadia_pg_columns: list = [\"pg\"],\n", + " diann_pe_columns: list = [\"File.Name\", \"Run\", \"Protein.Group\", \"Protein.Ids\"],\n", + " diann_pg_columns: list = [\"First.Protein.Description\"],\n", + " spectronaut_pe_columns: list = [\n", + " \"PG.ProteinGroups\",\n", + " \"PG.ProteinAccessions\",\n", + " \"PG.Genes\",\n", + " \"PG.UniProtIds\",\n", + " ],\n", + " spectronaut_pg_columns: list = [\n", + " \"PG.NrOfStrippedSequencesIdentified (Experiment-wide)\",\n", + " \"PG.NrOfPrecursorsIdentified (Experiment-wide)\",\n", + " ],\n", "):\n", " engine = []\n", " level = []\n", " if set(alphadia_pe_columns).issubset(headers):\n", - " engine.append('Alphadia')\n", - " level.append('peptide')\n", - " elif set (alphadia_pg_columns).issubset(headers):\n", - " engine.append('Alphadia')\n", - " level.append('protein_group')\n", + " engine.append(\"Alphadia\")\n", + " level.append(\"peptide\")\n", + " elif set(alphadia_pg_columns).issubset(headers):\n", + " engine.append(\"Alphadia\")\n", + " level.append(\"protein_group\")\n", " elif set(diann_pe_columns).issubset(headers):\n", - " engine.append('DiaNN')\n", - " level.append('peptide')\n", + " engine.append(\"DiaNN\")\n", + " level.append(\"peptide\")\n", " elif set(diann_pg_columns).issubset(headers):\n", - " engine.append('DiaNN')\n", - " level.append('protein_group')\n", + " engine.append(\"DiaNN\")\n", + " level.append(\"protein_group\")\n", " elif set(spectronaut_pe_columns).issubset(headers):\n", - " engine.append('Spectronaut')\n", - " level.append('peptide')\n", + " 
engine.append(\"Spectronaut\")\n", + " level.append(\"peptide\")\n", " elif set(spectronaut_pg_columns).issubset(headers):\n", - " engine.append('Spectronaut')\n", - " level.append('protein_group')\n", + " engine.append(\"Spectronaut\")\n", + " level.append(\"protein_group\")\n", " else:\n", - " raise ValueError('Could not infer search engine from column names')\n", + " raise ValueError(\"Could not infer search engine from column names\")\n", "\n", " if len(engine) > 1:\n", - " raise ValueError('More than one search engine detected')\n", + " raise ValueError(\"More than one search engine detected\")\n", "\n", " return engine[0], level[0]\n", "\n", + "\n", "# parse result files to usable format\n", "\n", + "\n", "def parse_alphadia_pe_table(\n", - " pe_table : pd.DataFrame,\n", + " pe_table: pd.DataFrame,\n", "):\n", " return pe_table\n", "\n", + "\n", "def parse_alphadia_pg_table(\n", - " pg_table : pd.DataFrame,\n", + " pg_table: pd.DataFrame,\n", "):\n", " return pg_table\n", "\n", + "\n", "def parse_diann_pe_table(\n", - " pe_table : pd.DataFrame,\n", + " pe_table: pd.DataFrame,\n", "):\n", " return pe_table\n", "\n", + "\n", "def parse_diann_pg_table(\n", - " pg_table : pd.DataFrame,\n", + " pg_table: pd.DataFrame,\n", "):\n", " return pg_table\n", "\n", + "\n", "def parse_spectronaut_pe_table(\n", - " pe_table : pd.DataFrame,\n", + " pe_table: pd.DataFrame,\n", "):\n", " return pe_table\n", "\n", + "\n", "def parse_spectronaut_pg_table(\n", - " pg_table : pd.DataFrame,\n", + " pg_table: pd.DataFrame,\n", "):\n", " return pg_table\n", "\n", + "\n", "# Higher level wrapper to read and parse peptide and protein group level results table\n", "\n", + "\n", "def read_and_parse_peptide_table(\n", - " input_table_path : str,\n", - " input_table_filename : str,\n", + " input_table_path: str,\n", + " input_table_filename: str,\n", ") -> pd.DataFrame:\n", " \"\"\"Read and parse results table from respective search engine output. 
First step in analysing QC data.\n", - " \n", + "\n", " Parameters:\n", " ----------\n", "\n", @@ -134,30 +156,29 @@ "\n", " out_table : pd.DataFrame\n", " DataFrame containing the peptide level results\n", - " \n", + "\n", " \"\"\"\n", "\n", " # read peptide level table\n", " input_table = pd.read_csv(\n", - " os.path.join(input_table_path, input_table_filename),\n", - " sep = '\\t'\n", + " os.path.join(input_table_path, input_table_filename), sep=\"\\t\"\n", " )\n", - " \n", + "\n", " engine, level = infer_engine(input_table.columns.tolist())\n", "\n", - " if level == 'peptide':\n", - " if engine == 'Alphadia':\n", + " if level == \"peptide\":\n", + " if engine == \"Alphadia\":\n", " out_table = parse_alphadia_pe_table(input_table)\n", - " elif engine == 'DiaNN':\n", + " elif engine == \"DiaNN\":\n", " out_table = parse_diann_pe_table(input_table)\n", - " elif engine == 'Spectronaut':\n", + " elif engine == \"Spectronaut\":\n", " out_table = parse_spectronaut_pe_table(input_table)\n", - " elif level == 'protein_group':\n", - " if engine == 'Alphadia':\n", + " elif level == \"protein_group\":\n", + " if engine == \"Alphadia\":\n", " out_table = parse_alphadia_pg_table(input_table)\n", - " elif engine == 'DiaNN':\n", + " elif engine == \"DiaNN\":\n", " out_table = parse_diann_pg_table(input_table)\n", - " elif engine == 'Spectronaut':\n", + " elif engine == \"Spectronaut\":\n", " out_table = parse_spectronaut_pg_table(input_table)\n", "\n", " return out_table, engine" @@ -1388,42 +1409,42 @@ "pe_table_path = \"./dev_grouping_comparison_data/alphadia/precursor_level\"\n", "pe_table_filename = \"precursors.tsv\"\n", "pe_table, engine = read_and_parse_peptide_table(pe_table_path, pe_table_filename)\n", - "assert engine == 'Alphadia'\n", + "assert engine == \"Alphadia\"\n", "display(pe_table.head())\n", "\n", "# test alphadia PG table\n", "pg_table_path = \"./dev_grouping_comparison_data/alphadia/group_level\"\n", "pg_table_filename = \"pg.matrix.tsv\"\n", "pg_table, 
engine = read_and_parse_peptide_table(pg_table_path, pg_table_filename)\n", - "assert engine == 'Alphadia'\n", + "assert engine == \"Alphadia\"\n", "display(pg_table.head())\n", "\n", "# test diann PE table\n", "pe_table_path = \"./dev_grouping_comparison_data/diann/precursor_level\"\n", "pe_table_filename = \"report.tsv\"\n", "pe_table, engine = read_and_parse_peptide_table(pe_table_path, pe_table_filename)\n", - "assert engine == 'DiaNN'\n", + "assert engine == \"DiaNN\"\n", "display(pe_table.head())\n", "\n", "# test diann PG table\n", "pg_table_path = \"./dev_grouping_comparison_data/diann/group_level\"\n", "pg_table_filename = \"report.pg_matrix.tsv\"\n", "pg_table, engine = read_and_parse_peptide_table(pg_table_path, pg_table_filename)\n", - "assert engine == 'DiaNN'\n", + "assert engine == \"DiaNN\"\n", "display(pg_table.head())\n", - " \n", + "\n", "# test spectronaut PE table\n", "pe_table_path = \"./dev_grouping_comparison_data/spectronaut/precursor_level\"\n", "pe_table_filename = \"HeLa_QC_PE_20240409_140530_20240321_Report.tsv\"\n", "pe_table, engine = read_and_parse_peptide_table(pe_table_path, pe_table_filename)\n", - "assert engine == 'Spectronaut'\n", + "assert engine == \"Spectronaut\"\n", "display(pe_table.head())\n", "\n", "# test spectronaut PG table\n", "pg_table_path = \"./dev_grouping_comparison_data/spectronaut/group_level\"\n", "pg_table_filename = \"HeLa_QC_PG_20240409_140824_20240321_Report.tsv\"\n", "pg_table, engine = read_and_parse_peptide_table(pg_table_path, pg_table_filename)\n", - "assert engine == 'Spectronaut'\n", + "assert engine == \"Spectronaut\"\n", "display(pg_table.head())" ] }, diff --git a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb index 12d80446..780d4fb3 100644 --- a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb +++ b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb @@ -1472,7 +1472,7 @@ " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"A\", \"A\", \"B\", 
\"B\"],\n", " # \"pg\": [\"A\", \"A\", \"B\", \"B\"], # parsimonious grouping\n", - " \"pg\" : [\"A\", \"A\", \"B\", \"B\"]\n", + " \"pg\": [\"A\", \"A\", \"B\", \"B\"],\n", " }\n", "\n", " # 2. differentiable proteins: shared peptides go to one protein\n", @@ -1494,7 +1494,7 @@ " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"A\", \"A\", \"A\", \"B\"],\n", " # \"pg\": [\"A\", \"A\", \"A\", \"B\"], # parsimonious grouping\n", - " \"pg\" : [\"A\", \"A;B\", \"A;B\", \"B\"]\n", + " \"pg\": [\"A\", \"A;B\", \"A;B\", \"B\"],\n", " }\n", "\n", " # 3. indistinguishable proteins: one is totally removed\n", @@ -1516,7 +1516,7 @@ " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"A\", \"A\", \"A\", \"A\"],\n", " # \"pg\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"], # parsimonious grouping\n", - " \"pg\" : [\"A\", \"A\", \"A\", \"A\"]\n", + " \"pg\": [\"A\", \"A\", \"A\", \"A\"],\n", " }\n", "\n", " # 4. subset protein: subsetted protein is removed\n", @@ -1538,7 +1538,7 @@ " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"A\", \"A\", \"A\", \"A\"],\n", " # \"pg\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"], # parsimonious grouping\n", - " \"pg\" : [\"A\", \"A\", \"A\", \"A\"]\n", + " \"pg\": [\"A\", \"A\", \"A\", \"A\"],\n", " }\n", "\n", " # 5. subsumable proteins --> there are two possible outcomes depending on whether one starts with the middle or either end of the chain\n", @@ -1560,7 +1560,7 @@ " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"A\", \"A\", \"C\", \"C\"],\n", " # \"pg\": [\"A\", \"A\", \"C;B\", \"C;B\"], # parsimonious grouping\n", - " \"pg\" : [\"A\", \"A\", \"C\", \"C\"]\n", + " \"pg\": [\"A\", \"A\", \"C\", \"C\"],\n", " }\n", "\n", " # 6. 
a group of proteins identified by shared peptides only\n", @@ -1582,7 +1582,7 @@ " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"A\", \"A\", \"A\", \"A\"],\n", " # \"pg\": [\"A;B;C\", \"A;B;C\", \"A;B;C\", \"A;B;C\"], # parsimonious grouping\n", - " \"pg\" : [\"A\", \"A\", \"A\", \"A\"]\n", + " \"pg\": [\"A\", \"A\", \"A\", \"A\"],\n", " }\n", "\n", " # 7. circular proteins\n", @@ -1604,7 +1604,7 @@ " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"C\", \"C\", \"C\", \"A\"],\n", " # \"pg\": [\"C;B\", \"C;B\", \"C;B\", \"A;D;E\"], # parsimonious grouping\n", - " \"pg\" : [\"A;C\", \"C\", \"C\", \"A\"]\n", + " \"pg\": [\"A;C\", \"C\", \"C\", \"A\"],\n", " }\n", "\n", " # 8. Complex example --> depending on which of the equivalent proteins P1 and P4 is chosen first, the grouping will be different\n", @@ -1626,7 +1626,7 @@ " \"decoy\": [0, 0, 0, 0],\n", " \"pg_master\": [\"P2\", \"P1\", \"P2\", \"P2\"],\n", " # \"pg\": [\"P2;P3;P5\", \"P1;P4\", \"P2;P3;P5\", \"P2;P3;P5\"], # parsimonious grouping\n", - " \"pg\" : [\"P1;P2\", \"P1\", \"P2\", \"P2\"]\n", + " \"pg\": [\"P1;P2\", \"P1\", \"P2\", \"P2\"],\n", " }\n", "\n", "\n", diff --git a/tests/unit_tests/test_grouping.py b/tests/unit_tests/test_grouping.py index cd7c9c4b..c9c5a12f 100644 --- a/tests/unit_tests/test_grouping.py +++ b/tests/unit_tests/test_grouping.py @@ -20,7 +20,7 @@ def construct_test_cases(): "proteins": ["A", "A", "B", "B"], "decoy": [0, 0, 0, 0], "pg_master": ["A", "A", "B", "B"], - "pg": ["A", "A", "B", "B"], # heuristic grouping + "pg": ["A", "A", "B", "B"], # heuristic grouping } differentiable_proteins_input = { @@ -33,7 +33,7 @@ def construct_test_cases(): "proteins": ["A", "A;B", "A;B", "B"], "decoy": [0, 0, 0, 0], "pg_master": ["A", "A", "A", "B"], - "pg": ["A", "A;B", "A;B", "B"], # heuristic grouping + "pg": ["A", "A;B", "A;B", "B"], # heuristic grouping } indistinguishable_proteins_input = { @@ -46,7 +46,7 @@ def construct_test_cases(): "proteins": ["A;B", "A;B", "A;B", "A;B"], 
"decoy": [0, 0, 0, 0], "pg_master": ["A", "A", "A", "A"], - "pg": ["A", "A", "A", "A"], # heuristic grouping + "pg": ["A", "A", "A", "A"], # heuristic grouping } subset_proteins_input = { @@ -59,33 +59,33 @@ def construct_test_cases(): "proteins": ["A", "A;B", "A;B", "A;B"], "decoy": [0, 0, 0, 0], "pg_master": ["A", "A", "A", "A"], - "pg": ["A", "A", "A", "A"], # heuristic grouping + "pg": ["A", "A", "A", "A"], # heuristic grouping } subsumable_proteins_input = { "precursor_idx": [1, 2, 3, 4], "proteins": ["A", "A;B", "B;C", "C"], - "decoy": [0, 0, 0, 0], # heuristic grouping + "decoy": [0, 0, 0, 0], # heuristic grouping } subsumable_proteins_expected = { "precursor_idx": [1, 2, 3, 4], "proteins": ["A", "A;B", "B;C", "C"], "decoy": [0, 0, 0, 0], "pg_master": ["A", "A", "C", "C"], - "pg": ["A", "A", "C", "C"], # heuristic grouping + "pg": ["A", "A", "C", "C"], # heuristic grouping } shared_only_proteins_input = { "precursor_idx": [1, 2, 3, 4], "proteins": ["A;B", "A;B;C", "A;B;C", "A;C"], - "decoy": [0, 0, 0, 0], # heuristic grouping + "decoy": [0, 0, 0, 0], # heuristic grouping } shared_only_proteins_expected = { "precursor_idx": [1, 2, 3, 4], "proteins": ["A;B", "A;B;C", "A;B;C", "A;C"], "decoy": [0, 0, 0, 0], "pg_master": ["A", "A", "A", "A"], - "pg": ["A", "A", "A", "A"], # heuristic grouping + "pg": ["A", "A", "A", "A"], # heuristic grouping } circular_proteins_input = { @@ -98,7 +98,7 @@ def construct_test_cases(): "proteins": ["A;B;C", "B;C;D", "C;D;E", "D;E;A"], "decoy": [0, 0, 0, 0], "pg_master": ["C", "C", "C", "A"], - "pg": ["A;C", "C", "C", "A"], # heuristic grouping + "pg": ["A;C", "C", "C", "A"], # heuristic grouping } complex_example_proteins_input = { @@ -111,15 +111,11 @@ def construct_test_cases(): "proteins": ["P1;P2;P3;P4", "P1;P4", "P2", "P2;P5"], "decoy": [0, 0, 0, 0], "pg_master": ["P2", "P1", "P2", "P2"], - "pg": ["P1;P2", "P1", "P2", "P2"], # heuristic grouping + "pg": ["P1;P2", "P1", "P2", "P2"], # heuristic grouping } test_cases = [ - ( - 
"distinct_proteins", - distinct_proteins_input, - distinct_proteins_expected - ), + ("distinct_proteins", distinct_proteins_input, distinct_proteins_expected), ( "differentiable proteins", differentiable_proteins_input, @@ -130,26 +126,14 @@ def construct_test_cases(): indistinguishable_proteins_input, indistinguishable_proteins_expected, ), - ( - "subset proteins", - subset_proteins_input, - subset_proteins_expected - ), + ("subset proteins", subset_proteins_input, subset_proteins_expected), ( "subsumable proteins", subsumable_proteins_input, subsumable_proteins_expected, ), - ( - "shared only", - shared_only_proteins_input, - shared_only_proteins_expected - ), - ( - "circular", - circular_proteins_input, - circular_proteins_expected - ), + ("shared only", shared_only_proteins_input, shared_only_proteins_expected), + ("circular", circular_proteins_input, circular_proteins_expected), ( "complex example", complex_example_proteins_input, From 303f0c4cf17da27a5678fe638b133aff76852d23 Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 24 May 2024 18:07:23 +0200 Subject: [PATCH 10/48] #140: hack to have history shown in neptune --- tests/e2e_tests/calc_metrics.py | 67 +++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/tests/e2e_tests/calc_metrics.py b/tests/e2e_tests/calc_metrics.py index b5ce1b3f..72f5fedc 100644 --- a/tests/e2e_tests/calc_metrics.py +++ b/tests/e2e_tests/calc_metrics.py @@ -11,6 +11,9 @@ import pandas as pd import neptune +import matplotlib.pyplot as plt +from datetime import datetime + from tests.e2e_tests.prepare_test_data import get_test_case, OUTPUT_DIR_NAME NEPTUNE_PROJECT_NAME = os.environ.get("NEPTUNE_PROJECT_NAME") @@ -118,6 +121,62 @@ def _calc(self): self._metrics[f"{self._name}/{col}_std"] = df[col].std() +def _basic_plot(df: pd.DataFrame, test_case: str, metric: str, metric_std: str = None): + """Draw a basic line plot of `metric` for `test_case` over time.""" + + 
df = ( + df[df["test_case"] == test_case] + .sort_index(ascending=False) + .reset_index(drop=True) + ) + + fig, ax = plt.subplots() + ax.scatter(x=df.index, y=df[metric]) + if metric_std: + ax.errorbar(x=df.index, y=df[metric], yerr=df[metric_std]) + + ax.set_title(f"test_case: {test_case}, metric: {metric}") + ax.set_ylabel(metric) + ax.set_xlabel("test runs") + + labels = [] + for x, y, z in zip( + df["sys/creation_time"], + df["branch_name"], + df["short_sha"], + ): + fmt = "%Y-%m-%d %H:%M:%S.%f" + dt = datetime.strptime(str(x), fmt) + x = dt.strftime("%Y%m%d_%H:%M:%S") + + labels.append(f"{x}:\n{y} [{z}]") + + ax.set_xticks(df.index, labels, rotation=66) + + return fig + + +def _get_history_plot(test_results: dict): + """Get all past runs from neptune, add the current one and create plots.""" + + test_results = test_results.copy() + test_results["sys/creation_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") + test_results_df = pd.DataFrame(test_results, index=[0]) + + project = neptune.init_project(project=NEPTUNE_PROJECT_NAME, mode="read-only") + runs_table_df = project.fetch_runs_table().to_pandas() + + df = pd.concat([runs_table_df, test_results_df]) + + test_case_name = test_results["test_case"] + # TODO do this for all metrics + fig = _basic_plot( + df, test_case_name, "BasicStats/proteins_mean", "BasicStats/proteins_std" + ) + + return [("BasicStats/proteins_mean", fig)] + + if __name__ == "__main__": test_case_name = sys.argv[1] run_time_minutes = int(sys.argv[2]) / 60 @@ -167,4 +226,12 @@ def _calc(self): if os.path.exists(file_path): neptune_run["output/" + file_name].track_files(file_path) + try: + history_plots = _get_history_plot(test_results) + + for name, plot in history_plots: + neptune_run[f"plots/{name}"].upload(plot) + except Exception as e: + print(f"no plots today: {e}") + neptune_run.stop() From 5c22f12bf8845151e6ca8d187e07fd595b43cccb Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 
24 May 2024 18:12:52 +0200 Subject: [PATCH 11/48] #140: hack to have history shown in neptune --- tests/e2e_tests/calc_metrics.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/e2e_tests/calc_metrics.py b/tests/e2e_tests/calc_metrics.py index 72f5fedc..a723f9c7 100644 --- a/tests/e2e_tests/calc_metrics.py +++ b/tests/e2e_tests/calc_metrics.py @@ -156,7 +156,7 @@ def _basic_plot(df: pd.DataFrame, test_case: str, metric: str, metric_std: str = return fig -def _get_history_plot(test_results: dict): +def _get_history_plots(test_results: dict, metrics_classes: list): """Get all past runs from neptune, add the current one and create plots.""" test_results = test_results.copy() @@ -169,12 +169,15 @@ def _get_history_plot(test_results: dict): df = pd.concat([runs_table_df, test_results_df]) test_case_name = test_results["test_case"] - # TODO do this for all metrics - fig = _basic_plot( - df, test_case_name, "BasicStats/proteins_mean", "BasicStats/proteins_std" - ) - return [("BasicStats/proteins_mean", fig)] + figs = [] + for metrics_class in [cls.__name__ for cls in metrics_classes]: + # TODO find a smarter way to get the metrics + for metric in [k for k in test_results.keys() if k.startswith(metrics_class)]: + fig = _basic_plot(df, test_case_name, metric) + figs.append((metric, fig)) + + return figs if __name__ == "__main__": @@ -227,7 +230,7 @@ def _get_history_plot(test_results: dict): neptune_run["output/" + file_name].track_files(file_path) try: - history_plots = _get_history_plot(test_results) + history_plots = _get_history_plots(test_results, metrics_classes) for name, plot in history_plots: neptune_run[f"plots/{name}"].upload(plot) From 7354250d04d27c59762e449635837d639ac0a7b0 Mon Sep 17 00:00:00 2001 From: Vincenth Brennsteiner Date: Fri, 24 May 2024 18:13:50 +0200 Subject: [PATCH 12/48] add comparison of benchmark HeLa results with heuristic or maximum_parsimony grouping to dev_grouping_comparison.ipynb notebook --- 
nbs/debug/dev_grouping_comparison.ipynb | 111 +++++++++++++++++++++++- 1 file changed, 108 insertions(+), 3 deletions(-) diff --git a/nbs/debug/dev_grouping_comparison.ipynb b/nbs/debug/dev_grouping_comparison.ipynb index fa381c51..d15e0d3d 100644 --- a/nbs/debug/dev_grouping_comparison.ipynb +++ b/nbs/debug/dev_grouping_comparison.ipynb @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -1472,10 +1472,115 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# read and parse peptide level results\n", + "heuristic_data_path = \"./dev_grouping_comparison_data/ad_benchmark_heuristic/\"\n", + "parsimony_data_path = \"./dev_grouping_comparison_data/ad_benchmark_parsimony/\"\n", + "\n", + "# load peptide and protein group level results\n", + "data_tables = {\n", + " \"pe_heuristic\" : read_and_parse_peptide_table(heuristic_data_path, \"precursors.tsv\")[0],\n", + " \"pg_heuristic\" : read_and_parse_peptide_table(heuristic_data_path, \"pg.matrix.tsv\")[0],\n", + " \"pe_parsimony\" : read_and_parse_peptide_table(parsimony_data_path, \"precursors.tsv\")[0],\n", + " \"pg_parsimony\" : read_and_parse_peptide_table(parsimony_data_path, \"pg.matrix.tsv\")[0],\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAnUAAAIdCAYAAABbUItjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAACO/0lEQVR4nOzdeXxM1/8/8Ndkm+wji2xExBaJxBZK0CaWRFQs1ZY2GqKK2oJQrSqCEmsoilbt+6e11NJGYm8qsYSUkKLEksqIEhOCrOf3h1/u10hCQmKSyev5eMzj0Xvue868772Z07d777kjE0IIEBEREVGlpqPpBIiIiIjo9bGoIyIiItICLOqIiIiItACLOiIiIiItwKKOiIiISAuwqCMiIiLSAizqiIiIiLQAizoiIiIiLcCijoiIiEgLsKjTUmvWrIFMJoOhoSGuX79eaL2Pjw/c3d01kBlw+PBhyGQy/PLLLxr5/NK6du0aunbtCktLS8hkMowePVrTKRXp2LFjCAsLw/379wut8/HxgY+Pz0v7uHbtGmQyGdasWVPm+ZF24lhTdirqWLNp0yYsXLjwtfoo6RhEr0dP0wlQ+crKysI333yD9evXazqVSmvMmDE4fvw4Vq1aBTs7O9jb22s6pSIdO3YMU6dORXBwMKpVq6a2bunSpZpJiqoMjjWvr6KONZs2bUJiYuJrFZkcg94MnqnTcv7+/ti0aRP++usvTafyxj1+/Bhl8dPGiYmJeOutt9CzZ0+0bt0aTk5OZZDdm+Xm5gY3NzdNp0FajGNN5Rlryirf0qhoY9Djx481nUK5YFGn5caPHw8rKyt8+eWXL4x70WU3mUyGsLAwaTksLAwymQxnz57Fhx9+CIVCAUtLS4SGhiI3NxcXL16Ev78/zMzMULt2bcyZM6fIz3zy5AlCQ0NhZ2cHIyMjeHt748yZM4XiTp06he7du8PS0hKGhoZo1qwZ/ve//6nFFFwCioqKwqefforq1avD2NgYWVlZxW7zjRs38Mknn8DGxgZyuRyurq6YP38+8vPzAfzfpZt//vkHv//+O2QyGWQyGa5du1ZsnzKZDCNGjMAPP/yABg0aQC6Xw83NDVu2bCkUq1QqMWTIENSsWRMGBgZwdnbG1KlTkZubK8UUHJc5c+ZgxowZqFWrFgwNDdGiRQscOHBA7Zh88cUXAABnZ2cp18OHDwMo+tLHrVu30Lt3b5iZmUGhUKBPnz5QKpVFbldJjsGjR48wbtw4ODs7w9DQEJaWlmjRogU2b95c7P4i7cGxpmKONS/KNz8/H3PmzEHDhg0hl8thY2ODfv36ISUlRXq/j48P9u7di+vXr0t5yWQyaX12dja+/fZbqY/q1atjwIABuHPnjloez49BBX8H8+bNQ0REBJydnWFqagovLy/ExcUVu93PiomJgZeXFwwNDVGjRg1MmjQJP/30U6F9V7t2bQQEBGD79u1o1qwZDA0NMXXqVABPC+kePXrAwsIChoaGaNq0KdauXVvkPnz+eBQct4JxtmA73d3d8ccff6B169YwMjKScsvLy1N7/7Jly9CkSROYmprCzMwMDRs2xNdff12ibS+WIK20evVqAUCcPHlSfPfddwKAOHDggLTe29tbNGrUSFpOTk4WAMTq1asL9QVATJkyRVqeMmWKACBcXFzE9OnTRXR0tBg/frwAIEaMGCEaNmwoFi1aJKKjo8WAAQMEALFt2zbp/YcOHRIAhKOjo+jRo4fYvXu32LBhg6hXr54wNzcXV65ckWIPHjwoDAwMxNtvvy22bt0qIiMjRXBwcKFcC7a3Ro0aYvDgweL3338Xv/zyi8jNzS1y/6SlpYkaNWqI6tWri+XLl4vIyEgxYsQIAUAMHTpUCCGESqUSsbGxws7OTrRt21bExsaK2NhY8eTJk2L3e8F2ubm5ic2bN4tdu3YJf39/AUD8/PPPUlxqaqpwdHQUTk5O4ocffhD79+8X06dPF3K5XAQHBxc6Lo6OjqJdu3Z
i27Zt4ueffxYtW7YU+vr64tixY0IIIW7evClGjhwpAIjt27dLuapUKul4e3t7S/0+evRIuLq6CoVCIRYvXiz27dsnQkJCRK1atQrt25IegyFDhghjY2MREREhDh06JPbs2SNmzZolFi9eXOz+osqPY03FHmtelO/gwYOlfRkZGSmWL18uqlevLhwdHcWdO3eEEEKcP39etG3bVtjZ2Ul5xcbGCiGEyMvLE/7+/sLExERMnTpVREdHi59++knUqFFDuLm5iUePHqn9HTw7BhX8HdSuXVv4+/uLnTt3ip07dwoPDw9hYWEh7t+/X+y2CyHEX3/9JQwNDUXjxo3Fli1bxK5du8S7774rateuLQCI5ORkKdbJyUnY29uLOnXqiFWrVolDhw6JEydOiL///luYmZmJunXrinXr1om9e/eKjz/+WAAQs2fPLrQPn+1TiP/7+zp06JDadlpZWQkHBwexaNEiaWwFIIYPHy7Fbd68WQAQI0eOFFFRUWL//v1i+fLlIiQk5IXb/TIs6rTUswNtVlaWqFOnjmjRooXIz88XQpTNQDt//ny1uKZNm0pFRYGcnBxRvXp10atXL6mt4IvQvHlzKR8hhLh27ZrQ19cXn332mdTWsGFD0axZM5GTk6P2WQEBAcLe3l7k5eWpbW+/fv1KtH+++uorAUAcP35crX3o0KFCJpOJixcvSm1OTk6ia9euJeoXgDAyMhJKpVJqy83NFQ0bNhT16tWT2oYMGSJMTU3F9evX1d4/b948AUCcP39eCPF/x8XBwUE8fvxYisvIyBCWlpaiU6dOUtvcuXOLHHiEKDygLlu2TAAQv/76q1rcoEGDCv0dlPQYuLu7i549e75kD5G24VjzYpoea4rLNykpSQAQw4YNU2s/fvy4ACC+/vprqa1r167CycmpUA4FhcmzhbQQQpw8eVIAEEuXLpXaiivqPDw81AriEydOCABi8+bNL9z+Dz/8UJiYmEjFpxBPi0w3N7ciizpdXV21fS2EEB999JGQy+Xixo0bau1dunQRxsbGUmFZ2qKuuLFVR0dHGvNHjBghqlWr9sJtfBW8/FoFGBgY4Ntvv8WpU6cKXUp4HQEBAWrLrq6ukMlk6NKli9Smp6eHevXqFTkrLjAwUO00vpOTE9q0aYNDhw4BAP755x/8/fff6Nu3LwAgNzdXer377rtITU3FxYsX1fp8//33S5T7wYMH4ebmhrfeekutPTg4GEIIHDx4sET9FKVjx46wtbWVlnV1ddGnTx/8888/0mWNPXv2oH379nBwcFDbroJ9d+TIEbU+e/XqBUNDQ2nZzMwM3bp1w9GjRwud0i+JQ4cOwczMDN27d1drDwwMVFsuzTF466238Pvvv+Orr77C4cOHtfaeFSoex5rCND3WFJdvwbYHBwertb/11ltwdXVVu72jOHv27EG1atXQrVs3tX3WtGlT2NnZqV2WLE7Xrl2hq6srLTdu3BgAijyOzzpy5Ag6dOgAa2trqU1HRwe9e/cuMr5x48Zo0KCBWtvBgwfRsWNHODo6qrUHBwfj0aNHiI2NfWn+RSlubM3Pz8fRo0cBPN3P9+/fx8cff4xff/0V//333yt91vNY1FURH330EZo3b46JEyciJyenTPq0tLRUWzYwMICxsbFa8VHQ/uTJk0Lvt7OzK7Lt7t27AIDbt28DAMaNGwd9fX2117BhwwCg0BehpLPF7t69W2Ssg4ODtP5VFbddz/Z7+/Zt7N69u9B2NWrUCEDh7Squz+zsbDx8+LDUOd69e1ftfwbFfU5pjsGiRYvw5ZdfYufOnWjfvj0sLS3Rs2dPXL58udT5UeXFsUadpseaAs/nULC+uNxKktft27dx//59GBgYFNpvSqWyRIWKlZWV2rJcLgfw8okMxY1hRbUBRW9neR2bF42tBX0GBQVh1apVuH79Ot5//33Y2NigVatWiI6OfqXPLMBHmlQRMpkMs2fPhq+vL3788cdC6wsGx+dv9n2dAedliro
pX6lUSl/ygn+BTZgwAb169SqyDxcXF7XlZ/81/iJWVlZITU0t1H7r1i21z34VxW1XwecW9N+4cWPMmDGjyD4KBpWX9WlgYABTU9NS52hlZYUTJ068NPfSHAMTExNMnToVU6dOxe3bt6Wzdt26dcPff/9d6hypcuJYo07TY02B5/MtWJ+amoqaNWsWyq0keVlbW8PKygqRkZFFrjczM3tpH6/KyspKKsafVdxkr6KOV0mPTXF/s8UVrS/K69njMmDAAAwYMACZmZk4evQopkyZgoCAAFy6dOmVZz7zTF0V0qlTJ/j6+mLatGmFzu7Y2trC0NAQZ8+eVWv/9ddfyy2fzZs3q02rv379Oo4dOybNkHJxcUH9+vXx119/oUWLFkW+XnXQ6NixIy5cuIDTp0+rta9btw4ymQzt27d/5e06cOCA2pc6Ly8PW7duRd26daXBMyAgAImJiahbt26R2/V8Ubd9+3a1MxAPHjzA7t278fbbb0uXLkr6L1wAaN++PR48eIBdu3aptW/atElt+VWPga2tLYKDg/Hxxx/j4sWLePTo0UtzIu3Bseb/aHqsKU6HDh0AABs2bFBrP3nyJJKSktCxY0epTS6XFzmuBAQE4O7du8jLyytynz1fCJclb29vHDx4UK2wys/Px88//1ziPjp27IiDBw9KRVyBdevWwdjYGK1btwbwdPYsgEJ/s8+PnwWKG1t1dHTwzjvvFIo3MTFBly5dMHHiRGRnZ+P8+fMl3obn8UxdFTN79mx4enoiLS1NutQHPP1XzCeffIJVq1ahbt26aNKkCU6cOFHof/JlKS0tDe+99x4GDRoElUqFKVOmwNDQEBMmTJBifvjhB3Tp0gWdO3dGcHAwatSogXv37iEpKQmnT58u1Rf4WWPGjMG6devQtWtXTJs2DU5OTti7dy+WLl2KoUOHFrr3ojSsra3RoUMHTJo0CSYmJli6dCn+/vtvtUcNTJs2DdHR0WjTpg1CQkLg4uKCJ0+e4Nq1a/jtt9+wfPlytUFZV1cXvr6+CA0NRX5+PmbPno2MjAxpWj4AeHh4AAC+++479O/fH/r6+nBxcSnyf0b9+vXDggUL0K9fP8yYMQP169fHb7/9hn379hWKLekxaNWqFQICAtC4cWNYWFggKSkJ69evh5eXF4yNjV95f1LlxLHmKU2PNcVxcXHB4MGDsXjxYujo6KBLly64du0aJk2aBEdHR4wZM0aK9fDwwPbt27Fs2TJ4enpCR0cHLVq0wEcffYSNGzfi3XffxahRo/DWW29BX18fKSkpOHToEHr06IH33nvvlbfvRSZOnIjdu3ejY8eOmDhxIoyMjLB8+XJkZmYCeHp/3ctMmTJFur958uTJsLS0xMaNG7F3717MmTMHCoUCANCyZUu4uLhg3LhxyM3NhYWFBXbs2IGYmJgi+7WyssLQoUNx48YNNGjQAL/99htWrFiBoUOHolatWgCAQYMGwcjICG3btoW9vT2USiXCw8OhUCjQsmXLV98xZT71giqEZ2ekPS8wMFAAUJuRJsTTafWfffaZsLW1FSYmJqJbt27i2rVrxc5Ie3bWkRBC9O/fX5iYmBT6vOdnvxXMGFq/fr0ICQkR1atXF3K5XLz99tvi1KlThd7/119/id69ewsbGxuhr68v7OzsRIcOHcTy5ctLtL3FuX79uggMDBRWVlZCX19fuLi4iLlz50qz3AqUdkba8OHDxdKlS0XdunWFvr6+aNiwodi4cWOh2Dt37oiQkBDh7Ows9PX1haWlpfD09BQTJ04UDx8+FEL83wyx2bNni6lTp4qaNWsKAwMD0axZM7Fv375CfU6YMEE4ODgIHR0dtVlZz888E0KIlJQU8f777wtTU1NhZmYm3n//fXHs2LEiZyaW5Bh89dVXokWLFsLCwkLI5XJRp04dMWbMGPHff/+VaN9R5cSx5uU0Oda8KN+8vDwxe/Zs0aBBA6Gvry+sra3FJ598Im7evKkWd+/ePfHBBx+IatW
qCZlMJp4tHXJycsS8efNEkyZNhKGhoTA1NRUNGzYUQ4YMEZcvX5biipv9Onfu3CK37dm/g+L88ccfolWrVkIulws7OzvxxRdfiNmzZwsAao9EedF+PXfunOjWrZtQKBTCwMBANGnSpMiZ2ZcuXRJ+fn7C3NxcVK9eXYwcOVLs3bu3yNmvjRo1EocPHxYtWrQQcrlc2Nvbi6+//lptZvXatWtF+/btha2trTAwMBAODg6id+/e4uzZsy/d7heRCfGGHytNpMVkMhmGDx+OJUuWlEl/165dg7OzM+bOnYtx48aVSZ9EVPmV9VijLfz8/HDt2jVcunRJI5/v4+OD//77D4mJiRr5fF5+JSIiokonNDQUzZo1g6OjI+7du4eNGzciOjoaK1eu1HRqGsOijoiIiCqdvLw8TJ48GUqlEjKZDG5ubli/fj0++eQTTaemMbz8SkRERKQF+EgTIiIiIi3Aoo7oGdeuXYNMJsOaNWs0nUq52rRpExYuXFiovWD7582bV+45hIWFlfgBrkRUehzPqt54xqKO6Bn29vaIjY1F165dNZ1KuSpuECQi7cHxrOrhRAkqtUePHpXLw2SFEHjy5AmMjIzKvO+Sksvl0lPEiUj7cTwjbcIzdVqk4PTvmTNn0KtXL5ibm0OhUOCTTz7BnTt3CsVv3boVXl5eMDExgampKTp37owzZ86oxQQHB8PU1BTnzp2Dn58fzMzM1H4+pii//vorGjduDLlcjjp16uC7774r8tS0TCbDiBEjsHz5cri6ukIul2Pt2rUAgJiYGHTs2BFmZmYwNjZGmzZtsHfv3iK393lr1qyBTCbDtWvXpLbatWsjICAAO3bsQOPGjWFoaIg6depg0aJFau8t6nJFweecP38eH3/8MRQKBWxtbfHpp59CpVKpvf/+/fsYOHAgLC0tYWpqiq5du+Lq1auQyWQICwt74X47fPgwZDIZNm3ahC+//BL29vYwNTVFt27dcPv2bTx48ACDBw+GtbU1rK2tMWDAgEI/wSSEwNKlS9G0aVMYGRnBwsICH3zwAa5evSrF+Pj4YO/evbh+/TpkMpn0el5ERAScnZ1hamoKLy8vxMXFFYrZtWuX9IsRZmZm8PX1RWxsbKG4vXv3omnTppDL5XB2dn4jl0OocuN49hTHM45npfJajy6mCqXg6etOTk7iiy++EPv27RMRERHCxMRENGvWTGRnZ0uxM2bMEDKZTHz66adiz549Yvv27cLLy0uYmJiI8+fPS3H9+/cX+vr6onbt2iI8PFwcOHCgyF8yKPD7778LHR0d4ePjI3bs2CF+/vln0apVK1G7dm3x/J8bAFGjRg3RuHFjsWnTJnHw4EGRmJgoDh8+LPT19YWnp6fYunWr2Llzp/Dz8xMymUxs2bKl0PY+r+AJ6snJyVKbk5OTqFGjhqhVq5ZYtWqV+O2330Tfvn0LPdG84Cnnzz5RvOBzXFxcxOTJk0V0dLSIiIgQcrlcDBgwQIrLy8sT7dq1E4aGhmLWrFkiKipKTJ06VdSvX79ET0gvePq9k5OTCA4OFpGRkWL58uXC1NRUtG/fXvj6+opx48aJqKgoMXv2bKGrqytGjhyp1segQYOEvr6+GDt2rIiMjBSbNm0SDRs2FLa2tkKpVAohhDh//rxo27atsLOzE7GxsdLr2e2vXbu28Pf3Fzt37hQ7d+4UHh4ewsLCQu0p7Rs3bhQAhJ+fn9i5c6fYunWr8PT0FAYGBuKPP/6Q4vbv3y90dXVFu3btxPbt28XPP/8sWrZsKWrVqlXk8SMSguNZAY5nHM9KQ/MZUJkp+LKOGTNGrb3gj3XDhg1CCCFu3Lgh9PT0Cn2BHjx4IOzs7ETv3r2ltv79+wsAYtWqVSXKoWXLlsLR0VFkZWWp9WtlZVXkIKhQKMS9e/fU2lu3bi1sbGzEgwcPpLbc3Fzh7u4uatasKfLz89W293nFDYIymUwkJCSoxfr6+gpzc3ORmZkphHjxIDhnzhy19w4bNkw
YGhpK+RT8ZMyyZcvU4sLDw0s1CHbr1k2tffTo0QKACAkJUWvv2bOnsLS0lJZjY2MFADF//ny1uJs3bwojIyMxfvx4qa1r167CycmpUA4F2+/h4SFyc3Ol9hMnTggAYvPmzUKIpwO+g4OD8PDwUPupowcPHggbGxvRpk0bqa1Vq1bCwcFBPH78WGrLyMgQlpaWFWIQpIqJ49lTHM84npUGL79qob59+6ot9+7dG3p6ejh06BAAYN++fcjNzUW/fv2Qm5srvQwNDeHt7Y3Dhw8X6vP9999/6edmZmbi1KlT6NmzJwwMDKT2glPuRenQoQMsLCzU+jh+/Dg++OADmJqaSu26uroICgpCSkoKLl68+NJcitKoUSM0adJErS0wMBAZGRk4ffr0S9/fvXt3teXGjRvjyZMnSEtLAwAcOXIEwNP9/ayPP/64VHkGBASoLbu6ugJAoZudXV1dce/ePemSxZ49e6QfS3/2uNrZ2aFJkyZFHtfidO3aFbq6utJy48aNAQDXr18HAFy8eBG3bt1CUFCQ2g9nm5qa4v3330dcXBwePXqEzMxMnDx5Er169YKhoaEUZ2ZmVuzfBNGzOJ4VjePZ4RLnUJXGM06U0EJ2dnZqy3p6erCyssLdu3cBALdv3wYAtGzZssj3P/tHDQDGxsYwNzd/6eemp6dDCAFbW9tC64pqA57Oziqqj+fbAcDBwQEApO0oref3y7NtJenTyspKbVkulwMAHj9+LPWhp6cHS0tLtbjitr04z7+/4H8oxbU/efIEpqamuH37drH7HwDq1KlT4hxKsq1A4eMHPD1O+fn50rHMz89/4b4nehGOZ0XjeMbxrCgs6rSQUqlEjRo1pOXc3FzcvXtX+sO2trYGAPzyyy9wcnJ6aX8lffaOhYUFZDKZNMg+n1NJ+rawsICOjg5SU1MLxd66dQvA/+Vf8C+lrKws6UsKAP/991+Rn1VUDgVtz3/pX4WVlRVyc3Nx7949tQGruG0va9bW1pDJZPjjjz/U9keBotpeVcH+Ku446ejowMLCAkIIyGSyF+57ohfheMbxjONZyfHyqxbauHGj2vL//vc/5ObmwsfHBwDQuXNn6Onp4cqVK2jRokWRr1dhYmKCFi1aYOfOncjOzpbaHz58iD179pS4j1atWmH79u3Sv6IAID8/Hxs2bEDNmjXRoEEDAE9ngAHA2bNn1frYvXt3kX2fP38ef/31l1rbpk2bYGZmhubNm5covxfx9vYG8HQW3rO2bNny2n2XREBAAIQQ+Pfff4s8ph4eHlKsXC5X27+l5eLigho1amDTpk0Qz/zSYGZmJrZt2ybNIDMxMcFbb72F7du348mTJ1LcgwcPij1ORM/ieMbxjONZyfFMnRbavn079PT04Ovri/Pnz2PSpElo0qSJdG9E7dq1MW3aNEycOBFXr16Fv78/LCwscPv2bZw4cQImJiaYOnXqK332tGnT0LVrV3Tu3BmjRo1CXl4e5s6dC1NTU9y7d69EfYSHh8PX1xft27fHuHHjYGBggKVLlyIxMRGbN2+W/jX87rvvwtLSEgMHDsS0adOgp6eHNWvW4ObNm0X26+DggO7duyMsLAz29vbYsGEDoqOjMXv27DJ5TpW/vz/atm2LsWPHIiMjA56enoiNjcW6desAFL4MVNbatm2LwYMHY8CAATh16hTeeecdmJiYIDU1FTExMfDw8MDQoUMBAB4eHti+fTuWLVsGT09P6OjolOp/fjo6OpgzZw769u2LgIAADBkyBFlZWZg7dy7u37+PWbNmSbHTp0+Hv78/fH19MXbsWOTl5WH27NkwMTEp8d8EVV0czziecTwrBU3MzqDyUTCrKT4+XnTr1k2YmpoKMzMz8fHHH4vbt28Xit+5c6do3769MDc3F3K5XDg5OYkPPvhA7N+/X4rp37+/MDExKVUeO3bsEB4eHsLAwEDUqlVLzJo1S4SEhAgLCwu1OABi+PDhRfbxxx9/iA4dOggTExNhZGQkWrd
uLXbv3l0o7sSJE6JNmzbCxMRE1KhRQ0yZMkX89NNPRc4W69q1q/jll19Eo0aNhIGBgahdu7aIiIhQ6+9Fs8Xu3LmjFlvUrLR79+6JAQMGiGrVqgljY2Ph6+sr4uLiBADx3XffvXC/FcwW+/nnn4v8nJMnT6q1F5fXqlWrRKtWraR9V7duXdGvXz9x6tQptTw/+OADUa1aNSGTyaRZWwXb/+xjEQqgiBlvO3fuFK1atRKGhobCxMREdOzYUfz555+F3rtr1y7RuHFjtb+J4mb7EQnB8Yzj2VMcz0pHJsQz5xqpUgsLC8PUqVNx584d6T6NiiAnJwdNmzZFjRo1EBUVpZEcateuDXd39xJfNilLmzZtQt++ffHnn3+iTZs2b/zziSojjmfF43hGxeHlVypzAwcOhK+vL+zt7aFUKrF8+XIkJSXhu+++03Rq5W7z5s34999/4eHhAR0dHcTFxWHu3Ll45513OAASVUIczzieVSYs6qjMPXjwAOPGjcOdO3egr6+P5s2b47fffkOnTp00nVq5MzMzw5YtW/Dtt98iMzMT9vb2CA4Oxrfffqvp1IjoFXA843hWmfDyKxEREZEW4CNNiIiIiLQAizoiIiIiLcCijoiIiEgLcKLEG5afn49bt27BzMysxD9XQ0SlJ4TAgwcP4ODgUO4PSq2KOJYRvTklHc9Y1L1ht27dgqOjo6bTIKoybt68iZo1a2o6Da3DsYzozXvZeMai7g0zMzMD8PTAmJubazgbIu2VkZEBR0dH6TtHZYtjGdGbU9LxjEXdG1ZwmcLc3JwDIdEbwEuD5YNjGdGb97LxjDeaEBEREWkBFnVEREREWkCjRd2yZcvQuHFj6fS9l5cXfv/9d2m9EAJhYWFwcHCAkZERfHx8cP78ebU+srKyMHLkSFhbW8PExATdu3dHSkqKWkx6ejqCgoKgUCigUCgQFBSE+/fvq8XcuHED3bp1g4mJCaytrRESEoLs7Gy1mHPnzsHb2xtGRkaoUaMGpk2bBv4gBxEREVUEGi3qatasiVmzZuHUqVM4deoUOnTogB49ekiF25w5cxAREYElS5bg5MmTsLOzg6+vLx48eCD1MXr0aOzYsQNbtmxBTEwMHj58iICAAOTl5UkxgYGBSEhIQGRkJCIjI5GQkICgoCBpfV5eHrp27YrMzEzExMRgy5Yt2LZtG8aOHSvFZGRkwNfXFw4ODjh58iQWL16MefPmISIi4g3sKSIiIqKXEBWMhYWF+Omnn0R+fr6ws7MTs2bNktY9efJEKBQKsXz5ciGEEPfv3xf6+vpiy5YtUsy///4rdHR0RGRkpBBCiAsXLggAIi4uToqJjY0VAMTff/8thBDit99+Ezo6OuLff/+VYjZv3izkcrlQqVRCCCGWLl0qFAqFePLkiRQTHh4uHBwcRH5+fom3T6VSCQBSv0RUPvhdK1/cv0RvTkm/bxXmnrq8vDxs2bIFmZmZ8PLyQnJyMpRKJfz8/KQYuVwOb29vHDt2DAAQHx+PnJwctRgHBwe4u7tLMbGxsVAoFGjVqpUU07p1aygUCrUYd3d3ODg4SDGdO3dGVlYW4uPjpRhvb2/I5XK1mFu3buHatWvFbldWVhYyMjLUXkRERERlTeNF3blz52Bqagq5XI7PP/8cO3bsgJubG5RKJQDA1tZWLd7W1lZap1QqYWBgAAsLixfG2NjYFPpcGxsbtZjnP8fCwgIGBgYvjClYLogpSnh4uHQvn0Kh0MjDOsPDw9GyZUuYmZnBxsYGPXv2xMWLF9Vibt++jeDgYDg4OMDY2Bj+/v64fPmyWsyPP/4IHx8fmJubQyaTFbov8VlZWVlo2rQpZDIZEhISpPa7d+/C398fDg4OkMvlcHR0xIgRI9SK3bCwMMhkskIvExOTMtkflUlFOnYAijwuy5cvL7Kff/75B2ZmZqhWrdqrbDrRK8vNzcU333wDZ2dnGBkZoU6dOpg2bRry8/OlmJJ8b0pyz/alS5fQo0cPWFt
bw9zcHG3btsWhQ4feyHYSPU/jRZ2LiwsSEhIQFxeHoUOHon///rhw4YK0/vlnsgghXvqcludjioovixjx/ydJvCifCRMmQKVSSa+bN2++MPfycOTIEQwfPhxxcXGIjo5Gbm4u/Pz8kJmZCeDpdvTs2RNXr17Fr7/+ijNnzsDJyQmdOnWSYgDg0aNH8Pf3x9dff/3Szxw/frzamc8COjo66NGjB3bt2oVLly5hzZo12L9/Pz7//HMpZty4cUhNTVV7ubm54cMPPyyDvVG5VKRjV2D16tVqx6Z///6FYnJycvDxxx/j7bfffoWtJno9s2fPxvLly7FkyRIkJSVhzpw5mDt3LhYvXgyg5N+bktyz3bVrV+Tm5uLgwYOIj49H06ZNERAQ8MJ/7BOVm3K+DFxqHTt2FIMHDxZXrlwRAMTp06fV1nfv3l3069dPCCHEgQMHBABx7949tZjGjRuLyZMnCyGEWLlypVAoFIU+R6FQiFWrVgkhhJg0aZJo3Lix2vp79+4JAOLgwYNCCCGCgoJE9+7d1WJOnz4tAIirV6+WePsqwn0oaWlpAoA4cuSIEEKIixcvCgAiMTFRisnNzRWWlpZixYoVhd5/6NAhAUCkp6cX2f9vv/0mGjZsKM6fPy8AiDNnzrwwn++++07UrFmz2PUJCQkCgDh69OjLN07LafrYARA7dux4aZ7jx48Xn3zyiVi9enWR3783oSJ817RZRd6/Xbt2FZ9++qlaW69evcQnn3wihCjZ96Yk92zfuXOn0NiUkZEhAIj9+/eX2/ZR1VPp7qkrIIRAVlYWnJ2dYWdnh+joaGlddnY2jhw5gjZt2gAAPD09oa+vrxaTmpqKxMREKcbLywsqlQonTpyQYo4fPw6VSqUWk5iYiNTUVCkmKioKcrkcnp6eUszRo0fVHnMSFRUFBwcH1K5du+x3RDlSqVQAAEtLSwBPLzEAgKGhoRSjq6sLAwMDxMTElKrv27dvY9CgQVi/fj2MjY1fGn/r1i1s374d3t7excb89NNPaNCgAc/6oGIcuxEjRsDa2hotW7bE8uXL1S5pAcDBgwfx888/4/vvvy/V5xOVlXbt2uHAgQO4dOkSAOCvv/5CTEwM3n33XQAl+96U5J5tKysruLq6Yt26dcjMzERubi5++OEH2NraSv/vIHqj3kyNWbQJEyaIo0ePiuTkZHH27Fnx9ddfCx0dHREVFSWEEGLWrFlCoVCI7du3i3PnzomPP/5Y2Nvbi4yMDKmPzz//XNSsWVPs379fnD59WnTo0EE0adJE5ObmSjH+/v6icePGIjY2VsTGxgoPDw8REBAgrc/NzRXu7u6iY8eO4vTp02L//v2iZs2aYsSIEVLM/fv3ha2trfj444/FuXPnxPbt24W5ubmYN29eqbZZ0/+6zc/PF926dRPt2rWT2rKzs4WTk5P48MMPxb1790RWVpYIDw8XAISfn1+hPoo725Ofny/8/f3F9OnThRBCJCcnF3um7qOPPhJGRkYCgOjWrZt4/Phxkfk+efJEWFhYiNmzZ7/6RmuJinDspk+fLo4dOybOnDkj5s2bJ4yNjaX3CCHEf//9JxwdHaUziTxTp70q8v7Nz88XX331lZDJZEJPT0/IZDIxc+ZMaX1JvjcbN24UBgYGhfr29fUVgwcPlpZTUlKEp6enkMlkQldXVzg4OLz06gRRaZX0+6bRou7TTz8VTk5OwsDAQFSvXl107NhRKuiEePrFnDJlirCzsxNyuVy888474ty5c2p9PH78WIwYMUJYWloKIyMjERAQIG7cuKEWc/fuXdG3b19hZmYmzMzMRN++fQv9T+369euia9euwsjISFhaWooRI0aoPb5ECCHOnj0r3n77bSGXy4WdnZ0ICwsr1eNMhND8QDhs2DDh5OQkbt68qdZ+6tQp0aRJEwFA6Orqis6dO4suXbqILl26FOqjuMLgu+++E23atJEK6hcVdampqSIpKUns3LlTuLm5iaFDhxaZ76ZNm4S
enp5ITU19tQ3WIhXl2D1r3rx5wtzcXFp+7733xJdffikts6jTXhV5/27evFnUrFlTbN68WZw9e1asW7dOWFpaijVr1kgxL/veFFfUderUSQwZMkQI8fT/Ud27dxddunQRMTExIj4+XgwdOlTUqFFD3Lp1681sLFUJlaKoq4o0ORCOGDFC1KxZ84X3AN6/f1+kpaUJIYR46623xLBhwwrFFFcY9OjRQ+jo6AhdXV3pVTBgFtwHWZQ//vhDAChyEOzQoYPo2bNnCbdQe1XUYxcTEyMACKVSKYR4eq/qs33o6OhI/axcufIVtvzVVeSiQxtU5P1bs2ZNsWTJErW26dOnCxcXl0KxxX1vSnLP9v79+4WOjk6hfVCvXj0RHh5eZttDVNLvm145X92lCkAIgZEjR2LHjh04fPgwnJ2di41VKBQAgMuXL+PUqVOYPn16iT9n0aJF+Pbbb6XlW7duoXPnzti6davacwKLyg/4v/tcCiQnJ+PQoUPYtWtXiXPQNhX92J05cwaGhobSY0tiY2PVZgb++uuvmD17No4dO4YaNWqUOB+i1/Ho0SPo6KjfMq6rq1vo/k+g+O/Ns/ds9+7dG8D/3bM9Z84c6XMAFPosHR2dIj+LqLyxqKsChg8fjk2bNuHXX3+FmZmZNNVeoVDAyMgIAPDzzz+jevXqqFWrFs6dO4dRo0ahZ8+eajcJK5VKKJVK/PPPPwCePmPQzMwMtWrVgqWlJWrVqqX2uaampgCAunXrombNmgCA3377Dbdv30bLli1hamqKCxcuYPz48Wjbtm2hCSerVq2Cvb09unTpUi77pTKoSMdu9+7dUCqV8PLygpGREQ4dOoSJEydi8ODB0kO5XV1d1fo5deoUdHR04O7uXg57h6ho3bp1w4wZM1CrVi00atQIZ86cQUREBD799FMp5mXfG4VCgYEDB2Ls2LGwsrKCpaUlxo0bBw8PD3Tq1AnA0wl0FhYW6N+/PyZPngwjIyOsWLECycnJ6Nq1q0a2naq4N3HakP6PJi5ZACjytXr1aimm4LEi+vr6olatWuKbb74RWVlZav1MmTLlpf08q6j7sg4ePCi8vLyEQqEQhoaGon79+uLLL78sdDkwLy9P1KxZU3z99ddltBcqp4p07H7//XfRtGlTYWpqKoyNjYW7u7tYuHChyMnJKTZ/3lOnvSry/s3IyBCjRo0StWrVEoaGhqJOnTpi4sSJat+LknxvSnLP9smTJ4Wfn5+wtLQUZmZmonXr1uK33357I9tJVUdJv28yIf7/tS96IzIyMqBQKKBSqWBubq7pdIi0Fr9r5Yv7l+jNKen3jZdfK7DHuUA2b8soNwY6gFE5fAN43MpfeR07Kh/8TpQ/ficIYFFXYT3OBQ7efnqNjMqHDEAH27IdCHnc3ozyOHZUPvideDP4nSCARV2FlZ3/dBA8cxd4kKvpbLSPmR7QzOrpfjYqw3553MpfeR07Kh/8TpQ/fieoAIu6Cu5BLpCRo+ksqLR43IjU8TtBVP4q3G+/EhEREVHpsagjIiIi0gIs6oiIiIi0AIs6IiIiIi3Aoo6IiIhIC7CoIyIiItICLOqIiIiItACLOiIiIiItwKKOiIiISAuwqCMiIiLSAizqiIiIiLQAizoiIiIiLcCijoiIiEgLsKgjIiIi0gIs6oiIiIi0AIs6IiIiIi3Aoo6IiIhIC7CoIyIiItICLOqIiIiItACLOiIiIiItwKKOiIiISAuwqCMiIiLSAizqiIiIiLQAizoiIiIiLcCijoiIiEgLsKgjIiIi0gIs6oiIiIi0AIs6IiIiIi3Aoo6IiIhIC7CoIyIiItICLOqIiIiItACLOiIiIqoQateuDZlMVug1fPhwAMDt27cRHBwMBwcHGBsbw9/fH5cvX5bef+/ePYwcORIuLi4wNjZGrVq1EBISApVKpfY5p0+fhq+vL6pVqwYrKysMHjwYDx8+fKPbWh5Y1BEREVGFcPLkSaSmpkqv6OhoAMCHH34IIQR
69uyJq1ev4tdff8WZM2fg5OSETp06ITMzEwBw69Yt3Lp1C/PmzcO5c+ewZs0aREZGYuDAgdJn3Lp1C506dUK9evVw/PhxREZG4vz58wgODtbEJpcpPU0nQERERAQA1atXV1ueNWsW6tatC29vb1y+fBlxcXFITExEo0aNAABLly6FjY0NNm/ejM8++wzu7u7Ytm2b9P66detixowZ+OSTT5Cbmws9PT3s2bMH+vr6+P7776Gj8/Tc1vfff49mzZrhn3/+Qb169d7cBpcxnqkjIiKiCic7OxsbNmzAp59+CplMhqysLACAoaGhFKOrqwsDAwPExMQU249KpYK5uTn09J6ex8rKyoKBgYFU0AGAkZERALywn8qARR0RERFVODt37sT9+/ely6INGzaEk5MTJkyYgPT0dGRnZ2PWrFlQKpVITU0tso+7d+9i+vTpGDJkiNTWoUMHKJVKzJ07F9nZ2UhPT8fXX38NAMX2U1mwqCMiqiByc3PxzTffwNnZGUZGRqhTpw6mTZuG/Px8KUYIgbCwMDg4OMDIyAg+Pj44f/68Wj9ZWVkYOXIkrK2tYWJigu7duyMlJUUtJj09HUFBQVAoFFAoFAgKCsL9+/ffxGYSlcjKlSvRpUsXODg4AAD09fWxbds2XLp0CZaWljA2Nsbhw4fRpUsX6OrqFnp/RkYGunbtCjc3N0yZMkVqb9SoEdauXYv58+fD2NgYdnZ2qFOnDmxtbYvspzJhUUdEVEHMnj0by5cvx5IlS5CUlIQ5c+Zg7ty5WLx4sRQzZ84cREREYMmSJTh58iTs7Ozg6+uLBw8eSDGjR4/Gjh07sGXLFsTExODhw4cICAhAXl6eFBMYGIiEhARERkYiMjISCQkJCAoKeqPbS1Sc69evY//+/fjss8/U2j09PZGQkID79+8jNTUVkZGRuHv3LpydndXiHjx4AH9/f5iammLHjh3Q19dXWx8YGAilUol///0Xd+/eRVhYGO7cuVOon8qGEyWIiCqI2NhY9OjRA127dgXw9PEOmzdvxqlTpwA8PUu3cOFCTJw4Eb169QIArF27Fra2tti0aROGDBkClUqFlStXYv369ejUqRMAYMOGDXB0dMT+/fvRuXNnJCUlITIyEnFxcWjVqhUAYMWKFfDy8sLFixfh4uKiga0n+j+rV6+GjY2N9F14nkKhAABcvnwZp06dwvTp06V1GRkZ6Ny5M+RyOXbt2qV2D97zbG1tAQCrVq2CoaEhfH19y3Ar3jyeqSMiqiDatWuHAwcO4NKlSwCAv/76CzExMXj33XcBAMnJyVAqlfDz85PeI5fL4e3tjWPHjgEA4uPjkZOToxbj4OAAd3d3KSY2NhYKhUIq6ACgdevWUCgUUszzsrKykJGRofYiKg/5+flYvXo1+vfvL01uKPDzzz/j8OHD0mNNfH190bNnT+nv/cGDB/Dz80NmZiZWrlyJjIwMKJVKKJVKtTPVS5YswenTp3Hp0iV8//33GDFiBMLDw1GtWrU3ualljmfqiIgqiC+//BIqlQoNGzaErq4u8vLyMGPGDHz88ccAAKVSCeD/zi4UsLW1xfXr16UYAwMDWFhYFIopeL9SqYSNjU2hz7exsZFinhceHo6pU6e+3gYSlcD+/ftx48YNfPrpp4XWpaamIjQ0FLdv34a9vT369euHSZMmSevj4+Nx/PhxACj0aJLk5GTUrl0bAHDixAlMmTIFDx8+RMOGDfHDDz9oxe0HLOqIiCqIrVu3YsOGDdi0aRMaNWqEhIQEjB49Gg4ODujfv78UJ5PJ1N4nhCjU9rznY4qKf1E/EyZMQGhoqLSckZEBR0fHEm0XUWn4+flBCFHkupCQEISEhBT7Xh8fn2Lf+6x169a9cn4VGYs6IqIK4osvvsBXX32Fjz76CADg4eGB69evIzw8HP3794ednR2Ap2fa7O3tpfelpaVJZ+/s7OykxzQ8e7YuLS0Nbdq0kWJu375d6PPv3LlT6CxgAblcDrlcXjY
bSpXG41wgO//lcfTqDHQAozKqxljUERFVEI8ePVJ7ICrw9OGqBY80cXZ2hp2dHaKjo9GsWTMATx/QeuTIEcyePRvA09mB+vr6iI6ORu/evQE8vWSVmJiIOXPmAAC8vLygUqlw4sQJvPXWWwCA48ePQ6VSSYUf0eNc4OBt4OXnveh1yAB0sC2bwo5FHRFRBdGtWzfMmDEDtWrVQqNGjXDmzBlERERI9xbJZDKMHj0aM2fORP369VG/fn3MnDkTxsbGCAwMBPB0VuDAgQMxduxYWFlZwdLSEuPGjYOHh4c0G9bV1RX+/v4YNGgQfvjhBwDA4MGDERAQwJmvJMnOf1rQnbkLPMjVdDbayUwPaGb1dF8blUF/LOqIiCqIxYsXY9KkSRg2bBjS0tLg4OCAIUOGYPLkyVLM+PHj8fjxYwwbNgzp6elo1aoVoqKiYGZmJsUsWLAAenp66N27Nx4/foyOHTtizZo1ag9W3bhxI0JCQqRZg927d8eSJUve3MZSpfEgF8jI0XQWVBIyUZI7CqnMZGRkQKFQSL9FVxxVNvDHHeDobX6ZyoO5PvCOLfB2dUBhUHb98riVv5Ieu5J+1+jVcCyrODieVV5lPZ7xOXVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFtBoURceHo6WLVvCzMwMNjY26NmzJy5evKgWExwcDJlMpvZq3bq1WkxWVhZGjhwJa2trmJiYoHv37khJSVGLSU9PR1BQEBQKBRQKBYKCgnD//n21mBs3bqBbt24wMTGBtbU1QkJCkJ2drRZz7tw5eHt7w8jICDVq1MC0adNK9DtzREREROVJo0XdkSNHMHz4cMTFxSE6Ohq5ubnw8/NDZmamWpy/vz9SU1Ol12+//aa2fvTo0dixYwe2bNmCmJgYPHz4EAEBAcjLy5NiAgMDkZCQgMjISERGRiIhIQFBQUHS+ry8PHTt2hWZmZmIiYnBli1bsG3bNowdO1aKycjIgK+vLxwcHHDy5EksXrwY8+bNQ0RERDntISIiIqKS0egvSkRGRqotr169GjY2NoiPj8c777wjtcvlcumHrJ+nUqmwcuVKrF+/XvoJnA0bNsDR0RH79+9H586dkZSUhMjISMTFxaFVq1YAgBUrVsDLywsXL16Ei4sLoqKicOHCBdy8eRMODg4AgPnz5yM4OBgzZsyAubk5Nm7ciCdPnmDNmjWQy+Vwd3fHpUuXEBERgdDQUMhksvLYTUREREQvVaHuqVOpVAAAS0tLtfbDhw/DxsYGDRo0wKBBg5CWliati4+PR05OjvRTNwDg4OAAd3d3HDt2DAAQGxsLhUIhFXQA0Lp1aygUCrUYd3d3qaADgM6dOyMrKwvx8fFSjLe3N+RyuVrMrVu3cO3atSK3KSsrCxkZGWovIiIiorJWYYo6IQRCQ0PRrl07uLu7S+1dunTBxo0bcfDgQcyfPx8nT55Ehw4dkJWVBQBQKpUwMDCAhYWFWn+2trZQKpVSjI2NTaHPtLGxUYuxtbVVW29hYQEDA4MXxhQsF8Q8Lzw8XLqPT6FQwNHRscT7hIiIiKikNHr59VkjRozA2bNnERMTo9bep08f6b/d3d3RokULODk5Ye/evejVq1ex/Qkh1C6HFnVptCxiCiZJFHfpdcKECQgNDZWWMzIyWNgRERFRmasQZ+pGjhyJXbt24dChQ6hZs+YLY+3t7eHk5ITLly8DAOzs7JCdnY309HS1uLS0NOksmp2dHW7fvl2orzt37qjFPH+2LT09HTk5OS+MKbgU/PwZvAJyuRzm5uZqLyIiIqKyptGiTgiBESNGYPv27Th48CCcnZ1f+p67d+/i5s2bsLe3BwB4enpCX18f0dHRUkxqaioSExPRpk0bAICXlxdUKhVOnDghxRw/fhwqlUotJjExEampqVJMVFQU5HI5PD09pZijR4+qPeYkKioKDg4OqF279qvvCCIiIqLXpNGibvjw4diwYQM
2bdoEMzMzKJVKKJVKPH78GADw8OFDjBs3DrGxsbh27RoOHz6Mbt26wdraGu+99x4AQKFQYODAgRg7diwOHDiAM2fO4JNPPoGHh4c0G9bV1RX+/v4YNGgQ4uLiEBcXh0GDBiEgIAAuLi4AAD8/P7i5uSEoKAhnzpzBgQMHMG7cOAwaNEg6uxYYGAi5XI7g4GAkJiZix44dmDlzJme+EhERkcZptKhbtmwZVCoVfHx8YG9vL722bt0KANDV1cW5c+fQo0cPNGjQAP3790eDBg0QGxsLMzMzqZ8FCxagZ8+e6N27N9q2bQtjY2Ps3r0burq6UszGjRvh4eEBPz8/+Pn5oXHjxli/fr20XldXF3v37oWhoSHatm2L3r17o2fPnpg3b54Uo1AoEB0djZSUFLRo0QLDhg1DaGio2j1zRERERJqg0YkSL/slBiMjI+zbt++l/RgaGmLx4sVYvHhxsTGWlpbYsGHDC/upVasW9uzZ88IYDw8PHD169KU5EREREb1JFWKiBBERERG9HhZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVERK8hMjISMTEx0vL333+Ppk2bIjAwEOnp6RrMjIiqGhZ1RESv4YsvvkBGRgYA4Ny5cxg7dizeffddXL16FaGhoRrOjoiqEj1NJ0BEVJklJyfDzc0NALBt2zYEBARg5syZOH36NN59910NZ0dEVQnP1BERvQYDAwM8evQIALB//374+fkBACwtLaUzeEREbwLP1BERvYZ27dohNDQUbdu2xYkTJ7B161YAwKVLl1CzZk0NZ0dEVQnP1BERvYYlS5ZAT08Pv/zyC5YtW4YaNWoAAH7//Xf4+/trODsiqkp4po6I6DXUqlULe/bsKdS+YMECDWRDRFUZizoioteUl5eHHTt2ICkpCTKZDA0bNkTPnj2hp8chlojeHI44RESvITExEd27d8ft27fh4uIC4On9dNWrV8euXbvg4eGh4QyJqKrgPXVERK/hs88+g7u7O1JSUnD69GmcPn0aN2/eROPGjTF48GBNp0dEVQjP1BERvYa//voLp06dgoWFhdRmYWGBGTNmoGXLlhrMjIiqGp6pIyJ6DS4uLrh9+3ah9rS0NNSrV08DGRFRVcWijojoNcycORMhISH45ZdfkJKSgpSUFPzyyy8YPXo0Zs+ejYyMDOlFRFSeePmViOg1BAQEAAB69+4NmUwGABBCAAC6desmLctkMuTl5WkmSSKqEljUERG9hkOHDmk6BSIiACzqiIhei7e3t6ZTICICoOF76sLDw9GyZUuYmZnBxsYGPXv2xMWLF9VihBAICwuDg4MDjIyM4OPjg/Pnz6vFZGVlYeTIkbC2toaJiQm6d++OlJQUtZj09HQEBQVBoVBAoVAgKCgI9+/fV4u5ceMGunXrBhMTE1hbWyMkJATZ2dlqMefOnYO3tzeMjIxQo0YNTJs2TbrUQkRVz9GjR1/4IiJ6UzR6pu7IkSMYPnw4WrZsidzcXEycOBF+fn64cOECTExMAABz5sxBREQE1qxZgwYNGuDbb7+Fr68vLl68CDMzMwDA6NGjsXv3bmzZsgV
WVlYYO3YsAgICEB8fD11dXQBAYGAgUlJSEBkZCQAYPHgwgoKCsHv3bgBPnwjftWtXVK9eHTExMbh79y769+8PIQQWL14MAMjIyICvry/at2+PkydP4tKlSwgODoaJiQnGjh37pncfEVUAPj4+hdoK7q0DwPvoiOiN0WhRV1BgFVi9ejVsbGwQHx+Pd955B0IILFy4EBMnTkSvXr0AAGvXroWtrS02bdqEIUOGQKVSYeXKlVi/fj06deoEANiwYQMcHR2xf/9+dO7cGUlJSYiMjERcXBxatWoFAFixYgW8vLxw8eJFuLi4ICoqChcuXMDNmzfh4OAAAJg/fz6Cg4MxY8YMmJubY+PGjXjy5AnWrFkDuVwOd3d3XLp0CREREQgNDVUbyImoakhPT1dbzsnJwZkzZzBp0iTMmDFDQ1kRUVVUoR5polKpAACWlpYAgOTkZCiVSvj5+Ukxcrkc3t7eOHbsGAAgPj4eOTk5ajEODg5wd3eXYmJjY6FQKKSCDgBat24NhUKhFuPu7i4VdADQuXNnZGVlIT4+Xorx9vaGXC5Xi7l16xauXbtW5DZlZWWpPdKAjzUg0i4Ft3QUvKytreHr64s5c+Zg/Pjxmk6PiKqQUhd1jx8/xqNHj6Tl69evY+HChYiKinqtRIQQCA0NRbt27eDu7g4AUCqVAABbW1u1WFtbW2mdUqmEgYGB2tPci4qxsbEp9Jk2NjZqMc9/joWFBQwMDF4YU7BcEPO88PBwtQHf0dHxJXuCiLRB9erVC90jTERUnkpd1PXo0QPr1q0DANy/fx+tWrXC/Pnz0aNHDyxbtuyVExkxYgTOnj2LzZs3F1r3/GXNgmc+vcjzMUXFl0VMwSSJ4vKZMGECVCqV9Lp58+YL8yaiyuXs2bNqr7/++guRkZEYOnQomjRpUur+/v33X3zyySewsrKCsbExmjZtKl0tAN7s5DEiqlxKXdSdPn0ab7/9NgDgl19+ga2tLa5fv45169Zh0aJFr5TEyJEjsWvXLhw6dAg1a9aU2u3s7AAUPguWlpYmnSGzs7NDdnZ2oftano8p6md87ty5oxbz/Oekp6cjJyfnhTFpaWkACp9NLCCXy2Fubq72IiLt0bRpUzRr1gxNmzaV/vvdd99FdnY2Vq5cWaq+0tPT0bZtW+jr6+P333/HhQsXMH/+fFSrVk2KKZg8tmTJEpw8eRJ2dnbw9fXFgwcPpJjRo0djx44d2LJlC2JiYvDw4UMEBASoTdoIDAxEQkICIiMjERkZiYSEBAQFBb32/iAizSl1Uffo0SNp1mlUVBR69eoFHR0dtG7dGtevXy9VX0IIjBgxAtu3b8fBgwfh7Oystt7Z2Rl2dnaIjo6W2rKzs3HkyBG0adMGAODp6Ql9fX21mNTUVCQmJkoxXl5eUKlUOHHihBRz/PhxqFQqtZjExESkpqZKMVFRUZDL5fD09JRijh49qvaYk6ioKDg4OKB27dql2nYi0g7Jycm4evUqkpOTkZycjOvXr+PRo0c4duwYGjZsWKq+Zs+eDUdHR6xevRpvvfUWateujY4dO6Ju3boAUGjymLu7O9auXYtHjx5h06ZNACBNHps/fz46deqEZs2aYcOGDTh37hz2798PANLksZ9++gleXl7w8vLCihUrsGfPHl4yJqrESl3U1atXDzt37sTNmzexb98+aYJCWlpaqc9CDR8+HBs2bMCmTZtgZmYGpVIJpVKJx48fA3h6SXP06NGYOXMmduzYgcTERAQHB8PY2BiBgYEAnt6kPHDgQIwdOxYHDhzAmTNn8Mknn8DDw0OaDevq6gp/f38MGjQIcXFxiIuLw6BBgxAQEAAXFxcAgJ+fH9zc3BAUFIQzZ87gwIEDGDduHAYNGiRtV2BgIORyOYKDg5GYmIgdO3Zg5syZnPlKVIU5OTmpvRwdHWFoaPhKfe3atQstWrTAhx9+CBsbGzRr1gwrVqyQ1r/JyWPP46Qvooqv1EXd5MmTMW7cONSuXRtvvfU
WvLy8ADw9Y9WsWbNS9bVs2TKoVCr4+PjA3t5eem3dulWKGT9+PEaPHo1hw4ahRYsW+PfffxEVFSWdLQSABQsWoGfPnujduzfatm0LY2Nj7N69W3pGHQBs3LgRHh4e8PPzg5+fHxo3boz169dL63V1dbF3714YGhqibdu26N27N3r27Il58+ZJMQqFAtHR0UhJSUGLFi0wbNgwhIaGIjQ0tLS7kYi0yJEjR9CtWzfUq1cP9evXR/fu3fHHH3+Uup+rV69i2bJlqF+/Pvbt24fPP/8cISEh0n3Mb3Ly2PM46Yuo4iv1c+o++OADtGvXDqmpqWo3AXfs2BHvvfdeqfoqyS8xyGQyhIWFISwsrNgYQ0NDLF68WHpIcFEsLS2xYcOGF35WrVq1sGfPnhfGeHh48CnxRCTZsGEDBgwYgF69eiEkJARCCBw7dgwdO3bEmjVrpKsKJZGfn48WLVpg5syZAIBmzZrh/PnzWLZsGfr16yfFvanJY8+aMGGC2j9gMzIyWNgRVTCv9Jw6Ozs7mJmZITo6WrpU2rJly1LfP0JEVNnNmDEDc+bMwdatWxESEoJRo0Zh69atmDVrFqZPn16qvuzt7eHm5qbW5urqihs3bgB4s5PHnsdJX0QVX6mLurt376Jjx45o0KAB3n33XWliwWeffcafyiKiKufq1avo1q1bofbu3bsjOTm5VH21bdu20ESFS5cuwcnJCcCbnTxGRJVPqYu6MWPGQF9fHzdu3ICxsbHU3qdPn0I/+0VEpO0cHR1x4MCBQu0HDhwo9eXJMWPGIC4uDjNnzsQ///yDTZs24ccff8Tw4cMBvNnJY0RU+ZT6nrqoqCjs27dP7XlyAFC/fv1SP9KEiKiyGzt2LEJCQpCQkIA2bdpAJpMhJiYGa9aswXfffVeqvlq2bIkdO3ZgwoQJmDZtGpydnbFw4UL07dtXihk/fjweP36MYcOGIT09Ha1atSpy8pienh569+6Nx48fS/f3PT95LCQkRJol2717dyxZsuQ19wYRaVKpi7rMzEy1M3QF/vvvP7XfRCUiqgqGDh0KOzs7zJ8/H//73/8APD0TtnXrVvTo0aPU/QUEBCAgIKDY9W9y8hgRVS6lLureeecdrFu3TroBWCaTIT8/H3PnzkX79u3LPEEioooqNzcXM2bMwKeffoqYmBhNp0NEVVypi7q5c+fCx8cHp06dQnZ2NsaPH4/z58/j3r17+PPPP8sjRyKiCklPTw9z585F//79NZ0KEVHpJ0q4ubnh7NmzeOutt+Dr64vMzEz06tULZ86ckX7KhoioqujUqRMOHz6s6TSIiEp/pg54+oyjqVOnlnUuRESVTpcuXTBhwgQkJibC09MTJiYmauu7d++uocyIqKopUVF39uzZEnfYuHHjV06GiKiyGTp0KAAgIiKi0DqZTIa8vLw3nRIRVVElKuqaNm0KmUxW6CdkCn7m69k2DmBEVJXk5+drOgUiIgAlvKcuOTkZV69eRXJyMrZt2wZnZ2csXboUCQkJSEhIwNKlS1G3bl1s27atvPMlIiIioiKU6ExdwU/UAMCHH36IRYsW4d1335XaGjduDEdHR0yaNAk9e/Ys8ySJiCqqRYsWFdkuk8lgaGiIevXq4Z133lF78C8RUXko9USJc+fOwdnZuVC7s7MzLly4UCZJERFVFgsWLMCdO3fw6NEjWFhYQAiB+/fvw9jYGKampkhLS0OdOnVw6NChUv9sGBFRaZT6kSaurq749ttv8eTJE6ktKysL3377LVxdXcs0OSKiim7mzJlo2bIlLl++jLt37+LevXu4dOkSWrVqhe+++w43btyAnZ0dxowZo+lUiUjLlfpM3fLly9GtWzc4OjqiSZMmAIC//voLMpkMe/bsKfMEiYgqsm+++Qbbtm1Te05nvXr1MG/ePLz//vu4evUq5syZg/fff1+DWRJRVVDqou6tt95CcnIyNmzYgL///htCCPTp0weBgYGFns9ERKTtUlNTkZubW6g
9NzcXSqUSAODg4IAHDx686dSIqIp5pYcPGxsbY/DgwWWdCxFRpdO+fXsMGTIEP/30E5o1awYAOHPmDIYOHYoOHToAKP5eZCKislSiom7Xrl3o0qUL9PX1sWvXrhfG8unpRFSVrFy5EkFBQfD09IS+vj6Ap2fpOnbsiJUrVwIATE1NMX/+fE2mSURVQImKup49e0KpVMLGxuaFjyzh09OJqKqxs7NDdHQ0Ll68iIsXL0IIgYYNG8LFxUWKad++vQYzJKKqokRF3bNPTOfT04mICnNxcVEr5IiI3rRSP9Jk3bp1yMrKKtSenZ2NdevWlUlSRERERFQ6pS7qBgwYAJVKVaj9wYMHGDBgQJkkRURERESlU+qiTggBmUxWqD0lJQUKhaJMkiIiIiKi0inxI02aNWsGmUwGmUyGjh07Qk/v/96al5eH5ORk+Pv7l0uSRERERPRiJS7qCma9JiQkoHPnzjA1NZXWGRgYoHbt2nxiOhFVSffv38eJEyeQlpZWaDJZv379NJQVEVU1JS7qpkyZAgCoXbs2+vTpA0NDw3JLioiosti9ezf69u2LzMxMmJmZqd2eIpPJWNQR0RtT6l+U6N+/PwDg1KlTSEpKgkwmg6urKzw9Pcs8OSKiim7s2LH49NNPMXPmTBgbG2s6HSKqwkpd1P3777/46KOP8Oeff6JatWoAnl56aNOmDTZv3gxHR8eyzpGIqML6999/ERISwoKOiDTulR5pkpOTg6SkJNy7dw/37t1DUlIShBAYOHBgeeRIRFRhde7cGadOndJ0GkREpT9T98cff+DYsWNqT053cXHB4sWL0bZt2zJNjoioouvatSu++OILXLhwAR4eHtLvvxbg72ET0ZtS6qKuVq1ayMnJKdSem5uLGjVqlElSRESVxaBBgwAA06ZNK7SOv4dNRG9SqS+/zpkzByNHjsSpU6cghADwdNLEqFGjMG/evDJPkIioIsvPzy/2xYKOiN6kUp+pCw4OxqNHj9CqVSvpAcS5ubnQ09PDp59+ik8//VSKvXfvXtllSkRERETFKnVRt3DhwnJIg4io8li0aBEGDx4MQ0NDLFq06IWxISEhbygrIqrqXvk5dUREVdWCBQvQt29fGBoaYsGCBcXGyWQyFnVE9MaUuqgDgCtXrmD16tW4cuUKvvvuO9jY2CAyMhKOjo5o1KhRWedIRFShJCcnF/nfRESaVOqJEkeOHIGHhweOHz+O7du34+HDhwCAs2fPSj8lRkRU1WRnZ+PixYvIzc3VdCpEVEWVuqj76quv8O233yI6OhoGBgZSe/v27REbG1umyRERVXSPHj3CwIEDYWxsjEaNGuHGjRsAnt5LN2vWLA1nR0RVSamLunPnzuG9994r1F69enXcvXu3TJIiIqosJkyYgL/++guHDx+GoaGh1N6pUyds3bpVg5kRUVVT6qKuWrVqSE1NLdR+5swZPnyYiKqcnTt3YsmSJWjXrh1kMpnU7ubmhitXrmgwMyKqakpd1AUGBuLLL7+EUqmETCZDfn4+/vzzT4wbNw79+vUrjxyJiCqsO3fuwMbGplB7ZmamWpFHRFTeSl3UzZgxA7Vq1UKNGjXw8OFDuLm54Z133kGbNm3wzTfflEeOREQVVsuWLbF3715puaCQW7FiBby8vDSVFhFVQaV+pIm+vj42btyIadOm4cyZM8jPz0ezZs1Qv3798siPiKhCCw8Ph7+/Py5cuIDc3Fx89913OH/+PGJjY3HkyBFNp0dEVcgrPacOAOrWrYs6deoAAC8xEFGV1aZNG/z555+YN28e6tati6ioKDRv3hyxsbHw8PDQdHpEVIW8UlG3cuVKLFiwAJcvXwYA1K9fH6NHj8Znn31WpskREVUGHh4eWLt2rabTIKIqrtRF3aRJk7BgwQKMHDlSul8kNjYWY8aMwbVr1/Dtt9+WeZJERBWVrq4uUlNTC02WuHv3LmxsbJCXl6ehzIioqil1Ubds2TKsWLECH3/8sdTWvXt3NG7
cGCNHjmRRR0RVihCiyPasrCy1B7QTEZW3Uhd1eXl5aNGiRaF2T09P/jwOEVUZixYtAvD0nuKffvoJpqam0rq8vDwcPXoUDRs21FR6RFQFlbqo++STT7Bs2TJERESotf/444/o27dvmSVGRFSRLViwAMDTM3XLly+Hrq6utM7AwAC1a9fG8uXLNZUeEVVBrzxRIioqCq1btwYAxMXF4ebNm+jXrx9CQ0OluOcLPyIibZGcnAzg6e9eb9++HRYWFhrOiIiqulIXdYmJiWjevDkASD+BU716dVSvXh2JiYlSHB9zQkRVwaFDh6T/Lri/juMfEWlCqYu6ZwcwIiIC1q1bh7lz50qPeWrQoAG++OILBAUFaTgzIqpKXvnhw0RE9PQ2k0mTJmHEiBFo27YthBD4888/8fnnn+O///7DmDFjNJ0iEVURLOqIiF7D4sWLsWzZMvTr109q69GjBxo1aoSwsDAWdUT0xuhoOgEiososNTUVbdq0KdTepk0bpKamaiAjIqqqWNQREb2GevXq4X//+1+h9q1bt6J+/foayIiIqqoSXX5t3rw5Dhw4AAsLC0ybNg3jxo2DsbFxeedGRFThTZ06FX369MHRo0fRtm1byGQyxMTE4MCBA0UWe0RE5aVEZ+qSkpKQmZkJ4OkA9vDhw3JNioiosnj//fdx4sQJWFtbY+fOndi+fTusra1x4sQJvPfee5pOj4iqkBKdqWvatCkGDBiAdu3aQQiBefPmqf0kzrMmT55cpgkSEVVUOTk5GDx4MCZNmoQNGzZoOh0iquJKVNStWbMGU6ZMwZ49eyCTyfD7779DT6/wW2UyGYs6Iqoy9PX1sWPHDkyaNEnTqRARlezyq4uLC7Zs2YKTJ09CCIEDBw7gzJkzhV6nT58u1YcfPXoU3bp1g4ODA2QyGXbu3Km2Pjg4GDKZTO1V8NNkBbKysjBy5EhYW1vDxMQE3bt3R0pKilpMeno6goKCoFAooFAoEBQUhPv376vF3LhxA926dYOJiQmsra0REhKC7OxstZhz587B29sbRkZGqFGjBqZNmyY9QZ6Iqqb33nuv0NhFRKQJpX5OXX5+fpl9eGZmJpo0aYIBAwbg/fffLzLG398fq1evlpYNDAzU1o8ePRq7d+/Gli1bYGVlhbFjxyIgIADx8fHSD2wHBgYiJSUFkZGRAIDBgwcjKCgIu3fvBgDk5eWha9euqF69OmJiYnD37l30798fQggsXrwYAJCRkQFfX1+0b98eJ0+exKVLlxAcHAwTExOMHTu2zPYJEVUu9erVw/Tp03Hs2DF4enrCxMREbX1ISIiGMiOiquaVHj585coVLFy4EElJSZDJZHB1dcWoUaNQt27dUvXTpUsXdOnS5YUxcrkcdnZ2Ra5TqVRYuXIl1q9fj06dOgEANmzYAEdHR+zfvx+dO3dGUlISIiMjERcXh1atWgEAVqxYAS8vL1y8eBEuLi6IiorChQsXcPPmTTg4OAAA5s+fj+DgYMyYMQPm5ubYuHEjnjx5gjVr1kAul8Pd3R2XLl1CREQEQkND+VuPRFXUTz/9hGrVqiE+Ph7x8fFq62QyGYs6InpjSv2cun379sHNzQ0nTpxA48aN4e7ujuPHj6NRo0aIjo4u8wQPHz4MGxsbNGjQAIMGDUJaWpq0Lj4+Hjk5OfDz85PaHBwc4O7ujmPHjgEAYmNjoVAopIIOAFq3bg2FQqEW4+7uLhV0ANC5c2dkZWVJg3RsbCy8vb0hl8vVYm7duoVr164Vm39WVhYyMjLUXkSkPZKTk4t9Xb16VdPpEVEVUuozdV999RXGjBmDWbNmFWr/8ssv4evrW2bJdenSBR9++CGcnJyQnJyMSZMmoUOHDoiPj4dcLodSqYSBgQEsLCzU3mdrawulUgkAUCqVsLGxKdS3jY2NWoytra3aegsLCxgYGKjF1K5du9DnFKxzdnYuchvCw8MxderU0m88EVU6BffY8sw9EWlCqc/UJSUlYeDAgYXaP/3
0U1y4cKFMkirQp08fdO3aFe7u7ujWrRt+//13XLp0CXv37n3h+4QQaoNqUQNsWcSUZACfMGECVCqV9Lp58+YLcyeiymflypVwd3eHoaEhDA0N4e7ujp9++knTaRFRFVPqoq569epISEgo1J6QkFDkGbGyZG9vDycnJ1y+fBkAYGdnh+zsbKSnp6vFpaWlSWfR7OzscPv27UJ93blzRy2m4IxcgfT0dOTk5LwwpuBS8PNn+Z4ll8thbm6u9iIi7TFp0iSMGjUK3bp1w88//4yff/4Z3bp1w5gxY/DNN99oOj0iqkJKXdQNGjQIgwcPxuzZs/HHH38gJiYGs2bNwpAhQzB48ODyyFFy9+5d3Lx5E/b29gAAT09P6Ovrq93Ll5qaisTEROkHtr28vKBSqXDixAkp5vjx41CpVGoxiYmJaj++HRUVBblcDk9PTynm6NGjao85iYqKgoODQ6HLskRUdSxbtgwrVqxAeHg4unfvju7duyM8PBw//vgjli9frun0iKgKKfU9dZMmTYKZmRnmz5+PCRMmAHg6OSEsLKzUs7wePnyIf/75R1pOTk5GQkICLC0tYWlpibCwMLz//vuwt7fHtWvX8PXXX8Pa2lr66R2FQoGBAwdi7NixsLKygqWlJcaNGwcPDw9pNqyrqyv8/f0xaNAg/PDDDwCePtIkICAALi4uAAA/Pz+4ubkhKCgIc+fOxb179zBu3DgMGjRIOrMWGBiIqVOnIjg4GF9//TUuX76MmTNnYvLkybx/hqgKy8vLQ4sWLQq1e3p6Ijc3VwMZEVFVVeozdTKZDGPGjEFKSop0n1hKSgpGjRpV6uLm1KlTaNasGZo1awYACA0NRbNmzTB58mTo6uri3Llz6NGjBxo0aID+/fujQYMGiI2NhZmZmdTHggUL0LNnT/Tu3Rtt27aFsbExdu/eLT2jDgA2btwIDw8P+Pn5wc/PD40bN8b69eul9bq6uti7dy8MDQ3Rtm1b9O7dGz179sS8efOkGIVCgejoaKSkpKBFixYYNmwYQkNDERoaWtpdSERa5JNPPsGyZcsKtf/444/o27evBjIioqrqlZ5TV+DZ4upV+Pj4vPAXGfbt2/fSPgwNDbF48WLpIcFFsbS0fOnvMtaqVQt79ux5YYyHhweOHj360pyIqGpZuXIloqKipF+8iYuLw82bN9GvXz+1f/hFRERoKkUiqgJeq6gjIqrqEhMT0bx5cwBPH8wOPJ1QVr16dSQmJkpxvE2DiMobizoiotdw6NAhTadARATgFe6pIyIiIqKKp1RFXU5ODtq3b49Lly6VVz5ERERE9ApKVdTp6+sjMTGR94YQERERVTClvvzar18/rFy5sjxyISIiIqJXVOqJEtnZ2fjpp58QHR2NFi1awMTERG09p+wTkbZr3rw5Dhw4AAsLC0ybNg3jxo2DsbGxptMioiqu1EXds9P3n7+3jpdliagqSEpKQmZmJiwsLDB16lR8/vnnLOqISONKXdRx+j4RVXVNmzbFgAED0K5dOwghMG/ePJiamhYZO3ny5DecHRFVVa/8nLp//vkHV65cwTvvvAMjIyMIIXimjoiqhDVr1mDKlCnYs2cPZDIZfv/9d+jpFR5OZTIZizoiemNKXdTdvXsXvXv3xqFDhyCTyXD58mXUqVMHn332GapVq4b58+eXR55ERBWGi4sLtmzZAgDQ0dHBgQMHYGNjo+GsiKiqK/Xs1zFjxkBfXx83btxQu4ekT58+iIyMLNPkiIgquvz8fBZ0RFQhlPpMXVRUFPbt24eaNWuqtdevXx/Xr18vs8SIiCqLK1euYOHChUhKSoJMJoOrqytGjRqFunXrajo1IqpCSn2mLjMzs8hZXv/99x/kcnmZJEVEVFns27cPbm5uOHHiBBo3bgx3d3ccP34cjRo1QnR0tKbTI6IqpNRn6t555x2sW7cO06dPB/D0RuD8/HzMnTsX7du3L/MEiYgqsq+++gpjxozBrFmzCrV/+eWX8PX11VB
mRFTVlLqomzt3Lnx8fHDq1ClkZ2dj/PjxOH/+PO7du4c///yzPHIkIqqwkpKS8L///a9Q+6effoqFCxe++YSIqMoq9eVXNzc3nD17Fm+99RZ8fX2RmZmJXr164cyZM7x/hIiqnOrVqyMhIaFQe0JCAidQENEbVeqiDgDs7OwwdepU7NmzB7/99hu+/fZb2Nvbl3VuREQV3qBBgzB48GDMnj0bf/zxB2JiYjBr1iwMGTIEgwcPfuV+w8PDIZPJMHr0aKlNCIGwsDA4ODjAyMgIPj4+OH/+vNr7srKyMHLkSFhbW8PExATdu3dHSkqKWkx6ejqCgoKgUCigUCgQFBSE+/fvv3KuRFQxvNLDh9PT07Fy5Uq1mV4DBgyApaVlWedHRFShTZo0CWZmZpg/fz4mTJgAAHBwcEBYWBhCQkJeqc+TJ0/ixx9/ROPGjdXa58yZg4iICKxZswYNGjTAt99+C19fX1y8eBFmZmYAgNGjR2P37t3YsmULrKysMHbsWAQEBCA+Ph66uroAgMDAQKSkpEiPoRo8eDCCgoKwe/fuV90NRFQBlPpM3ZEjR+Ds7IxFixYhPT0d9+7dw6JFi+Ds7IwjR46UR45ERBWWTCbDmDFjkJKSApVKBZVKhZSUFIwaNeqVfmXn4cOH6Nu3L1asWAELCwupXQiBhQsXYuLEiejVqxfc3d2xdu1aPHr0CJs2bQIAqFQqrFy5EvPnz0enTp3QrFkzbNiwAefOncP+/fsBPL0HMDIyEj/99BO8vLzg5eWFFStWYM+ePbh48WKxeWVlZSEjI0PtRUQVS6mLuuHDh6N3795ITk7G9u3bsX37dly9ehUfffQRhg8fXh45EhFVCmZmZtIZs1c1fPhwdO3aFZ06dVJrT05OhlKphJ+fn9Qml8vh7e2NY8eOAQDi4+ORk5OjFuPg4AB3d3cpJjY2FgqFAq1atZJiWrduDYVCIcUUJTw8XLpcq1Ao4Ojo+FrbSURlr9RF3ZUrVzB27FjpND4A6OrqIjQ0FFeuXCnT5IiIqpItW7bg9OnTCA8PL7ROqVQCAGxtbdXabW1tpXVKpRIGBgZqZ/iKiilqAoeNjY0UU5QJEyZIZyJVKhVu3rxZuo0jonJX6nvqmjdvjqSkJLi4uKi1JyUloWnTpmWVFxFRlXLz5k2MGjUKUVFRMDQ0LDbu+Uu6QoiXXuZ9Pqao+Jf1I5fL+YB5ogquREXd2bNnpf8OCQnBqFGj8M8//6B169YAgLi4OHz//feFHr5JREQlEx8fj7S0NHh6ekpteXl5OHr0KJYsWSLd76ZUKtWeNpCWliadvbOzs0N2djbS09PVztalpaWhTZs2Uszt27cLff6dO3cKnQUkosqlREVd06ZNIZPJIISQ2saPH18oLjAwEH369Cm77IiIKrCC+9d++OEHNGjQ4LX66tixI86dO6fWNmDAADRs2BBffvkl6tSpAzs7O0RHR6NZs2YAgOzsbBw5cgSzZ88GAHh6ekJfXx/R0dHo3bs3ACA1NRWJiYmYM2cOAMDLywsqlQonTpzAW2+9BQA4fvw4VCqVVPgRUeVUoqIuOTm5vPMgIqp09PX1kZiY+EqzXJ9nZmYGd3d3tTYTExNYWVlJ7aNHj8bMmTNRv3591K9fHzNnzoSxsTECAwMBAAqFAgMHDsTYsWNhZWUFS0tLjBs3Dh4eHtLEC1dXV/j7+2PQoEH44YcfADx9pElAQECh22qIqHIpUVHn5ORU3nkQEVVK/fr1w8qVK9/I7Sfjx4/H48ePMWzYMKSnp6NVq1aIiopSm3G7YMEC6OnpoXfv3nj8+DE6duyINWvWqE1u27hxI0JCQqRZst27d8eSJUvKPX8iKl+v9PDhf//9F3/++SfS0tKQn5+vtu5VH7ZJRFQZZWdn46effkJ0dDRatGgBExMTtfURERGv3Pfhw4fVlmUyGcLCwhAWFlbsewwNDbF48WIsXry42BhLS0ts2LDhlfMiooqp1EXd6tWr8fnnn8P
AwABWVlaFZlSxqCOiqiQxMRHNmzcHAFy6dEltXVlcliUiKqlSF3WTJ0/G5MmTMWHCBOjovNJPxxIRaY1Dhw5pOgUiIgCv8PDhR48e4aOPPmJBR0T0jH/++Qf79u3D48ePAUDtaQFERG9CqSuzgQMH4ueffy6PXIiIKp27d++iY8eOaNCgAd59912kpqYCAD777DOMHTtWw9kRUVVS6suv4eHhCAgIQGRkJDw8PKCvr6+2/nVuCiYiqmzGjBkDfX193LhxA66urlJ7nz59MGbMGMyfP1+D2RFRVVLqom7mzJnYt2+f9Dyjl/30DBGRNouKisK+fftQs2ZNtfb69evj+vXrGsqKiKqiUhd1ERERWLVqFYKDg8shHSKiyiUzMxPGxsaF2v/77z/+VioRvVGlvqdOLpejbdu25ZELEVGl884772DdunXSskwmQ35+PubOnYv27dtrMDMiqmpKfaZu1KhRWLx4MRYtWlQe+RARVSpz586Fj48PTp06hezsbIwfPx7nz5/HvXv38Oeff2o6PSKqQkpd1J04cQIHDx7Enj170KhRo0ITJbZv315myRERVXRubm44e/Ysli1bBl1dXWRmZqJXr14YPnw47O3tNZ0eEVUhpS7qqlWrhl69epVHLkRElZKdnR2mTp2q6TSIqIp7pZ8JIyKi/5Oeno6VK1ciKSkJMpkMrq6uGDBgACwtLTWdGhFVIfxZCCKi13DkyBE4Oztj0aJFSE9Px71797Bo0SI4OzvjyJEjmk6PiKqQUp+pc3Z2fuHz6K5evfpaCRERVSbDhw9H7969pXvqACAvLw/Dhg3D8OHDkZiYqOEMiaiqKHVRN3r0aLXlnJwcnDlzBpGRkfjiiy/KKi8iokrhypUr2LZtm1TQAYCuri5CQ0PVHnVCRFTeXumRJkX5/vvvcerUqddOiIioMmnevDmSkpKkX9kpkJSUhKZNm2omKSKqkkpd1BWnS5cumDBhAidSEJHWO3v2rPTfISEhGDVqFP755x+0bt0aABAXF4fvv/8es2bN0lSKRFQFlVlR98svv3CmFxFVCU2bNoVMJoMQQmobP358objAwED06dPnTaZGRFVYqYu6Zs2aqU2UEEJAqVTizp07WLp0aZkmR0RUESUnJ2s6BSKiQkpd1PXs2VNtWUdHB9WrV4ePjw8aNmxYVnkREVVYTk5Omk6BiKiQUhd1U6ZMKY88iIgqrX///Rd//vkn0tLSkJ+fr7YuJCREQ1kRUVVTZvfUERFVRatXr8bnn38OAwMDWFlZqd2eIpPJWNQR0RtT4qJOR0fnhQ8dBp4OYLm5ua+dFBFRZTF58mRMnjwZEyZMgI4Of6SHiDSnxEXdjh07il137NgxLF68WG0mGBFRVfDo0SN89NFHLOiISONKXNT16NGjUNvff/+NCRMmYPfu3ejbty+mT59epskREVV0AwcOxM8//4yvvvpK06kQURX3SvfU3bp1C1OmTMHatWvRuXNnJCQkwN3dvaxzIyKq8MLDwxEQEIDIyEh4eHhAX19fbX1ERISGMiOiqqZURZ1KpcLMmTOxePFiNG3aFAcOHMDbb79dXrkREVV4M2fOxL59+6SfCXt+ogQR0ZtS4qJuzpw5mD17Nuzs7LB58+YiL8cSEVU1ERERWLVqFYKDgzWdChFVcSUu6r766isYGRmhXr16WLt2LdauXVtk3Pbt28ssOSKiik4ul6Nt27aaToOIqORFXb9+/XgpgYjoOaNGjcLixYuxaNEiTadCRFVciYu6NWvWlGMaRESV04kTJ3Dw4EHs2bMHjRo1KjRRglcviOhN4S9KEBG9hmrVqqFXr16aToOIiEUdEdHrWL16taZTICICAGj0EehHjx5Ft27d4ODgAJlMhp07d6qtF0IgLCwMDg4OMDIygo+PD86fP68Wk5WVhZEjR8La2homJibo3r07UlJS1GLS09MRFBQEhUIBhUKBoKAg3L9/Xy3mxo0b6NatG0xMTGBtbY2QkBBkZ2e
rxZw7dw7e3t4wMjJCjRo1MG3aNP6KBhEREVUIGi3qMjMz0aRJEyxZsqTI9XPmzEFERASWLFmCkydPws7ODr6+vnjw4IEUM3r0aOzYsQNbtmxBTEwMHj58iICAAOTl5UkxgYGBSEhIQGRkJCIjI5GQkICgoCBpfV5eHrp27YrMzEzExMRgy5Yt2LZtG8aOHSvFZGRkwNfXFw4ODjh58iQWL16MefPm8cGiRFWcs7Mz6tSpU+yLiOhN0ejl1y5duqBLly5FrhNCYOHChZg4caJ0v8ratWtha2uLTZs2YciQIVCpVFi5ciXWr1+PTp06AQA2bNgAR0dH7N+/H507d0ZSUhIiIyMRFxeHVq1aAQBWrFgBLy8vXLx4ES4uLoiKisKFCxdw8+ZNODg4AADmz5+P4OBgzJgxA+bm5ti4cSOePHmCNWvWQC6Xw93dHZcuXUJERARCQ0M5M5ioiho9erTack5ODs6cOYPIyEh88cUXmkmKiKqkCntPXXJyMpRKJfz8/KQ2uVwOb29vHDt2DEOGDEF8fDxycnLUYhwcHODu7o5jx46hc+fOiI2NhUKhkAo6AGjdujUUCgWOHTsGFxcXxMbGwt3dXSroAKBz587IyspCfHw82rdvj9jYWHh7e0Mul6vFTJgwAdeuXYOzs3OR25GVlYWsrCxpOSMjo0z2DxFVDKNGjSqy/fvvv8epU6fecDZEVJVp9PLriyiVSgCAra2tWrutra20TqlUwsDAABYWFi+MsbGxKdS/jY2NWszzn2NhYQEDA4MXxhQsF8QUJTw8XLqXT6FQwNHR8cUbTkRaoUuXLti2bZum0yCiKqTCFnUFnr+sKYR46aXO52OKii+LmIJJEi/KZ8KECVCpVNLr5s2bL8ydiLTDL7/8AktLS02nQURVSIW9/GpnZwfg6Vkwe3t7qT0tLU06Q2ZnZ4fs7Gykp6erna1LS0tDmzZtpJjbt28X6v/OnTtq/Rw/flxtfXp6OnJyctRinj8jl5aWBqDw2cRnyeVytUu2RKRdmjVrpvYPOyEElEol7ty5g6VLl2owMyKqairsmTpnZ2fY2dkhOjpaasvOzsaRI0ekgs3T0xP6+vpqMampqUhMTJRivLy8oFKpcOLECSnm+PHjUKlUajGJiYlITU2VYqKioiCXy+Hp6SnFHD16VO0xJ1FRUXBwcEDt2rXLfgcQUaXQs2dP9OjRQ3r16tULU6ZMQWJiIgYPHqzp9IioCtHombqHDx/in3/+kZaTk5ORkJAAS0tL1KpVC6NHj8bMmTNRv3591K9fHzNnzoSxsTECAwMBAAqFAgMHDsTYsWNhZWUFS0tLjBs3Dh4eHtJsWFdXV/j7+2PQoEH44YcfAACDBw9GQEAAXFxcAAB+fn5wc3NDUFAQ5s6di3v37mHcuHEYNGgQzM3NATx9LMrUqVMRHByMr7/+GpcvX8bMmTMxefJkznwlqsKmTJmi6RSIiABouKg7deoU2rdvLy2HhoYCAPr37481a9Zg/PjxePz4MYYNG4b09HS0atUKUVFRMDMzk96zYMEC6OnpoXfv3nj8+DE6duyINWvWQFdXV4rZuHEjQkJCpFmy3bt3V3s2nq6uLvbu3Ythw4ahbdu2MDIyQmBgIObNmyfFKBQKREdHY/jw4WjRogUsLCwQGhoq5UxERESkSTLBn0R4ozIyMqBQKKBSqaSzgEVRZQN/3AGO3gYyct5gglWEuT7wji3wdnVAYVB2/fK4lb+SHruSftdelY6OzkvP0stkMuTm5pb5Z1cEHMsqDo5nlVdZj2cVdqIEEVFFtmPHjmLXHTt2DIsXL+bPCBLRG8WijojoFfTo0aNQ299//40JEyZg9+7d6Nu3L6ZPn66BzIioqqqws1+JiCqLW7duYdCgQWjcuDFyc3ORkJCAtWvXolatWppOjYiqEBZ1RESvSKVS4csvv0S9evVw/vx5HDhwALt374a7u7umUyOiKoiXX4mIXsGcOXMwe/Zs2NnZYfPmzUV
ejiUiepNY1BERvYKvvvoKRkZGqFevHtauXYu1a9cWGbd9+/Y3nBkRVVUs6oiIXkG/fv344HEiqlBY1BERvYI1a9ZoOgUiIjWcKEFERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RUQURHh6Oli1bwszMDDY2NujZsycuXryoFiOEQFhYGBwcHGBkZAQfHx+cP39eLSYrKwsjR46EtbU1TExM0L17d6SkpKjFpKenIygoCAqFAgqFAkFBQbh//355byIRlSMWdUREFcSRI0cwfPhwxMXFITo6Grm5ufDz80NmZqYUM2fOHERERGDJkiU4efIk7Ozs4OvriwcPHkgxo0ePxo4dO7BlyxbExMTg4cOHCAgIQF5enhQTGBiIhIQEREZGIjIyEgkJCQgKCnqj20tEZUtP0wkQEdFTkZGRasurV6+GjY0N4uPj8c4770AIgYULF2LixIno1asXAGDt2rWwtbXFpk2bMGTIEKhUKqxcuRLr169Hp06dAAAbNmyAo6Mj9u/fj86dOyMpKQmRkZGIi4tDq1atAAArVqyAl5cXLl68CBcXl0K5ZWVlISsrS1rOyMgor91ARK+IZ+qIiCoolUoFALC0tAQAJCcnQ6lUws/PT4qRy+Xw9vbGsWPHAADx8fHIyclRi3FwcIC7u7sUExsbC4VCIRV0ANC6dWsoFAop5nnh4eHSpVqFQgFHR8ey3Vgiem0s6oiIKiAhBEJDQ9GuXTu4u7sDAJRKJQDA1tZWLdbW1lZap1QqYWBgAAsLixfG2NjYFPpMGxsbKeZ5EyZMgEqlkl43b958vQ0kojLHy69ERBXQiBEjcPbsWcTExBRaJ5PJ1JaFEIXanvd8TFHxL+pHLpdDLpeXJHUi0hCeqSMiqmBGjhyJXbt24dChQ6hZs6bUbmdnBwCFzqalpaVJZ+/s7OyQnZ2N9PT0F8bcvn270OfeuXOn0FlAIqo8WNQREVUQQgiMGDEC27dvx8GDB+Hs7Ky23tnZGXZ2doiOjpbasrOzceTIEbRp0wYA4OnpCX19fbWY1NRUJCYmSjFeXl5QqVQ4ceKEFHP8+HGoVCophogqH15+JSKqIIYPH45Nmzbh119/hZmZmXRGTqFQwMjICDKZDKNHj8bMmTNRv3591K9fHzNnzoSxsTECAwOl2IEDB2Ls2LGwsrKCpaUlxo0bBw8PD2k2rKurK/z9/TFo0CD88MMPAIDBgwcjICCgyJmvRFQ5sKgjIqogli1bBgDw8fFRa1+9ejWCg4MBAOPHj8fjx48xbNgwpKeno1WrVoiKioKZmZkUv2DBAujp6aF37954/PgxOnbsiDVr1kBXV1eK2bhxI0JCQqRZst27d8eSJUvKdwOJqFyxqCMiqiCEEC+NkclkCAsLQ1hYWLExhoaGWLx4MRYvXlxsjKWlJTZs2PAqaRJRBcV76oiIiIi0AIs6IiIiIi3Aoo6IiIhIC7CoIyIiItICLOqIiIiItACLOiIiIiItwKKOiIiISAuwqCMiIiLSAizqiIiIiLQAizoiIiIiLcCijoiIiEgLVOiiLiwsDDKZTO1lZ2cnrRdCICwsDA4ODjAyMoKPjw/Onz+v1kdWVhZGjhwJa2trmJiYoHv37khJSVGLSU9PR1BQEBQKBRQKBYKCgnD//n21mBs3bqBbt24wMTGBtbU1QkJCkJ2dXW7bTkRERFQaFbqoA4BGjRohNTVVep07d05
aN2fOHERERGDJkiU4efIk7Ozs4OvriwcPHkgxo0ePxo4dO7BlyxbExMTg4cOHCAgIQF5enhQTGBiIhIQEREZGIjIyEgkJCQgKCpLW5+XloWvXrsjMzERMTAy2bNmCbdu2YezYsW9mJxARERG9hJ6mE3gZPT09tbNzBYQQWLhwISZOnIhevXoBANauXQtbW1ts2rQJQ4YMgUqlwsqVK7F+/Xp06tQJALBhwwY4Ojpi//796Ny5M5KSkhAZGYm4uDi0atUKALBixQp4eXnh4sWLcHFxQVRUFC5cuICbN2/CwcEBADB//nwEBwdjxowZMDc3f0N7g4iIiKhoFf5M3eXLl+Hg4ABnZ2d89NFHuHr1KgAgOTkZSqUSfn5+UqxcLoe3tzeOHTsGAIiPj0dOTo5ajIODA9zd3aWY2NhYKBQKqaADgNatW0OhUKjFuLu7SwUdAHTu3BlZWVmIj49/Yf5ZWVnIyMhQexERERGVtQpd1LVq1Qrr1q3Dvn37sGLFCiiVSrRp0wZ3796FUqkEANja2qq9x9bWVlqnVCphYGAACwuLF8bY2NgU+mwbGxu1mOc/x8LCAgYGBlJMccLDw6V79RQKBRwdHUuxB4iIiIhKpkIXdV26dMH7778PDw8PdOrUCXv37gXw9DJrAZlMpvYeIUShtuc9H1NU/KvEFGXChAlQqVTS6+bNmy+MJyIiInoVFbqoe56JiQk8PDxw+fJl6T6758+UpaWlSWfV7OzskJ2djfT09BfG3L59u9Bn3blzRy3m+c9JT09HTk5OoTN4z5PL5TA3N1d7EREREZW1SlXUZWVlISkpCfb29nB2doadnR2io6Ol9dnZ2Thy5AjatGkDAPD09IS+vr5aTGpqKhITE6UYLy8vqFQqnDhxQoo5fvw4VCqVWkxiYiJSU1OlmKioKMjlcnh6epbrNhMRERGVRIWe/Tpu3Dh069YNtWrVQlpaGr799ltkZGSgf//+kMlkGD16NGbOnIn69eujfv36mDlzJoyNjREYGAgAUCgUGDhwIMaOHQsrKytYWlpi3Lhx0uVcAHB1dYW/vz8GDRqEH374AQAwePBgBAQEwMXFBQDg5+cHNzc3BAUFYe7cubh37x7GjRuHQYMG8cwbERERVQgVuqhLSUnBxx9/jP/++w/Vq1dH69atERcXBycnJwDA+PHj8fjxYwwbNgzp6elo1aoVoqKiYGZmJvWxYMEC6OnpoXfv3nj8+DE6duyINWvWQFdXV4rZuHEjQkJCpFmy3bt3x5IlS6T1urq62Lt3L4YNG4a2bdvCyMgIgYGBmDdv3hvaE0REREQvJhNCCE0nUZVkZGRAoVBApVK98CyfKhv44w5w9DaQkfMGE6wizPWBd2yBt6sDCoOy65fHrfyV9NiV9LtGr4ZjWcXB8azyKuvxrFLdU0dERERERWNRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHRE
REZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBR9wqWLl0KZ2dnGBoawtPTE3/88YemUyIiKjWOZUTahUVdKW3duhWjR4/GxIkTcebMGbz99tvo0qULbty4oenUiIhKjGMZkfZhUVdKERERGDhwID777DO4urpi4cKFcHR0xLJlyzSdGhFRiXEsI9I+eppOoDLJzs5GfHw8vvrqK7V2Pz8/HDt2rMj3ZGVlISsrS1pWqVQAgIyMjBd+VkY28OgBoP8EkOe+ZuJUiH7e0/2bIQdkBmXXL49b+SvpsSv4jgkh3lBmlQfHMu3C8azyKuvxjEVdKfz333/Iy8uDra2tWrutrS2USmWR7wkPD8fUqVMLtTs6OpZLjkSk7sGDB1AoFJpOo0LhWEZUOb1sPGNR9wpkMpnashCiUFuBCRMmIDQ0VFrOz8/HvXv3YGVlVex7KqOMjAw4Ojri5s2bMDc313Q6VAraeuyEEHjw4AEcHBw0nUqFxbGsMG39PlQF2nzsSjqesagrBWtra+jq6hb6l2xaWlqhf/EWkMvlkMvlam3VqlUrrxQ1ztzcXOu+TFWFNh47nqErGseyl9PG70NVoa3HriTjGSdKlIKBgQE8PT0RHR2t1h4dHY02bdpoKCsiotLhWEaknXimrpRCQ0MRFBSEFi1awMvLCz/++CNu3LiBzz//XNOpERGVGMcyIu3Doq6U+vTpg7t372LatGlITU2Fu7s7fvvtNzg5OWk6NY2Sy+WYMmVKocszVPHx2FVNHMuKxu9D5cVjB8gE5/sTERERVXq8p46IiIhIC7CoIyIiItICLOqIiIiItACLOi3l4+OD0aNHa+SzZTIZdu7c+crvDwsLQ9OmTcssH3pqzZo1Wv1cMdJeHM/oeRzPisaijspcamoqunTpUqLYogbMcePG4cCBA+WQWdXWp08fXLp0SdNpEFUqHM8qJo5nReMjTajMZGdnw8DAAHZ2dq/Vj6mpKUxNTcsoq6pBCIG8vDzo6RX/lTYyMoKRkdEbzIqo8uJ4pjkcz14dz9Rpsfz8fIwfPx6Wlpaws7NDWFiYtE6lUmHw4MGwsbGBubk5OnTogL/++ktaHxwcjJ49e6r1N3r0aPj4+EjLPj4+GDFiBEJDQ2FtbQ1fX18A6v9azc7OxogRI2Bvbw9DQ0PUrl0b4eHhAIDatWsDAN577z3IZDJpuajLFatWrUKjRo0gl8thb2+PESNGvPb+0aSCfTdixAhUq1YNVlZW+Oabb1DwhKENGzagRYsWMDMzg52dHQIDA5GWlia9//Dhw5DJZNi3bx9atGgBuVyOP/74A3/99Rfat28PMzMzmJubw9PTE6dOnQJQ+HJFwX5etWoVatWqBVNTUwwdOhR5eXmYM2cO7OzsYGNjgxkzZqjlfuPGDfTo0QOmpqYwNzdH7969cfv27UL9rl+/HrVr14ZCocBHH32EBw8eAADWrVsHKysrZGVlqfX7/vvvo1+/fmW6n0l7cDyruDieVZzxjEWdFlu7di1MTExw/PhxzJkzB9OmTUN0dDSEEOjatSuUSiV+++03xMfHo3nz5ujYsSPu3btX6s/Q09PDn3/+iR9++KHQ+kWLFmHXrl343//+h4sXL2LDhg3SYHfy5EkAwOrVq5GamiotP2/ZsmUYPnw4Bg8ejHPnzmHXrl2oV69e6XZGBVSw744fP45FixZhwYIF+OmnnwA8/Z/H9OnT8ddff2Hnzp1ITk5GcHBwoT7Gjx+P8PBwJCUloXHjxujbty9q1qyJkydPIj4+Hl999RX09fWLzeHKlSv4/fffERkZic2bN2PVqlXo2rUrUlJScOTIEcyePRvffPMN4uLiADz9F3TPnj1x7949HDlyBNHR0bhy5Qr69OlTqN+dO3diz5492LNnD44cOYJZs2YBAD788EPk5eVh165dUvx///2HPXv2YMCAAa+7W0lLcTyr2DieVZDxTJBW8vb2Fu3atVNra9mypfjyyy/FgQM
HhLm5uXjy5Ina+rp164offvhBCCFE//79RY8ePdTWjxo1Snh7e6t9RtOmTQt9NgCxY8cOIYQQI0eOFB06dBD5+flF5vlsbIEpU6aIJk2aSMsODg5i4sSJL9jaysfb21u4urqq7Zcvv/xSuLq6Fhl/4sQJAUA8ePBACCHEoUOHBACxc+dOtTgzMzOxZs2aIvtYvXq1UCgU0vKUKVOEsbGxyMjIkNo6d+4sateuLfLy8qQ2FxcXER4eLoQQIioqSujq6oobN25I68+fPy8AiBMnThTb7xdffCFatWolLQ8dOlR06dJFWl64cKGoU6dOsX8nVLVxPKvYOJ5VnPGMZ+q0WOPGjdWW7e3tkZaWhvj4eDx8+BBWVlbS/R6mpqZITk7GlStXSvUZLVq0eOH64OBgJCQkwMXFBSEhIYiKiipV/2lpabh16xY6duxYqvdVBq1bt4ZMJpOWvby8cPnyZeTl5eHMmTPo0aMHnJycYGZmJl0munHjhlofz+//0NBQfPbZZ+jUqRNmzZr10uNZu3ZtmJmZScu2trZwc3ODjo6OWlvBpZKkpCQ4OjrC0dFRWu/m5oZq1aohKSmp2H4L/vYKDBo0CFFRUfj3338BPD27ERwcrLY/iJ7F8axi43hWMcYzFnVa7PnT1DKZDPn5+cjPz4e9vT0SEhLUXhcvXsQXX3wBANDR0ZHuhyiQk5NT6DNMTExemEPz5s2RnJyM6dOn4/Hjx+jduzc++OCDEm9DVbwR9smTJ/Dz84OpqSk2bNiAkydPYseOHQCeXsZ41vP7PywsDOfPn0fXrl1x8OBBuLm5Se8tSlF/I8X93QBPL1cUNVA93/6iPgCgWbNmaNKkCdatW4fTp0/j3LlzRV6OISrA8axy4nj2ZnH2axXUvHlzKJVK6OnpSfeDPK969epITExUa0tISHjh/QzFMTc3R58+fdCnTx988MEH8Pf3x71792BpaQl9fX3k5eUV+14zMzPUrl0bBw4cQPv27Uv92RVZwX0dzy7Xr18ff//9N/777z/MmjVL+hdkwc3BJdGgQQM0aNAAY8aMwccff4zVq1fjvffeK5Oc3dzccOPGDdy8eVPK7cKFC1CpVHB1dS1VX5999hkWLFiAf//9F506dVL71zJRSXE8qxg4nlWM8Yxn6qqg/9fe/cdEWcdxAH8fdKdwd1gCHgYmAwYhCwwOis4h1DmqEV1Z6nLhBunMJJyGLaMgdQUTDEUp5wTHH6ljo0XtWmbK+kEOSpiu8eOm0rJgTTpDhPKET384n3V4CVwqeLxf2233fJ/n+Tzf52G873v3PPec2WxGcnIyLBYLvvjiC3R1daGxsREFBQXKP9ujjz6KH374ATU1NbDZbCgsLLwuFMfi/fffx8GDB9He3o7Ozk7U1tYiKChI+dbStYDr6emB3W53WaOoqAhlZWXYuXMnbDYbTpw4gYqKCrf3f7L45ZdfsH79enR0dODAgQOoqKhAXl4e7rvvPmg0GlRUVODMmTOor6/Hli1bRq03ODiItWvXoqGhAT///DO+++47NDc3jzucbsRsNisXMJ84cQJNTU3IysrCwoULRz11NdLy5cvx66+/Yu/evcjOzr5pfaSphXk2OTDPJkeecVA3BalUKlitVqSkpCA7OxuRkZFYtmwZurq6YDAYAADp6el46623sHHjRiQmJuLixYtufT1bp9OhpKQERqMRiYmJ6OrqgtVqVa5xKCsrw5dffok5c+bgwQcfdFljxYoVKC8vR2VlJWJiYpCRkQGbzeb+AZgksrKyMDg4iKSkJLzyyivIzc3FqlWrEBgYiP3796O2thbz5s1DcXExSktLR63n7e2N3t5eZGVlITIyEkuWLMETTzyBd95556b1+drtHe655x6kpKTAbDYjLCwMhw4dGnctPz8/LF68GDqd7rrbTRCNFfNscmCeTY48U8nICw2I6JZLTU3F/PnzUV5ePtFdmVCLFi1CdHQ0du7cOdFdISI3Mc+umgx5xmvqiOi
2++OPP3D48GEcPXoUu3btmujuEBG5bTLlGQd1RHTbxcfHw263o6SkBFFRURPdHSIit02mPOPpVyIiIiIPwC9KEBEREXkADuqIiIiIPAAHdUREREQegIM6IiIiIg/AQR0RERGRB+Cgjjza/v37lZ/w8STX7oR+s6WmpmLdunU3vS4R/X/Ms/GZinnGQR25raenB3l5eYiIiMD06dNhMBiwYMECfPjhhxgYGJjo7gEAli5dis7OzonuhtuKioowf/78ie4Gkcdjnt16zLNbjzcfJrecOXMGJpMJd999N95991088MADuHLlCjo7O1FVVYV7770XmZmZLtd1OBxQq9W3pZ8+Pj7w8fG5LdsiojsT84w8hhC5IT09XUJCQqS/v9/l/OHhYeU5APnggw8kMzNTfH195e233xYRkcrKSgkLCxO1Wi2RkZFSU1OjrHP27FkBIC0tLUqb3W4XAHLs2DERETl27JgAkM8++0xiY2Nl2rRpkpSUJCdPnlTWqa6ulhkzZijThYWFEhcXJzU1NTJ37lzx8/OTpUuXSl9fn7JMX1+fvPDCC+Lr6ytBQUGyfft2WbhwoeTl5f3n8bhWd9++fTJnzhzRarWyevVquXLlipSUlIjBYJDAwEDZunWr03oXLlyQlStXSmBgoOj1eklLS5PW1lal7wCcHtXV1cox3bt3r1gsFvHx8ZGIiAj55JNPnGo3NDRIYmKiaDQaCQoKktdff10cDocyv7+/X1588UXRarUSFBQkpaWlo+4nkSdinjljnt25OKijcTt//ryoVCp57733xrQ8AJk1a5bs27dPTp8+LV1dXVJXVydqtVp2794tHR0dUlZWJt7e3nL06FERGV8IRkdHy+HDh+XkyZOSkZEhoaGhcvnyZRFxHYI6nU6effZZOXXqlHz99dcSFBQkmzZtUpZ56aWXZO7cuXLkyBE5deqUPPPMM6LX60cNQZ1OJ88995z89NNPUl9fLxqNRtLT0yU3N1fa29ulqqpKAMj3338vIldfKEwmkzz11FPS3NwsnZ2dsmHDBvH395fe3l4ZGBiQDRs2SExMjHR3d0t3d7cMDAwoxzQkJEQ++ugjsdls8uqrr4pOp5Pe3l4RETl37pz4+vrKmjVrpK2tTT7++GMJCAiQwsJCpc8vv/yyhISEOB07nU435UKQpjbm2fWYZ3cuDupo3I4fPy4ApK6uzqnd399ftFqtaLVa2bhxo9IOQNatW+e07COPPCIrV650anv++eflySefFJHxheDBgweVZXp7e8XHx0cOHTokIq5D0NfX1+mdbH5+vjz00EMicvVdrVqtltraWmX+hQsXxNfXd9QQHFk3PT1dQkNDZWhoSGmLiopSXjy++uor8fPzk7/++supVnh4uOzZs0epGxcXd932AEhBQYEy3d/fLyqVSj7//HMREdm0aZNERUU5fcKwe/du0el0MjQ0JBcvXhSNRuPy2E21EKSpjXl2PebZnYvX1JHbVCqV03RTUxOGh4exfPly/P33307zjEaj03RbWxtWrVrl1GYymbBjx45x9yM5OVl5PnPmTERFRaGtre0/lw8NDYVer1emZ8+ejd9//x3A1WtrHA4HkpKSlPkzZswY0480j6xrMBjg7e0NLy8vp7Zr2/rxxx/R398Pf39/pzqDg4M4ffr0qNuLjY1Vnmu1Wuj1eqV2W1sbkpOTnf5GJpMJ/f39OHfuHOx2Oy5fvuzy2BFNRcyzG9dlnt0ZOKijcYuIiIBKpUJ7e7tTe1hYGAC4vJBXq9Ve1zYyREVEabsWHCKizHc4HGPu48ja/zbyomaVSoXh4WGn7bnq22hc1b3RtoaHhzF79mw0NDRcV2ssty0YbT/+ax9UKtWY9odoKmCejb0u82zy4y1NaNz8/f2xaNEi7Nq1C5cuXXKrRnR0NL799luntsbGRkRHRwMAAgMDAQDd3d3K/NbWVpe1jh8/rjy32+3o7OzE/fff71a/wsPDoVar0dTUpLT19fXBZrO
5Ve9G4uPj0dPTg7vuugsRERFOj4CAAACARqPB0NDQuGvPmzcPjY2NTmHX2NgIvV6P4OBgREREQK1Wuzx2RFMJ8+zmYJ5NDvykjtxSWVkJk8kEo9GIoqIixMbGwsvLC83NzWhvb0dCQsIN18/Pz8eSJUsQHx+Pxx57DJ9++inq6upw5MgRAFffHT/88MMoLi5GaGgozp8/j4KCApe1Nm/eDH9/fxgMBrz55psICAiAxWJxa7/0ej1WrFiB/Px8zJw5E7NmzUJhYSG8vLxu+G7ZHWazGcnJybBYLCgpKUFUVBR+++03WK1WWCwWGI1GhIaG4uzZs2htbUVISAj0ej2mTZs2au01a9agvLwcubm5WLt2LTo6OlBYWIj169fDy8sLOp0OOTk5yM/Pdzp2/z61QjRVMM/+P+bZ5DD19phuivDwcLS0tMBsNuONN95AXFwcjEYjKioq8Nprr2HLli03XN9isWDHjh3Ytm0bYmJisGfPHlRXVyM1NVVZpqqqCg6HA0ajEXl5edi6davLWsXFxcjLy0NCQgK6u7tRX18PjUbj9r5t374dycnJyMjIgNlshslkQnR0NKZPn+52TVdUKhWsVitSUlKQnZ2NyMhILFu2DF1dXTAYDACAxYsX4/HHH0daWhoCAwNx4MCBMdUODg6G1WpFU1MT4uLisHr1auTk5Di9kGzbtg0pKSnIzMyE2WzGggULRn3xIvJEzLP/j3k2OaiEJ6PpDtXQ0IC0tDTY7fZb+tM5ly5dQnBwMMrKypCTk3PLtkNEUxfzjG4Gnn4lGqGlpQXt7e1ISkrCn3/+ic2bNwMAnn766QnuGRHR+DDPphYO6ohcKC0tRUdHBzQaDRISEvDNN98oF/sSEd1JmGdTB0+/EhEREXkAflGCiIiIyANwUEdERETkATioIyIiIvIAHNQREREReQAO6oiIiIg8AAd1RERERB6AgzoiIiIiD8BBHREREZEH+AfQyCmXwKOMJQAAAABJRU5ErkJggg==", + "text/plain": [ + "
"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# visualize comparison between heuristic and parsimony grouping\n",
+    "import matplotlib.pyplot as plt\n",
+    "from alphadia.plotting.utils import lighten_color\n",
+    "\n",
+    "def basic_barplot(\n",
+    "    values : list,\n",
+    "    names : list,\n",
+    "    title : list,\n",
+    "    xlabel : list,\n",
+    "    ylabel : list,\n",
+    "    color_hex : list,\n",
+    "    hwspace : float = 0.4,\n",
+    "):\n",
+    "    \"\"\"Draw one bar-chart panel per entry of `values`.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    values : list\n",
+    "        One entry per panel. Each entry is a list of sized objects;\n",
+    "        the bar height is `len()` of each object.\n",
+    "    names : list\n",
+    "        One list of bar labels per panel.\n",
+    "    title : list\n",
+    "        One panel title per panel.\n",
+    "    xlabel : list\n",
+    "        One x-axis label per panel.\n",
+    "    ylabel : list\n",
+    "        One y-axis label per panel.\n",
+    "    color_hex : list\n",
+    "        One bar color per panel.\n",
+    "    hwspace : float\n",
+    "        Horizontal spacing between panels (`wspace` of `subplots_adjust`).\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tuple\n",
+    "        The figure and the flattened array of axes.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    panels = len(values)\n",
+    "    # squeeze=False always returns a 2D array of axes, so flatten() also works for one panel\n",
+    "    fig, axs = plt.subplots(1, panels, figsize = (panels * 3, 5), squeeze = False)\n",
+    "    axs = axs.flatten()\n",
+    "    for i, ax in enumerate(axs):\n",
+    "        heights = [len(v) for v in values[i]]\n",
+    "        bars = ax.bar(names[i], heights, color = color_hex[i])\n",
+    "        for b in bars:\n",
+    "            b.set_edgecolor(lighten_color(b.get_facecolor(), 0.5))\n",
+    "            b.set_facecolor(color_hex[i])\n",
+    "            # annotate each bar with its height\n",
+    "            ax.text(b.get_x() + (b.get_width() / 2), b.get_height(), str(int(b.get_height())), ha = 'center', va = 'bottom')\n",
+    "\n",
+    "        ax.set_title(title[i])\n",
+    "        ax.set_xlabel(xlabel[i])\n",
+    "        ax.set_ylabel(ylabel[i])\n",
+    "\n",
+    "    # lay out after drawing so titles and labels are taken into account,\n",
+    "    # then apply the requested spacing between panels\n",
+    "    fig.tight_layout()\n",
+    "    fig.subplots_adjust(wspace = hwspace)\n",
+    "\n",
+    "    return fig, axs\n",
+    "\n",
+    "# Compare the number of peptides and protein groups between both grouping methods\n",
+    "pe_plot, _ = basic_barplot(\n",
+    "    [[data_tables[\"pe_heuristic\"], data_tables[\"pe_parsimony\"]], [data_tables[\"pg_heuristic\"], data_tables[\"pg_parsimony\"]]],\n",
+    "    [[\"heuristic\", \"parsimony\"], [\"heuristic\", \"parsimony\"]],\n",
+    "    [\"Number of peptides \\nper grouping method\", \"Number of protein groups \\nper grouping method\"],\n",
+    "    [\"Grouping method\", \"Grouping method\"],\n",
+    "    [\"Number of peptides\", \"Number of protein groups\"],\n",
+    "    [\"#67c8ff\", \"#67c8ff\"],\n",
+    ")\n",
+    "\n",
+    "# show plots next to each other in panel\n",
+    "layout = []\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+
"metadata": {}, + "source": [ + "### Comparison of maximum_parsimony and heuristic grouping:" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(7929, 4)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_tables[\"pg_parsimony\"].shape" + ] } ], "metadata": { From c5876c58a42fd4805afbd74976a2c414f7947602 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Sat, 25 May 2024 01:21:47 +0200 Subject: [PATCH 13/48] FIX transferlearning bug --- alphadia/outputaccumulator.py | 7 +++++++ alphadia/workflow/peptidecentric.py | 12 ++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/alphadia/outputaccumulator.py b/alphadia/outputaccumulator.py index 26c0a6fb..5328acab 100644 --- a/alphadia/outputaccumulator.py +++ b/alphadia/outputaccumulator.py @@ -100,6 +100,7 @@ def parse_output_folder( "mods", "mod_sites", "proba", + "decoy", ], ) -> Tuple[pd.DataFrame, pd.DataFrame]: """ @@ -132,10 +133,16 @@ def parse_output_folder( psm_df = psm_df[selected_precursor_columns] # validate.precursors_flat_from_output(psm_df) + # remove decoy precursors + psm_df = psm_df[psm_df["decoy"] == 0] + self._precursor_df = pd.DataFrame() for col in psm_df.columns: self._precursor_df[col] = psm_df[col] + self._precursor_df["decoy"] = self._precursor_df["decoy"].astype(int) + self._precursor_df = psm_df[psm_df["decoy"] == 0].reset_index(drop=True) + # self._precursor_df.set_index('precursor_idx', inplace=True) # Change the data type of the mods column to string self._precursor_df["mods"] = self._precursor_df["mods"].astype(str) diff --git a/alphadia/workflow/peptidecentric.py b/alphadia/workflow/peptidecentric.py index 2efe864f..ad438625 100644 --- a/alphadia/workflow/peptidecentric.py +++ b/alphadia/workflow/peptidecentric.py @@ -1109,6 +1109,9 @@ def _build_candidate_speclib_flat( "mod_sites", "sequence", "charge", + "rt_observed", + 
"mobility_observed", + "mz_observed", ], ) -> typing.Tuple[SpecLibFlat, pd.DataFrame]: """Build a candidate spectral library for transfer learning. @@ -1141,6 +1144,7 @@ def _build_candidate_speclib_flat( "mod_sites", "sequence", "charge", + "rt_observed", "mobility_observed", "mz_observed" ] Returns @@ -1152,13 +1156,13 @@ def _build_candidate_speclib_flat( Dataframe with scored candidates """ # remove decoys - psm_df = psm_df[psm_df["decoy"] == 0] + # psm_df = psm_df[psm_df["decoy"] == 0] - for col in ["rt_observed", "mobility_observed", "mz_observed"]: - optional_columns += [col] if col in psm_df.columns else [] + # build a filtered list so the default optional_columns list is not mutated + _optional_columns = [col for col in optional_columns if col in psm_df.columns] scored_candidates = plexscoring.candidate_features_to_candidates( - psm_df, optional_columns=optional_columns + psm_df, optional_columns=_optional_columns ) # create speclib with fragment_types of interest From 7b06333779c5fd5acd7688652d503962fb514391 Mon Sep 17 00:00:00 2001 From: Vincenth Brennsteiner Date: Sat, 25 May 2024 15:26:01 +0200 Subject: [PATCH 14/48] changed grouping.py docstring to numpy format, moved data to testdata, stripped notebook outputs, adapted grouping.py parsimony grouping argument. 
--- .gitignore | 1 - alphadia/grouping.py | 43 +- .../protein_grouping_tutorial.ipynb | 1346 +---------------- 3 files changed, 30 insertions(+), 1360 deletions(-) diff --git a/.gitignore b/.gitignore index 172f9b21..83dfd498 100644 --- a/.gitignore +++ b/.gitignore @@ -139,7 +139,6 @@ dmypy.json # Data testdata/ -nbs/debug/dev_grouping_comparison_data/ ###################### # OS generated files # diff --git a/alphadia/grouping.py b/alphadia/grouping.py index 5d64d499..1810c4af 100644 --- a/alphadia/grouping.py +++ b/alphadia/grouping.py @@ -14,21 +14,21 @@ def group_and_parsimony( precursor_idx: NDArray[np.int64], precursor_ids: NDArray[Any], - return_groups: bool = False, + return_parsimony_groups: bool = False, ): """Function to group ids based on precursor indices and return groups & master ids as lists - Args: - precursor_idx (np.array[int]): array containing unique integer indices corresponding - to each peptide precursor - precursor_ids (np.array[str]): array of variable length semicolon separated str belonging - to a given peptide precursor id + Parameters + ---------- + precursor_idx : np.array[int] + Array containing unique integer indices corresponding to each peptide precursor + precursor_ids : np.array[str] + Array of variable length semicolon separated str belonging to a given peptide precursor id Returns - ids (list[str]): list of ids linked to a given peptide precursor, such that each - precursor only belongs to one id. This list is ordered by precursor_idx. - groups (list[str]): list of semicolon separated ids belonging to a given peptide precursor, - such that each precursor only belongs to one group. This list is ordered by precursor_idx. + ------- + tuple + Tuple containing two lists: ids and groups. 
Each list is ordered by precursor_idx """ @@ -64,7 +64,7 @@ def group_and_parsimony( id_dict[subject_protein] = new_subject_set # With the following lines commented out, the query will only eliminate peptides from # respective subject proteins, but we will not add them to the query group - if return_groups and len(new_subject_set) == 0: + if return_parsimony_groups and len(new_subject_set) == 0: query_group.append(subject_protein) # save query to output lists @@ -112,14 +112,21 @@ def perform_grouping( ): """Highest level function for grouping proteins in precursor table - Args: - psm (pd.DataFrame) : Precursor table with columns "precursor_idx" and protein & decoy columns. - gene_or_protein (str, optional) : Column to group proteins by. Defaults to "proteins". - decoy_column (str, optional) : Column to use for decoy annotation. Defaults to "decoy". - group (bool, optional) : Whether to group proteins. Defaults to True. + Parameters + ---------- + psm : pd.DataFrame + Precursor table with columns "precursor_idx" and protein & decoy columns. + gene_or_protein : str + Column to group proteins by. Defaults to "proteins". + decoy_column : str + Column to use for decoy annotation. Defaults to "decoy". + group : bool + Whether to group proteins. Defaults to True. - Returns: - pd.DataFrame: Precursor table with grouped proteins + Returns + ------- + pd.DataFrame : + Precursor table with grouped proteins """ diff --git a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb index 780d4fb3..923de778 100644 --- a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb +++ b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -40,152 +40,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsgenesdecoypg_masterpg
00P1;P2;P3;P4P1;P2;P3;P40P1P1;P4
10P1;P2;P3;P4P1;P2;P3;P40P1P1;P4
21P1;P2P1;P20P1P1
31P1;P2P1;P20P1P1
42P4P40P4P4
52P4P41P4P4
63P4;P5P4;P51P4P4
73P4;P5P4;P51P4P4
84P6P61P6P6
94P6P61P6P6
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins genes decoy pg_master pg\n", - "0 0 P1;P2;P3;P4 P1;P2;P3;P4 0 P1 P1;P4\n", - "1 0 P1;P2;P3;P4 P1;P2;P3;P4 0 P1 P1;P4\n", - "2 1 P1;P2 P1;P2 0 P1 P1\n", - "3 1 P1;P2 P1;P2 0 P1 P1\n", - "4 2 P4 P4 0 P4 P4\n", - "5 2 P4 P4 1 P4 P4\n", - "6 3 P4;P5 P4;P5 1 P4 P4\n", - "7 3 P4;P5 P4;P5 1 P4 P4\n", - "8 4 P6 P6 1 P6 P6\n", - "9 4 P6 P6 1 P6 P6" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# example data showcases the collapse of one precursor - multiple protein inputs to one precursor - one master protein (pg_master). All proteins that can be grouped according to\n", "precursor_idx = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4]\n", @@ -254,1202 +111,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "distinct proteins\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoy
01A0
12A0
23B0
34B0
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy\n", - "0 1 A 0\n", - "1 2 A 0\n", - "2 3 B 0\n", - "3 4 B 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoypg_masterpg
01A0AA
12A0AA
23B0BB
34B0BB
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy pg_master pg\n", - "0 1 A 0 A A\n", - "1 2 A 0 A A\n", - "2 3 B 0 B B\n", - "3 4 B 0 B B" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "differentiable proteins\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoy
01A0
12A;B0
23A;B0
34B0
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy\n", - "0 1 A 0\n", - "1 2 A;B 0\n", - "2 3 A;B 0\n", - "3 4 B 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoypg_masterpg
01A0AA
12A;B0AA;B
23A;B0AA;B
34B0BB
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy pg_master pg\n", - "0 1 A 0 A A\n", - "1 2 A;B 0 A A;B\n", - "2 3 A;B 0 A A;B\n", - "3 4 B 0 B B" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "indistinguishable proteins\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoy
01A;B0
12A;B0
23A;B0
34A;B0
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy\n", - "0 1 A;B 0\n", - "1 2 A;B 0\n", - "2 3 A;B 0\n", - "3 4 A;B 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoypg_masterpg
01A;B0AA
12A;B0AA
23A;B0AA
34A;B0AA
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy pg_master pg\n", - "0 1 A;B 0 A A\n", - "1 2 A;B 0 A A\n", - "2 3 A;B 0 A A\n", - "3 4 A;B 0 A A" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "subset proteins\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoy
01A0
12A;B0
23A;B0
34A;B0
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy\n", - "0 1 A 0\n", - "1 2 A;B 0\n", - "2 3 A;B 0\n", - "3 4 A;B 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoypg_masterpg
01A0AA
12A;B0AA
23A;B0AA
34A;B0AA
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy pg_master pg\n", - "0 1 A 0 A A\n", - "1 2 A;B 0 A A\n", - "2 3 A;B 0 A A\n", - "3 4 A;B 0 A A" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "subsumable proteins\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoy
01A0
12A;B0
23B;C0
34C0
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy\n", - "0 1 A 0\n", - "1 2 A;B 0\n", - "2 3 B;C 0\n", - "3 4 C 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoypg_masterpg
01A0AA
12A;B0AA
23B;C0CC
34C0CC
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy pg_master pg\n", - "0 1 A 0 A A\n", - "1 2 A;B 0 A A\n", - "2 3 B;C 0 C C\n", - "3 4 C 0 C C" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shared only\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoy
01A;B0
12A;B;C0
23A;B;C0
34A;C0
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy\n", - "0 1 A;B 0\n", - "1 2 A;B;C 0\n", - "2 3 A;B;C 0\n", - "3 4 A;C 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoypg_masterpg
01A;B0AA
12A;B;C0AA
23A;B;C0AA
34A;C0AA
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy pg_master pg\n", - "0 1 A;B 0 A A\n", - "1 2 A;B;C 0 A A\n", - "2 3 A;B;C 0 A A\n", - "3 4 A;C 0 A A" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "circular\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoy
01A;B;C0
12B;C;D0
23C;D;E0
34D;E;A0
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy\n", - "0 1 A;B;C 0\n", - "1 2 B;C;D 0\n", - "2 3 C;D;E 0\n", - "3 4 D;E;A 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoypg_masterpg
01A;B;C0CA;C
12B;C;D0CC
23C;D;E0CC
34D;E;A0AA
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy pg_master pg\n", - "0 1 A;B;C 0 C A;C\n", - "1 2 B;C;D 0 C C\n", - "2 3 C;D;E 0 C C\n", - "3 4 D;E;A 0 A A" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "complex example\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoy
00P1;P2;P3;P40
11P1;P40
22P20
33P2;P50
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy\n", - "0 0 P1;P2;P3;P4 0\n", - "1 1 P1;P4 0\n", - "2 2 P2 0\n", - "3 3 P2;P5 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precursor_idxproteinsdecoypg_masterpg
00P1;P2;P3;P40P2P1;P2
11P1;P40P1P1
22P20P2P2
33P2;P50P2P2
\n", - "
" - ], - "text/plain": [ - " precursor_idx proteins decoy pg_master pg\n", - "0 0 P1;P2;P3;P4 0 P2 P1;P2\n", - "1 1 P1;P4 0 P1 P1\n", - "2 2 P2 0 P2 P2\n", - "3 3 P2;P5 0 P2 P2" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# test cases per Nesvizhskii, Alexey I., and Ruedi Aebersold. \"Interpretation of shotgun proteomic data.\" Molecular & cellular proteomics 4.10 (2005): 1419-1440. Figure 5 (see Appendix)\n", "def test_grouping():\n", From 4312a35c125a3b060057fbccf9c39163e7aa6156 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Wed, 29 May 2024 16:40:58 +0200 Subject: [PATCH 15/48] fix alphabase requirement --- requirements/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 80cfff2f..c543661d 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -5,7 +5,7 @@ numba==0.59.1 argparse==1.4.0 alpharaw==0.4.5 alphatims==1.0.8 -alphabase==1.2.3 +alphabase==1.2.4 peptdeep==1.1.9 progressbar==2.5 neptune==1.10.4 From b08d43ae0ae2d134f78f015c4bd2fdecb00278f1 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Fri, 7 Jun 2024 09:34:44 -0700 Subject: [PATCH 16/48] FIX fasta for Bruker --- alphadia/libtransform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/alphadia/libtransform.py b/alphadia/libtransform.py index 46125a60..ba1e5045 100644 --- a/alphadia/libtransform.py +++ b/alphadia/libtransform.py @@ -660,7 +660,7 @@ def forward(self, input: SpecLibFlat) -> SpecLibFlat: "rt_norm_pred", "irt", ], - "mobility_library": ["mobility_library", "mobility"], + "mobility_library": ["mobility_library", "mobility", "mobility_pred"], } fragment_columns = { @@ -680,6 +680,7 @@ def forward(self, input: SpecLibFlat) -> SpecLibFlat: if "mobility_library" not in input.precursor_df.columns: input.precursor_df["mobility_library"] = 0 + logger.warning("Library contains no ion mobility annotations") 
validate.precursors_flat_schema(input.precursor_df) validate.fragments_flat_schema(input.fragment_df) From 1d3690951afb8972d059e31070b8d7fcbb1ff167 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Fri, 7 Jun 2024 09:41:22 -0700 Subject: [PATCH 17/48] update requirements --- requirements/requirements.txt | 2 +- requirements/requirements_loose.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index c543661d..c01d989c 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -6,7 +6,7 @@ argparse==1.4.0 alpharaw==0.4.5 alphatims==1.0.8 alphabase==1.2.4 -peptdeep==1.1.9 +peptdeep==1.2.1 progressbar==2.5 neptune==1.10.4 seaborn==0.13.2 diff --git a/requirements/requirements_loose.txt b/requirements/requirements_loose.txt index 70dd08af..488d24af 100644 --- a/requirements/requirements_loose.txt +++ b/requirements/requirements_loose.txt @@ -5,8 +5,8 @@ numba argparse alpharaw>=0.3.1 # test: tolerate_version alphatims -alphabase>=1.1.2 # test: tolerate_version -peptdeep +alphabase>=1.2.4 # test: tolerate_version +peptdeep>=1.2.1 # test: tolerate_version progressbar neptune seaborn From 2fd2fcf9bf9d867bb79c0c6a091961b10d48b0d2 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Mon, 10 Jun 2024 12:34:20 -0400 Subject: [PATCH 18/48] FEAT logging raw file stats --- alphadia/data/alpharaw.py | 5 +++++ alphadia/data/bruker.py | 6 +++--- alphadia/data/stats.py | 44 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 alphadia/data/stats.py diff --git a/alphadia/data/alpharaw.py b/alphadia/data/alpharaw.py index 31079c24..8eedc094 100644 --- a/alphadia/data/alpharaw.py +++ b/alphadia/data/alpharaw.py @@ -8,6 +8,8 @@ # alphadia imports from alphadia import utils +from alphadia.data.stats import log_stats + # alpha family imports from alpharaw import thermo as alpharawthermo from alpharaw import sciex as alpharawsciex @@ -339,6 
+341,7 @@ def __init__(self, raw_file_path: str, process_count: int = 10, **kwargs): super().__init__(process_count=process_count) self.load_raw(raw_file_path) self.process_alpharaw(**kwargs) + log_stats(self.rt_values, self.cycle) class Sciex(AlphaRaw, alpharawsciex.SciexWiffData): @@ -346,6 +349,7 @@ def __init__(self, raw_file_path: str, process_count: int = 10, **kwargs): super().__init__(process_count=process_count) self.load_raw(raw_file_path) self.process_alpharaw(**kwargs) + log_stats(self.rt_values, self.cycle) class Thermo(AlphaRaw, alpharawthermo.ThermoRawData): @@ -353,6 +357,7 @@ def __init__(self, raw_file_path: str, process_count: int = 10, **kwargs): super().__init__(process_count=process_count) self.load_raw(raw_file_path) self.process_alpharaw(**kwargs) + log_stats(self.rt_values, self.cycle) def filter_spectra(self, cv: float = None, astral_ms1: bool = False, **kwargs): """ diff --git a/alphadia/data/bruker.py b/alphadia/data/bruker.py index 520dab27..2f003998 100644 --- a/alphadia/data/bruker.py +++ b/alphadia/data/bruker.py @@ -7,6 +7,7 @@ # alphadia imports from alphadia import utils +from alphadia.data.stats import log_stats # alpha family imports import alphatims.utils @@ -94,6 +95,7 @@ def __init__( # Precompile logger.info(f"Successfully imported data from {bruker_d_folder_name}") + log_stats(self.rt_values, self.cycle) def transpose(self): # abort if transposed data is already present @@ -655,9 +657,7 @@ def assemble_push( relative_precursor_index[i], relative_scan, relative_precursor, - ] = ( - accumulated_intensity + new_intensity - ) + ] = accumulated_intensity + new_intensity dense_output[ 1, j, diff --git a/alphadia/data/stats.py b/alphadia/data/stats.py new file mode 100644 index 00000000..6cabe37d --- /dev/null +++ b/alphadia/data/stats.py @@ -0,0 +1,44 @@ +import numpy as np +import logging + +logger = logging.getLogger() + + +def log_stats(rt_values: np.array, cycle: np.array): + """Log raw file statistics + + Parameters + 
---------- + + rt_values: np.ndarray + retention time values in seconds for all frames + + cycle: np.ndarray + DIA cycle object describing the msms pattern + """ + + logger.info(f"============ Raw file stats ============") + + rt_limits = rt_values.min() / 60, rt_values.max() / 60 + rt_duration_sec = rt_values.max() - rt_values.min() + rt_duration_min = rt_duration_sec / 60 + + logger.info(f"{'RT (min)':<20}: {rt_limits[0]:.1f} - {rt_limits[1]:.1f}") + logger.info(f"{'RT duration (sec)':<20}: {rt_duration_sec:.1f}") + logger.info(f"{'RT duration (min)':<20}: {rt_duration_min:.1f}") + + cycle_length = cycle.shape[1] + cycle_duration = np.diff(rt_values[::cycle_length]).mean() + cycle_number = len(rt_values) // cycle_length + + logger.info(f"{'Cycle len (scans)':<20}: {cycle_length:.0f}") + logger.info(f"{'Cycle len (sec)':<20}: {cycle_duration:.2f}") + logger.info(f"{'Number of cycles':<20}: {cycle_number:.0f}") + + flat_cycle = cycle.flatten() + flat_cycle = flat_cycle[flat_cycle > 0] + msms_range = flat_cycle.min(), flat_cycle.max() + + logger.info(f"{'MS2 range (m/z)':<20}: {msms_range[0]:.1f} - {msms_range[1]:.1f}") + + logger.info(f"========================================") From 6bc923a522447c285a4d1e942783c0099890cba6 Mon Sep 17 00:00:00 2001 From: Vincenth Brennsteiner Date: Mon, 10 Jun 2024 18:49:32 +0200 Subject: [PATCH 19/48] MicroCommit: clean notebook --- nbs/tutorial_nbs/protein_grouping_tutorial.ipynb | 1 - 1 file changed, 1 deletion(-) diff --git a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb index 923de778..d9169f29 100644 --- a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb +++ b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb @@ -293,7 +293,6 @@ " \"pg\": [\"P1;P2\", \"P1\", \"P2\", \"P2\"],\n", " }\n", "\n", - "\n", "test_grouping()" ] } From 82301d79f52cd2ac58cbcd4497b6beb14a8b6e29 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Mon, 10 Jun 2024 12:49:32 -0400 Subject: [PATCH 20/48] FEAT set 
log level from config --- alphadia/planning.py | 8 ++++++++ alphadia/workflow/reporting.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/alphadia/planning.py b/alphadia/planning.py index 5f8ea39e..9c13f287 100644 --- a/alphadia/planning.py +++ b/alphadia/planning.py @@ -123,6 +123,14 @@ def __init__( if "output" not in self.config: self.config["output"] = output_folder + # set log level + level_to_set = self.config["general"]["log_level"] + level_code = logging.getLevelNamesMapping().get(level_to_set) + if level_code is None: + logger.error(f"Setting logging to unknown level {level_to_set}") + else: + logger.setLevel(level_code) + self.load_library() torch.set_num_threads(self.config["general"]["thread_count"]) diff --git a/alphadia/workflow/reporting.py b/alphadia/workflow/reporting.py index 21c2b1e5..63acf82f 100644 --- a/alphadia/workflow/reporting.py +++ b/alphadia/workflow/reporting.py @@ -25,7 +25,7 @@ # Add a new logging level to the default logger # This has to happen at load time to make the .progress() method available even if no logger is instantiated -PROGRESS_LEVELV_NUM = 100 +PROGRESS_LEVELV_NUM = 21 logging.PROGRESS = PROGRESS_LEVELV_NUM logging.addLevelName(PROGRESS_LEVELV_NUM, "PROGRESS") From fccc463374ee10df83abb00cdb42b9e6ca9f4db7 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Mon, 10 Jun 2024 15:04:51 -0400 Subject: [PATCH 21/48] pip no cache --- misc/pip_install.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/misc/pip_install.sh b/misc/pip_install.sh index d8ce0368..28dd4d5f 100644 --- a/misc/pip_install.sh +++ b/misc/pip_install.sh @@ -3,7 +3,7 @@ set -e -u INSTALL_TYPE=$1 # stable, loose, etc.. ENV_NAME=${2:-alphadia} -conda create -n $ENV_NAME python=3.9 -y +conda create -n $ENV_NAME python=3.11 -y if [ "$INSTALL_TYPE" = "loose" ]; then INSTALL_STRING="" @@ -12,5 +12,5 @@ else fi # conda 'run' vs. 'activate', cf. 
https://stackoverflow.com/a/72395091 -conda run -n $ENV_NAME --no-capture-output pip install -e "../.$INSTALL_STRING" +conda run -n $ENV_NAME --no-capture-output pip install --no-cache-dir -e "../.$INSTALL_STRING" conda run -n $ENV_NAME --no-capture-output alphadia -v From d59c6e03e475b97ea254e32b05d4f35423afc2e7 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Mon, 10 Jun 2024 15:05:05 -0400 Subject: [PATCH 22/48] pip no cache --- misc/pip_install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/pip_install.sh b/misc/pip_install.sh index 28dd4d5f..196ed5ee 100644 --- a/misc/pip_install.sh +++ b/misc/pip_install.sh @@ -3,7 +3,7 @@ set -e -u INSTALL_TYPE=$1 # stable, loose, etc.. ENV_NAME=${2:-alphadia} -conda create -n $ENV_NAME python=3.11 -y +conda create -n $ENV_NAME python=3.9 -y if [ "$INSTALL_TYPE" = "loose" ]; then INSTALL_STRING="" From 92a240fe19401a0ed196fdb92d2c826cf8fc30d1 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Mon, 10 Jun 2024 15:56:54 -0400 Subject: [PATCH 23/48] fix testcase --- tests/unit_tests/conftest.py | 7 +++++-- tests/unit_tests/test_outputaccumulator.py | 14 +++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py index c8df3b31..b1a90b82 100644 --- a/tests/unit_tests/conftest.py +++ b/tests/unit_tests/conftest.py @@ -13,6 +13,7 @@ def mock_precursor_df( n_precursor: int = 100, + with_decoy=True, ) -> pd.DataFrame: """Create a mock precursor dataframe as it's found as the individual search outputs @@ -30,7 +31,6 @@ def mock_precursor_df( """ precursor_idx = np.arange(n_precursor) - decoy = np.zeros(n_precursor) precursor_mz = np.random.rand(n_precursor) * 2000 + 500 precursor_charge = np.random.choice([2, 3], size=n_precursor) @@ -40,7 +40,10 @@ def mock_precursor_df( proteins = np.random.choice(protein_names, size=n_precursor) genes = proteins - decoy = np.concatenate([np.zeros(n_precursor // 2), np.ones(n_precursor // 2)]) + if 
with_decoy: + decoy = np.concatenate([np.zeros(n_precursor // 2), np.ones(n_precursor // 2)]) + else: + decoy = np.zeros(n_precursor) proba = np.zeros(n_precursor) + decoy * np.random.rand(n_precursor) qval = np.random.rand(n_precursor) * 10e-3 diff --git a/tests/unit_tests/test_outputaccumulator.py b/tests/unit_tests/test_outputaccumulator.py index 239ed3ee..097e59f5 100644 --- a/tests/unit_tests/test_outputaccumulator.py +++ b/tests/unit_tests/test_outputaccumulator.py @@ -1,6 +1,7 @@ import os import tempfile import numpy as np +import pandas as pd from conftest import mock_precursor_df, mock_fragment_df from alphadia import outputtransform from alphabase.spectral_library.base import SpecLibBase @@ -67,7 +68,7 @@ def prepare_input_data(): # setup raw folders raw_folders = [os.path.join(progress_folder, run) for run in run_columns] - psm_base_df = mock_precursor_df(n_precursor=100) + psm_base_df = mock_precursor_df(n_precursor=100, with_decoy=True) fragment_base_df = mock_fragment_df(n_precursor=200, n_fragments=10) psm_dfs = [] @@ -122,12 +123,11 @@ def test_complete_output_accumulation(): os.path.join(temp_folder, f"{output.TRANSFER_OUTPUT}.hdf"), load_mod_seq=True ) - # Then: all unique precursors should be in the built library - number_of_unique_precursors = len( - np.unique( - np.concatenate([psm_df["precursor_idx"].values for psm_df in psm_dfs]) - ) - ) + # Then: all unique non-decoy precursors should be in the built library + union_psm_df = pd.concat(psm_dfs) + union_psm_df = union_psm_df[union_psm_df["decoy"] == 0] + number_of_unique_precursors = len(np.unique(union_psm_df["precursor_idx"])) + assert ( len(np.unique(built_lib.precursor_df["precursor_idx"])) == number_of_unique_precursors From 7250ced981f2d19ab6001382075565775cd97466 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Mon, 10 Jun 2024 18:03:58 -0400 Subject: [PATCH 24/48] use parquet as intermediate output --- alphadia/outputaccumulator.py | 4 ++-- alphadia/outputtransform.py | 16 
++++------------ alphadia/planning.py | 8 ++++---- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/alphadia/outputaccumulator.py b/alphadia/outputaccumulator.py index 5328acab..f9465711 100644 --- a/alphadia/outputaccumulator.py +++ b/alphadia/outputaccumulator.py @@ -122,8 +122,8 @@ def parse_output_folder( """ - psm_df = pd.read_csv(os.path.join(folder, "psm.tsv"), sep="\t") - frag_df = pd.read_csv(os.path.join(folder, "frag.tsv"), sep="\t") + psm_df = pd.read_parquet(os.path.join(folder, "psm.parquet")) + frag_df = pd.read_parquet(os.path.join(folder, "frag.parquet")) assert set( selected_precursor_columns diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py index b62dc1cf..9276cff7 100644 --- a/alphadia/outputtransform.py +++ b/alphadia/outputtransform.py @@ -54,22 +54,14 @@ def get_frag_df_generator(folder_list: List[str]): for folder in folder_list: raw_name = os.path.basename(folder) - frag_path = os.path.join(folder, "frag.tsv") + frag_path = os.path.join(folder, "frag.parquet") if not os.path.exists(frag_path): logger.warning(f"no frag file found for {raw_name}") else: try: logger.info(f"reading frag file for {raw_name}") - run_df = pd.read_csv( - frag_path, - sep="\t", - dtype={ - "precursor_idx": np.uint32, - "number": np.uint8, - "type": np.uint8, - }, - ) + run_df = pd.read_parquet(frag_path) except Exception as e: logger.warning(f"Error reading frag file for {raw_name}") logger.warning(e) @@ -497,7 +489,7 @@ def build_precursor_table( for folder in folder_list: raw_name = os.path.basename(folder) - psm_path = os.path.join(folder, f"{self.PSM_INPUT}.tsv") + psm_path = os.path.join(folder, f"{self.PSM_INPUT}.parquet") logger.info(f"Building output for {raw_name}") @@ -506,7 +498,7 @@ def build_precursor_table( run_df = pd.DataFrame() else: try: - run_df = pd.read_csv(psm_path, sep="\t") + run_df = pd.read_parquet(psm_path) except Exception as e: logger.warning(f"Error reading psm file for {raw_name}") logger.warning(e) 
diff --git a/alphadia/planning.py b/alphadia/planning.py index 784028ff..f9fe1292 100644 --- a/alphadia/planning.py +++ b/alphadia/planning.py @@ -283,8 +283,8 @@ def run( workflow_folder_list.append(workflow.path) # check if the raw file is already processed - psm_location = os.path.join(workflow.path, "psm.tsv") - frag_location = os.path.join(workflow.path, "frag.tsv") + psm_location = os.path.join(workflow.path, "psm.parquet") + frag_location = os.path.join(workflow.path, "frag.parquet") if self.config["general"]["reuse_quant"]: if os.path.exists(psm_location) and os.path.exists(frag_location): @@ -306,8 +306,8 @@ def run( psm_df, frag_df = workflow.requantify_fragments(psm_df) psm_df["run"] = raw_name - psm_df.to_csv(psm_location, sep="\t", index=False) - frag_df.to_csv(frag_location, sep="\t", index=False) + psm_df.to_parquet(psm_location, index=False) + frag_df.to_parquet(frag_location, index=False) workflow.reporter.log_string(f"Finished workflow for {raw_name}") workflow.reporter.context.__exit__(None, None, None) From 64c9c94dcae5e47f2add19eea6e58fb001920b22 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Mon, 10 Jun 2024 19:14:53 -0400 Subject: [PATCH 25/48] universal read write --- alphadia/consensus/utils.py | 84 ++++++++++++++++++++++++++++++ alphadia/constants/default.yaml | 2 + alphadia/outputtransform.py | 16 ++---- alphadia/peakgroup/search.py | 2 +- tests/unit_tests/conftest.py | 12 +++++ tests/unit_tests/test_consensus.py | 25 +++++++++ 6 files changed, 127 insertions(+), 14 deletions(-) create mode 100644 alphadia/consensus/utils.py create mode 100644 tests/unit_tests/test_consensus.py diff --git a/alphadia/consensus/utils.py b/alphadia/consensus/utils.py new file mode 100644 index 00000000..71054635 --- /dev/null +++ b/alphadia/consensus/utils.py @@ -0,0 +1,84 @@ +import logging +import os +import pandas as pd + +logger = logging.getLogger() +supported_formats = ["parquet", "tsv"] + + +def read_df(path_no_format, file_format="parquet"): + """Read 
dataframe from disk with chosen file format + + Parameters + ---------- + + path_no_format: str + File to read from disk without file format + + file_format: str, default = 'parquet' + File format for loading the file. Available options: ['parquet', 'tsv'] + + Returns + ------- + + pd.DataFrame + loaded dataframe from disk + + """ + + if file_format not in supported_formats: + raise ValueError( + f"Provided unknown file format: {file_format}, supported_formats: {supported_formats}" + ) + + file_path = f"{path_no_format}.{file_format}" + + if not os.path.exists(file_path): + raise FileNotFoundError(f"Can't load file as file was not found: {file_path}") + + logger.info(f"Reading {file_path} from disk") + + if file_format == "parquet": + return pd.read_parquet(file_path) + + elif file_format == "tsv": + return pd.read_csv(file_path, sep="\t") + + else: + raise ValueError("I don't know how you ended up here") + + +def write_df(df, path_no_format, file_format="parquet"): + """Write dataframe to disk with chosen file format + + Parameters + ---------- + + df: pd.DataFrame + Dataframe to save to disk + + path_no_format: str + Path for file without format + + file_format: str, default = 'parquet' + File format for saving the file. 
Available options: ['parquet', 'tsv'] + + """ + + if file_format not in supported_formats: + raise ValueError( + f"Provided unknown file format: {file_format}, supported_formats: {supported_formats}" + ) + + file_path = f"{path_no_format}.{file_format}" + + logger.info(f"Saving {file_path} to disk") + + if file_format == "parquet": + df.to_parquet(file_path, index=False) + + elif file_format == "tsv": + df.to_csv(file_path, sep="\t", index=False) + + else: + raise ValueError("I don't know how you ended up here") diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml index 5339a3df..4833a500 100644 --- a/alphadia/constants/default.yaml +++ b/alphadia/constants/default.yaml @@ -129,6 +129,8 @@ search_output: num_samples_quadratic: 50 min_nonnan: 3 normalize_lfq: True + # can be either "parquet" or "tsv" + file_format: "parquet" # configuration for the optimization manager # initial parameters, will nbe optimized diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py index 9276cff7..c8ce6985 100644 --- a/alphadia/outputtransform.py +++ b/alphadia/outputtransform.py @@ -445,20 +445,10 @@ def load_precursor_table(self): Precursor table """ - if not os.path.exists( - os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}.tsv") - ): - logger.error( - f"Can't continue as no {self.PRECURSOR_OUTPUT}.tsv file was found in the output folder: {self.output_folder}" - ) - raise FileNotFoundError( - f"Can't continue as no {self.PRECURSOR_OUTPUT}.tsv file was found in the output folder: {self.output_folder}" - ) - logger.info(f"Reading {self.PRECURSOR_OUTPUT}.tsv file") - psm_df = pd.read_csv( - os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}.tsv"), sep="\t" + return read_df( + os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}"), + file_type=self.config["file_format"], ) - return psm_df def build_precursor_table( self, diff --git a/alphadia/peakgroup/search.py b/alphadia/peakgroup/search.py index 0b97d445..9f3f6a42 
100644 --- a/alphadia/peakgroup/search.py +++ b/alphadia/peakgroup/search.py @@ -1041,7 +1041,7 @@ def assemble_candidates(self, elution_group_container): precursor_flat_lookup ] - # save features for training if desired. + # DEBUG: save features for training if desired. if self.feature_path is not None: feature_matrix = np.zeros( (len(candidates), len(candidates[0].features)), dtype=np.float32 diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py index b1a90b82..1976b9d7 100644 --- a/tests/unit_tests/conftest.py +++ b/tests/unit_tests/conftest.py @@ -4,6 +4,7 @@ import pandas as pd import numpy as np import matplotlib +import tempfile matplotlib.use("Agg") from matplotlib import pyplot as plt @@ -184,3 +185,14 @@ def pytest_configure(config): pytest.test_data[raw_folder] = raw_files # important to supress matplotlib output + + +def _random_tempfolder(): + tempdir = tempfile.gettempdir() + # 6 alphanumeric characters + random_foldername = "alphadia_" + "".join( + np.random.choice(list("abcdefghijklmnopqrstuvwxyz0123456789"), 6) + ) + path = os.path.join(tempdir, random_foldername) + os.mkdir(path) + return path diff --git a/tests/unit_tests/test_consensus.py b/tests/unit_tests/test_consensus.py new file mode 100644 index 00000000..f592e4ee --- /dev/null +++ b/tests/unit_tests/test_consensus.py @@ -0,0 +1,25 @@ +import pytest +import pandas as pd +import os +from conftest import _random_tempfolder +from alphadia.consensus.utils import read_df, write_df + + +@pytest.mark.parametrize( + "format, should_fail", + [("tsv", False), ("parquet", False), ("a321", True)], +) +def test_read_write(format, should_fail): + # given + df = pd.DataFrame([{"a": "a", "b": "b"}, {"a": "a", "b": "b"}]) + path = os.path.join(_random_tempfolder()) + + # when + if should_fail: + with pytest.raises(ValueError): + write_df(df, path, file_format=format) + + else: + write_df(df, path, file_format=format) + _df = read_df(path, file_format=format) + assert df.equals(_df) From 
7876db62e3cc68bccb87775e8d1b082f0360ce1f Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Mon, 10 Jun 2024 19:37:12 -0400 Subject: [PATCH 26/48] variable output format --- alphadia/consensus/utils.py | 2 +- alphadia/outputtransform.py | 39 +++++++++++++++++-------------------- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/alphadia/consensus/utils.py b/alphadia/consensus/utils.py index 71054635..07638fb6 100644 --- a/alphadia/consensus/utils.py +++ b/alphadia/consensus/utils.py @@ -78,7 +78,7 @@ def write_df(df, path_no_format, file_format="parquet"): df.to_parquet(file_path, index=False) elif file_format == "tsv": - df.to_csv(file_path, sep="\t", index=False) + df.to_csv(file_path, sep="\t", index=False, float_format="%.6f") else: raise ValueError("I don't know how you ended up here") diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py index c8ce6985..9f1746d6 100644 --- a/alphadia/outputtransform.py +++ b/alphadia/outputtransform.py @@ -10,7 +10,7 @@ TransferLearningAccumulator, AccumulationBroadcaster, ) - +from alphadia.consensus.utils import read_df, write_df import pandas as pd import numpy as np @@ -578,11 +578,10 @@ def build_precursor_table( psm_df = psm_df[psm_df["decoy"] == 0] if save: logger.info("Writing precursor output to disk") - psm_df.to_csv( - os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}.tsv"), - sep="\t", - index=False, - float_format="%.6f", + write_df( + psm_df, + os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}"), + file_format=self.config["search_output"]["file_format"], ) return psm_df @@ -630,11 +629,10 @@ def build_stat_df( if save: logger.info("Writing stat output to disk") - stat_df.to_csv( - os.path.join(self.output_folder, f"{self.STAT_OUTPUT}.tsv"), - sep="\t", - index=False, - float_format="%.6f", + write_df( + stat_df, + os.path.join(self.output_folder, f"{self.STAT_OUTPUT}"), + file_format="tsv", ) return stat_df @@ -712,11 +710,11 @@ def build_lfq_tables( if save: 
logger.info(f"Writing {group_nice} output to disk") - lfq_df.to_csv( - os.path.join(self.output_folder, f"{group_nice}.matrix.tsv"), - sep="\t", - index=False, - float_format="%.6f", + + write_df( + lfq_df, + os.path.join(self.output_folder, f"{group_nice}.matrix"), + file_format=self.config["search_output"]["file_format"], ) protein_df_melted = lfq_df.melt( @@ -727,11 +725,10 @@ def build_lfq_tables( if save: logger.info("Writing psm output to disk") - psm_df.to_csv( - os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}.tsv"), - sep="\t", - index=False, - float_format="%.6f", + write_df( + psm_df, + os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}"), + file_format=self.config["search_output"]["file_format"], ) return lfq_df From 74bb3621a3817eba07200843dbb76b77a3669f61 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Mon, 10 Jun 2024 19:58:58 -0400 Subject: [PATCH 27/48] fix tests --- alphadia/outputtransform.py | 2 +- tests/unit_tests/conftest.py | 4 ++-- tests/unit_tests/test_consensus.py | 4 ++-- tests/unit_tests/test_outputaccumulator.py | 6 +++--- tests/unit_tests/test_outputtransform.py | 11 ++++++----- tests/unit_tests/test_reporting.py | 23 +++++++--------------- 6 files changed, 21 insertions(+), 29 deletions(-) diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py index 9f1746d6..75a0db4b 100644 --- a/alphadia/outputtransform.py +++ b/alphadia/outputtransform.py @@ -447,7 +447,7 @@ def load_precursor_table(self): return read_df( os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}"), - file_type=self.config["file_format"], + file_format=self.config["search_output"]["file_format"], ) def build_precursor_table( diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py index 1976b9d7..5f977eb9 100644 --- a/tests/unit_tests/conftest.py +++ b/tests/unit_tests/conftest.py @@ -187,12 +187,12 @@ def pytest_configure(config): # important to supress matplotlib output -def _random_tempfolder(): +def 
random_tempfolder(): tempdir = tempfile.gettempdir() # 6 alphanumeric characters random_foldername = "alphadia_" + "".join( np.random.choice(list("abcdefghijklmnopqrstuvwxyz0123456789"), 6) ) path = os.path.join(tempdir, random_foldername) - os.mkdir(path) + os.makedirs(path, exist_ok=True) return path diff --git a/tests/unit_tests/test_consensus.py b/tests/unit_tests/test_consensus.py index f592e4ee..65aa5b19 100644 --- a/tests/unit_tests/test_consensus.py +++ b/tests/unit_tests/test_consensus.py @@ -1,7 +1,7 @@ import pytest import pandas as pd import os -from conftest import _random_tempfolder +from conftest import random_tempfolder from alphadia.consensus.utils import read_df, write_df @@ -12,7 +12,7 @@ def test_read_write(format, should_fail): # given df = pd.DataFrame([{"a": "a", "b": "b"}, {"a": "a", "b": "b"}]) - path = os.path.join(_random_tempfolder()) + path = os.path.join(random_tempfolder()) # when if should_fail: diff --git a/tests/unit_tests/test_outputaccumulator.py b/tests/unit_tests/test_outputaccumulator.py index 097e59f5..9de649b4 100644 --- a/tests/unit_tests/test_outputaccumulator.py +++ b/tests/unit_tests/test_outputaccumulator.py @@ -98,9 +98,9 @@ def prepare_input_data(): for i, raw_folder in enumerate(raw_folders): os.makedirs(raw_folder, exist_ok=True) - psm_dfs[i].to_csv(os.path.join(raw_folder, "psm.tsv"), sep="\t", index=False) - fragment_dfs[i].to_csv( - os.path.join(raw_folder, "frag.tsv"), sep="\t", index=False + psm_dfs[i].to_parquet(os.path.join(raw_folder, "psm.parquet"), index=False) + fragment_dfs[i].to_parquet( + os.path.join(raw_folder, "frag.parquet"), index=False ) return config, temp_folder, raw_folders, psm_dfs, fragment_dfs diff --git a/tests/unit_tests/test_outputtransform.py b/tests/unit_tests/test_outputtransform.py index 91a18926..bdbe038a 100644 --- a/tests/unit_tests/test_outputtransform.py +++ b/tests/unit_tests/test_outputtransform.py @@ -28,6 +28,7 @@ def test_output_transform(): "normalize_lfq": True, 
"peptide_level_lfq": False, "precursor_level_lfq": False, + "file_format": "parquet", }, } @@ -52,8 +53,8 @@ def test_output_transform(): fragment_base_df["precursor_idx"].isin(psm_df["precursor_idx"]) ] - frag_df.to_csv(os.path.join(raw_folder, "frag.tsv"), sep="\t", index=False) - psm_df.to_csv(os.path.join(raw_folder, "psm.tsv"), sep="\t", index=False) + frag_df.to_parquet(os.path.join(raw_folder, "frag.parquet"), index=False) + psm_df.to_parquet(os.path.join(raw_folder, "psm.parquet"), index=False) output = outputtransform.SearchPlanOutput(config, temp_folder) _ = output.build_precursor_table(raw_folders, save=True) @@ -61,8 +62,8 @@ def test_output_transform(): _ = output.build_lfq_tables(raw_folders, save=True) # validate psm_df output - psm_df = pd.read_csv( - os.path.join(temp_folder, f"{output.PRECURSOR_OUTPUT}.tsv"), sep="\t" + psm_df = pd.read_parquet( + os.path.join(temp_folder, f"{output.PRECURSOR_OUTPUT}.parquet"), ) assert all( [ @@ -91,7 +92,7 @@ def test_output_transform(): assert all([col in stat_df.columns for col in ["run", "precursors", "proteins"]]) # validate protein_df output - protein_df = pd.read_csv(os.path.join(temp_folder, "pg.matrix.tsv"), sep="\t") + protein_df = pd.read_parquet(os.path.join(temp_folder, "pg.matrix.parquet")) assert all([col in protein_df.columns for col in ["run_0", "run_1", "run_2"]]) for i in run_columns: diff --git a/tests/unit_tests/test_reporting.py b/tests/unit_tests/test_reporting.py index ed536de0..5d171f74 100644 --- a/tests/unit_tests/test_reporting.py +++ b/tests/unit_tests/test_reporting.py @@ -7,23 +7,14 @@ import sys import pytest -from alphadia.workflow import reporting - +from conftest import random_tempfolder -def _random_tempfolder(): - tempdir = tempfile.gettempdir() - # 6 alphanumeric characters - random_foldername = "".join( - np.random.choice(list("abcdefghijklmnopqrstuvwxyz0123456789"), 6) - ) - path = os.path.join(tempdir, random_foldername) - os.mkdir(path) - return path +from 
alphadia.workflow import reporting @pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows") def test_logging(): - tempfolder = _random_tempfolder() + tempfolder = random_tempfolder() if os.path.exists(os.path.join(tempfolder, "log.txt")): os.remove(os.path.join(tempfolder, "log.txt")) @@ -58,7 +49,7 @@ def test_backend(): def test_figure_backend(): - tempfolder = _random_tempfolder() + tempfolder = random_tempfolder() figure_backend = reporting.FigureBackend(path=tempfolder) @@ -79,7 +70,7 @@ def test_figure_backend(): def test_jsonl_backend(): - tempfolder = _random_tempfolder() + tempfolder = random_tempfolder() with reporting.JSONLBackend(path=tempfolder) as jsonl_backend: jsonl_backend.log_event("start_extraction", None) @@ -96,7 +87,7 @@ def test_jsonl_backend(): @pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows") def test_log_backend(): - tempfolder = _random_tempfolder() + tempfolder = random_tempfolder() if os.path.exists(os.path.join(tempfolder, "log.txt")): os.remove(os.path.join(tempfolder, "log.txt")) @@ -117,7 +108,7 @@ def test_log_backend(): @pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows") def test_pipeline(): - tempfolder = _random_tempfolder() + tempfolder = random_tempfolder() pipeline = reporting.Pipeline( backends=[ From 537a800ca55362dd2cb8f59fbfb0b7142c6064ba Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Mon, 10 Jun 2024 20:54:53 -0400 Subject: [PATCH 28/48] tl step --- alphadia/constants/default.yaml | 15 ++++++++- alphadia/outputtransform.py | 38 +++++++++++++++++++--- alphadia/planning.py | 2 +- alphadia/workflow/peptidecentric.py | 4 +-- nbs/tutorial_nbs/finetuning.ipynb | 31 +++++++++++++++++- tests/unit_tests/test_outputaccumulator.py | 8 ++--- 6 files changed, 84 insertions(+), 14 deletions(-) diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml index 4833a500..089eff9e 100644 --- a/alphadia/constants/default.yaml +++ 
b/alphadia/constants/default.yaml @@ -141,7 +141,7 @@ optimization_manager: # This section controls transfer learning # currently only the library is created with transfer learning -transfer_learning: +transfer_library: # if true, the library is created with transfer learning enabled: False @@ -166,6 +166,19 @@ transfer_learning: # include only fragments with a XIC correlation at least 0.75 of the median for all fragments fragment_correlation_ratio: 0.75 +transfer_learning: + enabled: False + batch_size: 2000 + max_lr: 0.0001 + train_ratio: 0.8 + test_interval: 1 + lr_patience: 3 + minimum_psms: 1200 + epochs: 51 + warmup_epochs: 5 + nce: 25 + instrument: 'Lumos' + # configuration for the calibration manager # the config has to start with the calibration keyword and consists of a list of calibration groups. # each group consists of datapoints which have multiple properties. diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py index 75a0db4b..7095530b 100644 --- a/alphadia/outputtransform.py +++ b/alphadia/outputtransform.py @@ -11,6 +11,7 @@ AccumulationBroadcaster, ) from alphadia.consensus.utils import read_df, write_df +from alphadia.transferlearning.train import FinetuneManager import pandas as pd import numpy as np @@ -307,6 +308,7 @@ class SearchPlanOutput: PG_OUTPUT = "protein_groups" LIBRARY_OUTPUT = "speclib.mbr" TRANSFER_OUTPUT = "speclib.transfer" + TRANSFER_MODEL = "peptdeep.transfer" def __init__(self, config: dict, output_folder: str): """Combine individual searches into and build combined outputs @@ -374,9 +376,35 @@ def build( _ = self.build_lfq_tables(folder_list, psm_df=psm_df, save=True) _ = self.build_library(base_spec_lib, psm_df=psm_df, save=True) - if self.config["transfer_learning"]["enabled"]: + if self.config["transfer_library"]["enabled"]: _ = self.build_transfer_library(folder_list, save=True) + if self.config["transfer_learning"]["enabled"]: + _ = self.build_transfer_model() + + def build_transfer_model(self): + 
logger.progress("Train PeptDeep Models") + transfer_lib = SpecLibBase() + transfer_lib.load_hdf( + os.path.join(self.output_folder, f"{self.TRANSFER_OUTPUT}.hdf"), + load_mod_seq=True, + ) + + device = "cpu" + if self.config["general"]["use_gpu"]: + device = "mps" if os.uname().sysname == "Darwin" else "gpu" + + tune_mgr = FinetuneManager( + device=device, settings=self.config["transfer_learning"] + ) + stats = tune_mgr.finetune_rt(transfer_lib.precursor_df) + stats = tune_mgr.finetune_charge(transfer_lib.precursor_df) + stats = tune_mgr.finetune_ms2( + transfer_lib.precursor_df.copy(), transfer_lib.fragment_intensity_df.copy() + ) + + tune_mgr.save_models(os.path.join(self.output_folder, self.TRANSFER_MODEL)) + def build_transfer_library( self, folder_list: List[str], @@ -408,12 +436,12 @@ def build_transfer_library( """ logger.progress("======== Building transfer library ========") transferAccumulator = TransferLearningAccumulator( - keep_top=self.config["transfer_learning"]["top_k_samples"], - norm_delta_max=self.config["transfer_learning"]["norm_delta_max"], - precursor_correlation_cutoff=self.config["transfer_learning"][ + keep_top=self.config["transfer_library"]["top_k_samples"], + norm_delta_max=self.config["transfer_library"]["norm_delta_max"], + precursor_correlation_cutoff=self.config["transfer_library"][ "precursor_correlation_cutoff" ], - fragment_correlation_ratio=self.config["transfer_learning"][ + fragment_correlation_ratio=self.config["transfer_library"][ "fragment_correlation_ratio" ], ) diff --git a/alphadia/planning.py b/alphadia/planning.py index f9fe1292..a92c7bdc 100644 --- a/alphadia/planning.py +++ b/alphadia/planning.py @@ -302,7 +302,7 @@ def run( psm_df = workflow.requantify(psm_df) psm_df = psm_df[psm_df["qval"] <= self.config["fdr"]["fdr"]] - if self.config["transfer_learning"]["enabled"]: + if self.config["transfer_library"]["enabled"]: psm_df, frag_df = workflow.requantify_fragments(psm_df) psm_df["run"] = raw_name diff --git 
a/alphadia/workflow/peptidecentric.py b/alphadia/workflow/peptidecentric.py index ad438625..f0dbfc83 100644 --- a/alphadia/workflow/peptidecentric.py +++ b/alphadia/workflow/peptidecentric.py @@ -1023,8 +1023,8 @@ def requantify_fragments( verbosity="progress", ) - fragment_types = self.config["transfer_learning"]["fragment_types"].split(";") - max_charge = self.config["transfer_learning"]["max_charge"] + fragment_types = self.config["transfer_library"]["fragment_types"].split(";") + max_charge = self.config["transfer_library"]["max_charge"] self.reporter.log_string( f"creating library for charged fragment types: {fragment_types}", diff --git a/nbs/tutorial_nbs/finetuning.ipynb b/nbs/tutorial_nbs/finetuning.ipynb index 936d2d17..33c0204c 100644 --- a/nbs/tutorial_nbs/finetuning.ipynb +++ b/nbs/tutorial_nbs/finetuning.ipynb @@ -16,6 +16,35 @@ "from alphadia.transferlearning.train import *\n" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'batch_size': 1000,\n", + " 'max_lr': 0.0005,\n", + " 'train_ratio': 0.8,\n", + " 'test_interval': 1,\n", + " 'lr_patience': 3,\n", + " 'minimum_psms': 1200,\n", + " 'epochs': 51,\n", + " 'warmup_epochs': 5,\n", + " 'nce': 25,\n", + " 'instrument': 'Lumos'}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "settings" + ] + }, { "cell_type": "code", "execution_count": 2, @@ -988,7 +1017,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/tests/unit_tests/test_outputaccumulator.py b/tests/unit_tests/test_outputaccumulator.py index 9de649b4..063687d0 100644 --- a/tests/unit_tests/test_outputaccumulator.py +++ b/tests/unit_tests/test_outputaccumulator.py @@ -48,7 +48,7 @@ def prepare_input_data(): "peptide_level_lfq": False, "precursor_level_lfq": False, }, - "transfer_learning": { + 
"transfer_library": { "enabled": True, "fragment_types": "b;y", "max_charge": 2, @@ -113,7 +113,7 @@ def test_complete_output_accumulation(): """ # Given: config, temp_folder, raw_folders, psm_dfs, fragment_dfs = prepare_input_data() - config["transfer_learning"]["top_k_samples"] = 2 + config["transfer_library"]["top_k_samples"] = 2 # When: output = outputtransform.SearchPlanOutput(config, temp_folder) @@ -145,7 +145,7 @@ def test_selection_of_precursors(): # Given: config, temp_folder, raw_folders, psm_dfs, fragment_dfs = prepare_input_data() keep_top = 2 - config["transfer_learning"]["top_k_samples"] = keep_top + config["transfer_library"]["top_k_samples"] = keep_top # When: output = outputtransform.SearchPlanOutput(config, temp_folder) _ = output.build_transfer_library(raw_folders, save=True) @@ -186,7 +186,7 @@ def test_keep_top_constraint(): # Given: config, temp_folder, raw_folders, psm_dfs, fragment_dfs = prepare_input_data() keep_top = 2 - config["transfer_learning"]["top_k_samples"] = keep_top + config["transfer_library"]["top_k_samples"] = keep_top # When: output = outputtransform.SearchPlanOutput(config, temp_folder) From be14409f6f020be2372b5e70371f0ccc6321a913 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Mon, 10 Jun 2024 21:24:10 -0400 Subject: [PATCH 29/48] bugfixes --- alphadia/outputtransform.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py index 7095530b..0aef4105 100644 --- a/alphadia/outputtransform.py +++ b/alphadia/outputtransform.py @@ -384,9 +384,18 @@ def build( def build_transfer_model(self): logger.progress("Train PeptDeep Models") + + transfer_lib_path = os.path.join( + self.output_folder, f"{self.TRANSFER_OUTPUT}.hdf" + ) + if not os.path.exists: + raise ValueError( + f"Transfer learning library was not found at {self.TRANSFER_OUTPUT}.hdf. Did you enable library generation?" 
+ ) + transfer_lib = SpecLibBase() transfer_lib.load_hdf( - os.path.join(self.output_folder, f"{self.TRANSFER_OUTPUT}.hdf"), + transfer_lib_path, load_mod_seq=True, ) From 3be7a4e627d2c232a2afcbdf3d66a829d4e79ee4 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Mon, 10 Jun 2024 21:29:02 -0400 Subject: [PATCH 30/48] add parameter description --- alphadia/constants/default.yaml | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml index 089eff9e..070bf523 100644 --- a/alphadia/constants/default.yaml +++ b/alphadia/constants/default.yaml @@ -142,7 +142,7 @@ optimization_manager: # This section controls transfer learning # currently only the library is created with transfer learning transfer_library: - # if true, the library is created with transfer learning + # if true, the library is created for transfer learning enabled: False # semicolon separated list of fragment types to include in the library. possible values are 'a', 'b', 'c', 'x', 'y', 'z' @@ -167,16 +167,39 @@ transfer_library: fragment_correlation_ratio: 0.75 transfer_learning: + + # if true, a custom peptdeep model will be created using the transfer learned library enabled: False + + # number of precursors per batch batch_size: 2000 + + # maximum learning rate per batch. 
+ # The maximum learning rate will be reached after a warmup phase and decreased using a plateau scheduler max_lr: 0.0001 + + # TODO remove and replaced by fixed 70:20:10 split train_ratio: 0.8 + + # test every n intervals test_interval: 1 + + # learning rate patience after which the lr will be halved lr_patience: 3 - minimum_psms: 1200 + + # minimum precursor number to perform transfer learning + minimum_psms: 10000 + + # maximum number of epochs epochs: 51 + + # number of warmup epochs during which the lr is ramped up warmup_epochs: 5 + + # normalised collision energy encoded during training nce: 25 + + # instrument type encoded during training instrument: 'Lumos' # configuration for the calibration manager From 989705d4a89cede318e0fa717333bf23264ad6f7 Mon Sep 17 00:00:00 2001 From: Mohamed Sameh Date: Tue, 11 Jun 2024 19:12:08 +0200 Subject: [PATCH 31/48] log trace error in callback --- alphadia/outputaccumulator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alphadia/outputaccumulator.py b/alphadia/outputaccumulator.py index 5328acab..6f993000 100644 --- a/alphadia/outputaccumulator.py +++ b/alphadia/outputaccumulator.py @@ -239,7 +239,7 @@ def process_folder(folder): def error_callback(e): - logger.error(e) + logger.error(e, exc_info=True) class AccumulationBroadcaster: From 68b6b00910742e66327f86235cdd1131652eaa8e Mon Sep 17 00:00:00 2001 From: Mohamed Sameh Date: Wed, 12 Jun 2024 22:06:56 +0200 Subject: [PATCH 32/48] fix: output accumulator test failure --- alphadia/outputaccumulator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/alphadia/outputaccumulator.py b/alphadia/outputaccumulator.py index 6f993000..251ae7fa 100644 --- a/alphadia/outputaccumulator.py +++ b/alphadia/outputaccumulator.py @@ -167,7 +167,8 @@ def parse_output_folder( self._precursor_df[col] = values # ----------------- Fragment ----------------- - + # Filer fragments that are not used in the precursors + frag_df = 
frag_df[frag_df["precursor_idx"].isin(self._precursor_df["precursor_idx"])] self._fragment_df = frag_df[ ["mz", "intensity", "precursor_idx", "frag_idx", "correlation"] ].copy() From 258a6a7b41599851b11b69a99d51ba762e00d5c9 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Wed, 12 Jun 2024 14:03:48 -0700 Subject: [PATCH 33/48] implement changes --- alphadia/workflow/peptidecentric.py | 47 ++++++++++++++--------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/alphadia/workflow/peptidecentric.py b/alphadia/workflow/peptidecentric.py index cfc2d627..da8e831e 100644 --- a/alphadia/workflow/peptidecentric.py +++ b/alphadia/workflow/peptidecentric.py @@ -1102,25 +1102,7 @@ def _build_candidate_speclib_flat( psm_df: pd.DataFrame, fragment_types: typing.List[str] = ["b", "y"], max_charge: int = 2, - optional_columns: typing.List[str] = [ - "proba", - "score", - "qval", - "channel", - "rt_library", - "mz_library", - "mobility_library", - "genes", - "proteins", - "decoy", - "mods", - "mod_sites", - "sequence", - "charge", - "rt_observed", - "mobility_observed", - "mz_observed", - ], + optional_columns: typing.Union[typing.List[str], None] = None, ) -> typing.Tuple[SpecLibFlat, pd.DataFrame]: """Build a candidate spectral library for transfer learning. 
@@ -1163,14 +1145,31 @@ def _build_candidate_speclib_flat( scored_candidates: pd.DataFrame Dataframe with scored candidates """ - # remove decoys - # psm_df = psm_df[psm_df["decoy"] == 0] - # make copy to avoid modifying the original dataframe - _optional_columns = [col for col in optional_columns if col in psm_df.columns] + # set default optional columns + if optional_columns is None: + optional_columns = [ + "proba", + "score", + "qval", + "channel", + "rt_library", + "mz_library", + "mobility_library", + "genes", + "proteins", + "decoy", + "mods", + "mod_sites", + "sequence", + "charge", + "rt_observed", + "mobility_observed", + "mz_observed", + ] scored_candidates = plexscoring.candidate_features_to_candidates( - psm_df, optional_columns=_optional_columns + psm_df, optional_columns=optional_columns ) # create speclib with fragment_types of interest From ee7e59d580fdd39d6da051efd492c4bd3d9d583a Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Wed, 12 Jun 2024 17:37:08 -0700 Subject: [PATCH 34/48] implement comments --- alphadia/planning.py | 3 +-- alphadia/workflow/reporting.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/alphadia/planning.py b/alphadia/planning.py index 9c13f287..803ddc5a 100644 --- a/alphadia/planning.py +++ b/alphadia/planning.py @@ -125,8 +125,7 @@ def __init__( # set log level level_to_set = self.config["general"]["log_level"] - level_code = logging.getLevelNamesMapping().get(level_to_set) - if level_code is None: + if (level_code := logging.getLevelNamesMapping().get(level_to_set)) is None: logger.error(f"Setting logging to unknown level {level_to_set}") else: logger.setLevel(level_code) diff --git a/alphadia/workflow/reporting.py b/alphadia/workflow/reporting.py index 63acf82f..b92e84c4 100644 --- a/alphadia/workflow/reporting.py +++ b/alphadia/workflow/reporting.py @@ -23,7 +23,7 @@ # As soon as its instantiated the default logger will be configured with a path to save the log file __is_initiated__ = False -# Add a 
new logging level to the default logger +# Add a new logging level to the default logger, level 21 is just above INFO (20) # This has to happen at load time to make the .progress() method available even if no logger is instantiated PROGRESS_LEVELV_NUM = 21 logging.PROGRESS = PROGRESS_LEVELV_NUM From db1e3f4c8cae05085f52ac9c43e0f6818c8b9025 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Wed, 12 Jun 2024 18:03:53 -0700 Subject: [PATCH 35/48] implement fixes --- alphadia/consensus/utils.py | 9 +++------ alphadia/constants/default.yaml | 2 +- alphadia/outputtransform.py | 4 ++-- tests/unit_tests/conftest.py | 12 ++++++++++++ 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/alphadia/consensus/utils.py b/alphadia/consensus/utils.py index 07638fb6..084ded8d 100644 --- a/alphadia/consensus/utils.py +++ b/alphadia/consensus/utils.py @@ -26,11 +26,6 @@ def read_df(path_no_format, file_format="parquet"): """ - if file_format not in supported_formats: - raise ValueError( - f"Provided unknown file format: {file_format}, supported_formats: {supported_formats}" - ) - file_path = f"{path_no_format}.{file_format}" if not os.path.exists(file_path): @@ -45,7 +40,9 @@ def read_df(path_no_format, file_format="parquet"): return pd.read_csv(file_path, sep="\t") else: - raise ValueError("I don't know how you ended up here") + raise ValueError( + f"Provided unknown file format: {file_format}, supported_formats: {supported_formats}" + ) def write_df(df, path_no_format, file_format="parquet"): diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml index 4833a500..3d3a8112 100644 --- a/alphadia/constants/default.yaml +++ b/alphadia/constants/default.yaml @@ -130,7 +130,7 @@ search_output: min_nonnan: 3 normalize_lfq: True # can be either "parquet" or "tsv" - file_format: "parquet" + file_format: "tsv" # configuration for the optimization manager # initial parameters, will nbe optimized diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py 
index 75a0db4b..f5db4491 100644 --- a/alphadia/outputtransform.py +++ b/alphadia/outputtransform.py @@ -580,7 +580,7 @@ def build_precursor_table( logger.info("Writing precursor output to disk") write_df( psm_df, - os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}"), + os.path.join(self.output_folder, self.PRECURSOR_OUTPUT), file_format=self.config["search_output"]["file_format"], ) @@ -631,7 +631,7 @@ def build_stat_df( logger.info("Writing stat output to disk") write_df( stat_df, - os.path.join(self.output_folder, f"{self.STAT_OUTPUT}"), + os.path.join(self.output_folder, self.STAT_OUTPUT), file_format="tsv", ) diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py index 5f977eb9..3ddd2017 100644 --- a/tests/unit_tests/conftest.py +++ b/tests/unit_tests/conftest.py @@ -24,6 +24,9 @@ def mock_precursor_df( n_precursor : int Number of precursors to generate + with_decoy : bool + If True, half of the precursors will be decoys + Returns ------- @@ -188,6 +191,14 @@ def pytest_configure(config): def random_tempfolder(): + """Create a randomly named temp folder in the system temp folder + + Returns + ------- + path : str + Path to the created temp folder + + """ tempdir = tempfile.gettempdir() # 6 alphanumeric characters random_foldername = "alphadia_" + "".join( @@ -195,4 +206,5 @@ def random_tempfolder(): ) path = os.path.join(tempdir, random_foldername) os.makedirs(path, exist_ok=True) + print(f"Created temp folder: {path}") return path From 10ac05a93dda8922c2f7a7dc0a2e3278536a7e21 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Thu, 13 Jun 2024 12:16:19 -0700 Subject: [PATCH 36/48] remove misc config --- misc/config/default.yaml | 186 --------------------------------------- 1 file changed, 186 deletions(-) delete mode 100644 misc/config/default.yaml diff --git a/misc/config/default.yaml b/misc/config/default.yaml deleted file mode 100644 index b3d1d7a5..00000000 --- a/misc/config/default.yaml +++ /dev/null @@ -1,186 +0,0 @@ -# configuration 
for the extraction plan -version: 1 - -general: - thread_count: 10 - # maximum number of threads or processes to use per raw file - reuse_calibration: false - reuse_quant: false - astral_ms1: false - log_level: 'INFO' - wsl: false - mmap_detector_events: false - use_gpu: true - -library_loading: - rt_heuristic: 180 - # if retention times are reported in absolute units, the rt_heuristic defines rt is interpreted as minutes or seconds - -library_prediction: - predict: False - enzyme: trypsin - fixed_modifications: 'Carbamidomethyl@C' - variable_modifications: 'Oxidation@M;Acetyl@Protein N-term' - max_var_mod_num: 2 - missed_cleavages: 1 - precursor_len: - - 7 - - 35 - precursor_charge: - - 2 - - 4 - precursor_mz: - - 400 - - 1200 - fragment_mz: - - 200 - - 2000 - nce: 25.0 - instrument: QE - save_hdf: True - -search: - channel_filter: '0' - exclude_shared_ions: True - compete_for_fragments: True - - target_num_candidates: 2 - target_ms1_tolerance: 15 - target_ms2_tolerance: 15 - target_mobility_tolerance: 0.04 - target_rt_tolerance: 60 - - quant_window: 3 - -search_advanced: - top_k_fragments: 12 - -calibration: - min_epochs: 3 - max_epochs: 20 - batch_size: 8000 - recalibration_target: 200 - final_full_calibration: False - norm_rt_mode: 'linear' - -search_initial: - initial_num_candidates: 1 - initial_ms1_tolerance: 30 - initial_ms2_tolerance: 30 - initial_mobility_tolerance: 0.08 - initial_rt_tolerance: 240 - -selection_config: - peak_len_rt: 10. - sigma_scale_rt: 0.5 - peak_len_mobility: 0.01 - sigma_scale_mobility: 1. 
- - top_k_precursors: 3 - kernel_size: 30 - - f_mobility: 1.0 - f_rt: 0.99 - center_fraction: 0.5 - min_size_mobility: 8 - min_size_rt: 3 - max_size_mobility: 50 - max_size_rt: 15 - - group_channels: False - use_weighted_score: True - - join_close_candidates: True - join_close_candidates_scan_threshold: 0.01 - join_close_candidates_cycle_threshold: 0.6 - -scoring_config: - score_grouped: false - top_k_isotopes: 3 - reference_channel: -1 - precursor_mz_tolerance: 10 - fragment_mz_tolerance: 15 - -multiplexing: - multiplexed_quant: False - target_channels: '4,8' - decoy_channel: 12 - reference_channel: 0 - competetive_scoring: True - -fdr: - fdr: 0.01 - group_level: 'proteins' - competetive_scoring: true - keep_decoys: false - channel_wise_fdr: false - inference_strategy: "heuristic" - -search_output: - peptide_level_lfq: false - precursor_level_lfq: false - min_k_fragments: 12 - min_correlation: 0.9 - num_samples_quadratic: 50 - min_nonnan: 3 - normalize_lfq: True - -# configuration for the optimization manager -# initial parameters, will nbe optimized -optimization_manager: - fwhm_rt: 5 - fwhm_mobility: 0.01 - score_cutoff: 0 - -# configuration for the calibration manager -# the config has to start with the calibration keyword and consists of a list of calibration groups. -# each group consists of datapoints which have multiple properties. 
-# This can be for example precursors (mz, rt ...), fragments (mz, ...), quadrupole (transfer_efficiency) -calibration_manager: - - name: fragment - estimators: - - name: mz - model: LOESSRegression - model_args: - n_kernels: 2 - input_columns: - - mz_library - target_columns: - - mz_observed - output_columns: - - mz_calibrated - transform_deviation: 1e6 - - name: precursor - estimators: - - name: mz - model: LOESSRegression - model_args: - n_kernels: 2 - input_columns: - - mz_library - target_columns: - - mz_observed - output_columns: - - mz_calibrated - transform_deviation: 1e6 - - name: rt - model: LOESSRegression - model_args: - n_kernels: 6 - uniform: True - input_columns: - - rt_library - target_columns: - - rt_observed - output_columns: - - rt_calibrated - - name: mobility - model: LOESSRegression - model_args: - n_kernels: 2 - input_columns: - - mobility_library - target_columns: - - mobility_observed - output_columns: - - mobility_calibrated From e34c050e76b650c112e6910ae3e23edc099d2f79 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Thu, 13 Jun 2024 12:27:07 -0700 Subject: [PATCH 37/48] remove uname --- alphadia/libtransform.py | 9 +-------- alphadia/outputtransform.py | 4 +--- alphadia/utils.py | 24 ++++++++++++++++++++++++ tests/unit_tests/test_utils.py | 12 ++++++++++++ 4 files changed, 38 insertions(+), 11 deletions(-) diff --git a/alphadia/libtransform.py b/alphadia/libtransform.py index ba1e5045..27e9d009 100644 --- a/alphadia/libtransform.py +++ b/alphadia/libtransform.py @@ -298,14 +298,7 @@ def forward(self, input: SpecLibBase) -> SpecLibBase: input.charged_frag_types = charged_frag_types - # Check if CPU or GPU/MPS should be used - device = "cpu" - if self.use_gpu: - try: - device = "mps" if os.uname().sysname == "Darwin" else "gpu" - except AttributeError: - # Windows does not support uname - device = "gpu" + device = utils.get_torch_device(self.use_gpu) model_mgr = ModelManager(device=device) if self.checkpoint_folder_path is not None: diff --git 
a/alphadia/outputtransform.py b/alphadia/outputtransform.py index 976642be..73e77b7f 100644 --- a/alphadia/outputtransform.py +++ b/alphadia/outputtransform.py @@ -399,9 +399,7 @@ def build_transfer_model(self): load_mod_seq=True, ) - device = "cpu" - if self.config["general"]["use_gpu"]: - device = "mps" if os.uname().sysname == "Darwin" else "gpu" + device = utils.get_torch_device(self.config["general"]["use_gpu"]) tune_mgr = FinetuneManager( device=device, settings=self.config["transfer_learning"] diff --git a/alphadia/utils.py b/alphadia/utils.py index 28048ef3..6bb4eb85 100644 --- a/alphadia/utils.py +++ b/alphadia/utils.py @@ -3,6 +3,7 @@ from ctypes import Structure, c_double import typing import re +import platform # alphadia imports @@ -20,6 +21,29 @@ ISOTOPE_DIFF = 1.0032999999999674 +def get_torch_device(use_gpu: bool = False): + """Get the torch device to be used. + + Parameters + ---------- + + use_gpu : bool, optional + If True, use GPU if available, by default False + + Returns + ------- + str + Device to be used, either 'cpu', 'gpu' or 'mps' + + """ + + device = "cpu" + if use_gpu: + device = "mps" if platform.system() == "Darwin" else "gpu" + + return device + + @nb.njit def candidate_hash(precursor_idx, rank): # create a 64 bit hash from the precursor_idx, number and type diff --git a/tests/unit_tests/test_utils.py b/tests/unit_tests/test_utils.py index 399377e9..cdc05816 100644 --- a/tests/unit_tests/test_utils.py +++ b/tests/unit_tests/test_utils.py @@ -11,6 +11,7 @@ wsl_to_windows, windows_to_wsl, merge_missing_columns, + get_torch_device, ) @@ -20,6 +21,17 @@ import pytest +@pytest.mark.parametrize("use_gpu", [True, False]) +def test_get_torch_device(use_gpu): + # given + + # when + device = get_torch_device(use_gpu) + + # then + assert device in ["gpu", "mps"] if use_gpu else "cpu" + + def test_amean0(): test_array = np.random.random((10, 10)) From 1bd3ad5e30af6c8adf1931dceb63e384832de6f2 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Thu, 13 
Jun 2024 12:29:38 -0700 Subject: [PATCH 38/48] assert output path --- alphadia/outputtransform.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py index 73e77b7f..270580e0 100644 --- a/alphadia/outputtransform.py +++ b/alphadia/outputtransform.py @@ -388,10 +388,9 @@ def build_transfer_model(self): transfer_lib_path = os.path.join( self.output_folder, f"{self.TRANSFER_OUTPUT}.hdf" ) - if not os.path.exists: - raise ValueError( - f"Transfer learning library was not found at {self.TRANSFER_OUTPUT}.hdf. Did you enable library generation?" - ) + assert os.path.exists( + transfer_lib_path + ), f"Transfer library not found at {transfer_lib_path}, did you enable library generation?" transfer_lib = SpecLibBase() transfer_lib.load_hdf( From f16f0b8d465f575cedf07f885e53b487b796329d Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Thu, 13 Jun 2024 12:40:45 -0700 Subject: [PATCH 39/48] check torch backends --- alphadia/utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/alphadia/utils.py b/alphadia/utils.py index 6bb4eb85..db113a0c 100644 --- a/alphadia/utils.py +++ b/alphadia/utils.py @@ -4,6 +4,9 @@ import typing import re import platform +import torch + +logger = logging.getLogger() # alphadia imports @@ -39,7 +42,12 @@ def get_torch_device(use_gpu: bool = False): device = "cpu" if use_gpu: - device = "mps" if platform.system() == "Darwin" else "gpu" + if platform.system() == "Darwin": + device = "mps" if torch.backends.mps.is_available() else "cpu" + else: + device = "gpu" if torch.cuda.is_available() else "cpu" + + logger.info(f"Device set to {device}") return device From c7a26e863196859c7b82fea3985db95ea2bd309c Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Thu, 13 Jun 2024 14:53:08 -0700 Subject: [PATCH 40/48] fix testcase --- tests/unit_tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/test_utils.py 
b/tests/unit_tests/test_utils.py index cdc05816..df46e188 100644 --- a/tests/unit_tests/test_utils.py +++ b/tests/unit_tests/test_utils.py @@ -29,7 +29,7 @@ def test_get_torch_device(use_gpu): device = get_torch_device(use_gpu) # then - assert device in ["gpu", "mps"] if use_gpu else "cpu" + assert device in ["gpu", "mps", "cpu"] def test_amean0(): From 73ba9fb8ca267708fdf9cfef1dd9c8a0003f6211 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Thu, 13 Jun 2024 15:29:06 -0700 Subject: [PATCH 41/48] make tl accessible from gui --- gui/workflows/PeptideCentric.v1.json | 161 ++++++++++++++++++++++++++- 1 file changed, 159 insertions(+), 2 deletions(-) diff --git a/gui/workflows/PeptideCentric.v1.json b/gui/workflows/PeptideCentric.v1.json index b891774c..1946b069 100644 --- a/gui/workflows/PeptideCentric.v1.json +++ b/gui/workflows/PeptideCentric.v1.json @@ -224,7 +224,7 @@ { "id": "instrument", "name": "Instrument", - "value": "Fusion", + "value": "Lumos", "description": "Instrument used for ms2 spectrum prediction.", "type": "dropdown", "options": [ @@ -232,7 +232,7 @@ "QE", "timsTOF", "SciexTOF", - "Fusion", + "Lumos", "Eclipse", "Velos", "Elite", @@ -523,6 +523,163 @@ "type": "boolean" } ] + }, + { + "id": "transfer_library", + "name": "Transfer Library", + "hidden": false, + "parameters": [ + { + "id": "enabled", + "name": "Enabled", + "value": false, + "description": "If true, transfer learnin training dataset is created.", + "type": "boolean" + }, + { + "id": "fragment_types", + "name": "Fragment types", + "value": "b;y", + "description": "Semicolon separated list of fragment types to include in the library. 
\n possible values are 'a', 'b', 'c', 'x', 'y', 'z'", + "type": "string" + }, + { + "id": "max_charge", + "name": "Maximum charge", + "value": 2, + "description": "Maximum charge for fragments.", + "type": "integer" + }, + { + "id": "top_k_samples", + "name": "Top k samples", + "value": 3, + "description": "If a given precursor appears multiple times in an experiment, only the top_k_samples with the highest scores are included in the library.", + "type": "integer" + }, + { + "id": "norm_delta_max", + "name": "Norm delta max", + "value": true, + "description": "Perform advanced rt calibration. \nIf set to false retention times will be normalised by the maximum retention time observed in the experiment. \nIf set to true, a combination of maximum normalisation and deviation from the calibration curve will be used.", + "type": "boolean" + }, + { + "id": "precursor_correlation_cutoff", + "name": "Precursor correlation cutoff", + "value": 0.5, + "description": "Use only precursors for ms2 training with a median XIC correlation above this threshold.", + "type": "float" + }, + { + "id": "fragment_correlation_ratio", + "name": "Fragment correlation ratio", + "value": 0.75, + "description": "Include only fragments with a XIC correlation at least 0.75 of the median for all fragments.", + "type": "float" + } + ] + }, + { + "id": "transfer_learning", + "name": "Transfer Learning", + "hidden": false, + "parameters": [ + { + "id": "enabled", + "name": "Enabled", + "value": false, + "description": "If true, a custom peptdeep model will be created using the transfer learned library.", + "type": "boolean" + }, + { + "id": "batch_size", + "name": "Batch size", + "value": 2000, + "description": "Number of precursors per batch.", + "type": "integer" + }, + { + "id": "max_lr", + "name": "Maximum learning rate", + "value": 0.0001, + "description": "Maximum learning rate per batch. 
\nThe maximum learning rate will be reached after a warmup phase and decreased using a plateau scheduler.", + "type": "float" + }, + { + "id": "train_ratio", + "name": "Train ratio", + "value": 0.8, + "description": "TODO remove and replaced by fixed 70:20:10 split", + "type": "float" + }, + { + "id": "test_interval", + "name": "Test interval", + "value": 1, + "description": "Test every n intervals.", + "type": "integer" + }, + { + "id": "lr_patience", + "name": "Learning rate patience", + "value": 3, + "description": "Learning rate patience after which the lr will be halved.", + "type": "integer" + }, + { + "id": "minimum_psms", + "name": "Minimum precursor number", + "value": 10000, + "description": "Minimum precursor number to perform transfer learning.", + "type": "integer" + }, + { + "id": "epochs", + "name": "Number of epochs", + "value": 51, + "description": "Maximum number of epochs.", + "type": "integer" + }, + { + "id": "warmup_epochs", + "name": "Warmup epochs", + "value": 5, + "description": "Number of warmup epochs during which the lr is ramped up.", + "type": "integer" + }, + { + "id": "nce", + "name": "Normalized collision energy", + "value": 25, + "description": "Normalised collision energy encoded during training.", + "type": "float" + }, + { + "id": "instrument", + "name": "Instrument", + "value": "Lumos", + "description": "Instrument type encoded during training. 
\nThe same instrument type must be used for prediction using the trained model.", + "type": "dropdown", + "options": [ + "Astral", + "QE", + "timsTOF", + "SciexTOF", + "Lumos", + "Eclipse", + "Velos", + "Elite", + "OrbitrapTribrid", + "ThermoTribrid", + "QE+", + "QEHF", + "QEHFX", + "Exploris", + "Exploris480" + ] + } + ] } ] } From a10d8740fe54790bac4615219e343dc35fce4b2c Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Thu, 13 Jun 2024 15:30:16 -0700 Subject: [PATCH 42/48] hidden by default --- gui/workflows/PeptideCentric.v1.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gui/workflows/PeptideCentric.v1.json b/gui/workflows/PeptideCentric.v1.json index 1946b069..2ba11824 100644 --- a/gui/workflows/PeptideCentric.v1.json +++ b/gui/workflows/PeptideCentric.v1.json @@ -527,7 +527,7 @@ { "id": "transfer_library", "name": "Transfer Library", - "hidden": false, + "hidden": true, "parameters": [ { "id": "enabled", @@ -583,7 +583,7 @@ { "id": "transfer_learning", "name": "Transfer Learning", - "hidden": false, + "hidden": true, "parameters": [ { "id": "enabled", From 8e8378ac474b6d9c059c76914ed89c6fd978b6fd Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 14 Jun 2024 09:48:58 +0200 Subject: [PATCH 43/48] BUG: fix issue with logging in python <3.11 --- alphadia/planning.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/alphadia/planning.py b/alphadia/planning.py index c2f2e976..51940f46 100644 --- a/alphadia/planning.py +++ b/alphadia/planning.py @@ -125,10 +125,8 @@ def __init__( # set log level level_to_set = self.config["general"]["log_level"] - if (level_code := logging.getLevelNamesMapping().get(level_to_set)) is None: - logger.error(f"Setting logging to unknown level {level_to_set}") - else: - logger.setLevel(level_code) + level_code = logging.getLevelName(level_to_set) + logger.setLevel(level_code) self.load_library() From e71cc4ffddf493d347cfe0676d8bea0642682ebd Mon 
Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 14 Jun 2024 09:49:17 +0200 Subject: [PATCH 44/48] CHORE: nonzero exit status on error --- alphadia/cli.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/alphadia/cli.py b/alphadia/cli.py index b0077a4d..8bf60b15 100644 --- a/alphadia/cli.py +++ b/alphadia/cli.py @@ -2,6 +2,8 @@ # native imports import logging +import sys + import yaml import os import re @@ -345,3 +347,4 @@ def run(*args, **kwargs): logger.info(traceback.format_exc()) logger.error(e) + sys.exit(1) From b74b79e77ddf841b9ef5388a301db15a7c691fca Mon Sep 17 00:00:00 2001 From: mschwoerer <82171591+mschwoer@users.noreply.github.com> Date: Fri, 14 Jun 2024 09:50:05 +0200 Subject: [PATCH 45/48] CHORE: get rid of dots in log messages --- alphadia/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alphadia/cli.py b/alphadia/cli.py index 8bf60b15..bd1009c9 100644 --- a/alphadia/cli.py +++ b/alphadia/cli.py @@ -316,13 +316,13 @@ def run(*args, **kwargs): for f in raw_path_list: logger.progress(f" {os.path.basename(f)}") - logger.progress(f"Using library: {library_path}.") + logger.progress(f"Using library: {library_path}") logger.progress(f"Using {len(fasta_path_list)} fasta files:") for f in fasta_path_list: logger.progress(f" {f}") - logger.progress(f"Saving output to {output_directory}.") + logger.progress(f"Saving output to: {output_directory}") try: import matplotlib From 26e3ae13caf1bf544cce416acb042c275ec31223 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Fri, 14 Jun 2024 13:51:42 -0700 Subject: [PATCH 46/48] remove minimum_psms --- alphadia/constants/default.yaml | 3 --- alphadia/transferlearning/train.py | 3 +-- gui/workflows/PeptideCentric.v1.json | 7 ------- 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml index 86194da2..025c1bd8 100644 --- a/alphadia/constants/default.yaml +++ 
b/alphadia/constants/default.yaml @@ -208,9 +208,6 @@ transfer_learning: # learning rate patience after which the lr will be halved lr_patience: 3 - # minimum precursor number to perform transfer learning - minimum_psms: 10000 - # maximum number of epochs epochs: 51 diff --git a/alphadia/transferlearning/train.py b/alphadia/transferlearning/train.py index c558619b..23a80888 100644 --- a/alphadia/transferlearning/train.py +++ b/alphadia/transferlearning/train.py @@ -25,6 +25,7 @@ import logging from alphadia.workflow import reporting + logger = logging.getLogger() settings = { @@ -35,7 +36,6 @@ "test_interval": 1, "lr_patience": 3, # --------- Our settings ------------ - "minimum_psms": 1200, "epochs": 51, "warmup_epochs": 5, # -------------------------- @@ -278,7 +278,6 @@ def _order_intensities( """ reordered = unordered_frag_df.copy() for i in tqdm(range(len(reordered_precursor_df))): - new_start_idx = reordered_precursor_df.iloc[i]["frag_start_idx"] new_end_idx = reordered_precursor_df.iloc[i]["frag_stop_idx"] diff --git a/gui/workflows/PeptideCentric.v1.json b/gui/workflows/PeptideCentric.v1.json index 2ba11824..312f319a 100644 --- a/gui/workflows/PeptideCentric.v1.json +++ b/gui/workflows/PeptideCentric.v1.json @@ -627,13 +627,6 @@ "description": "Learning rate patience after which the lr will be halved.", "type": "integer" }, - { - "id": "minimum_psms", - "name": "Minimum precursor number", - "value": 10000, - "description": "Minimum precursor number to perform transfer learning.", - "type": "integer" - }, { "id": "epochs", "name": "Number of epochs", From b278530cfc537c1fca04ecc46019856beb2513d7 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Fri, 14 Jun 2024 13:56:30 -0700 Subject: [PATCH 47/48] raise alphabase version --- requirements/requirements.txt | 2 +- requirements/requirements_loose.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index c505c4e4..deb91af3 100644 --- 
a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -5,7 +5,7 @@ numba==0.59.1 argparse==1.4.0 alpharaw==0.4.5 alphatims==1.0.8 -alphabase==1.2.4 +alphabase==1.2.5 peptdeep==1.2.1 progressbar==2.5 neptune==1.10.4 diff --git a/requirements/requirements_loose.txt b/requirements/requirements_loose.txt index 482be66c..5ce63f4c 100644 --- a/requirements/requirements_loose.txt +++ b/requirements/requirements_loose.txt @@ -4,7 +4,7 @@ numba argparse alpharaw>=0.3.1 # test: tolerate_version alphatims -alphabase>=1.2.4 # test: tolerate_version +alphabase>=1.2.5 # test: tolerate_version peptdeep>=1.2.1 # test: tolerate_version progressbar neptune From 8b4cd6962e5e4a218ac8603a7dcd0f984800e7c4 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Fri, 14 Jun 2024 15:05:36 -0700 Subject: [PATCH 48/48] =?UTF-8?q?Bump=20version:=201.6.2=20=E2=86=92=201.7?= =?UTF-8?q?.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- alphadia/__init__.py | 2 +- docs/index.md | 2 +- gui/package.json | 2 +- gui/src/main/modules/profile.js | 2 +- misc/.bumpversion.cfg | 2 +- release/macos/build_backend_macos.sh | 2 +- release/macos/build_pkg_macos.sh | 2 +- release/macos/build_zip_macos.sh | 2 +- release/macos/distribution.xml | 2 +- release/macos/info.plist | 4 ++-- release/windows/alphadia_innoinstaller.iss | 4 ++-- release/windows/build_backend.ps1 | 2 +- 12 files changed, 14 insertions(+), 14 deletions(-) diff --git a/alphadia/__init__.py b/alphadia/__init__.py index 363a78dc..25e7bf58 100644 --- a/alphadia/__init__.py +++ b/alphadia/__init__.py @@ -1,3 +1,3 @@ #!python -__version__ = "1.6.2" +__version__ = "1.7.0" diff --git a/docs/index.md b/docs/index.md index 327dae11..88652f6d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ # AlphaDIA Documentation -**Version:** 1.6.2 | [Github](https://github.com/MannLabs/alphadia) +**Version:** 1.7.0 | [Github](https://github.com/MannLabs/alphadia) Open-source DIA search engine built 
with the alphaX ecosystem. Built with [alpharaw](https://github.com/MannLabs/alpharaw) and [alphatims](https://github.com/MannLabs/alphatims) for raw file acces. Spectral libraries are predicted with [peptdeep](https://github.com/MannLabs/alphapeptdeep) and managed by [alphabase](https://github.com/MannLabs/alphabase). Quantification is powered by [directLFQ](https://github.com/MannLabs/directLFQ). diff --git a/gui/package.json b/gui/package.json index bd21cbba..22f59176 100644 --- a/gui/package.json +++ b/gui/package.json @@ -1,7 +1,7 @@ { "name": "alphadia", "productName": "alphadia-gui", - "version": "1.6.2", + "version": "1.7.0", "description": "Graphical user interface for DIA data analysis", "main": "dist/electron.js", "homepage": "./", diff --git a/gui/src/main/modules/profile.js b/gui/src/main/modules/profile.js index e92663f6..87a52193 100644 --- a/gui/src/main/modules/profile.js +++ b/gui/src/main/modules/profile.js @@ -7,7 +7,7 @@ const { dialog } = require('electron') const Profile = class { config = { - "version": "1.6.2", + "version": "1.7.0", "conda": { "envName": "alpha", "path": "" diff --git a/misc/.bumpversion.cfg b/misc/.bumpversion.cfg index 33021c64..9ab81833 100644 --- a/misc/.bumpversion.cfg +++ b/misc/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.6.2 +current_version = 1.7.0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? 
diff --git a/release/macos/build_backend_macos.sh b/release/macos/build_backend_macos.sh index 6a8b4d14..c8eff216 100755 --- a/release/macos/build_backend_macos.sh +++ b/release/macos/build_backend_macos.sh @@ -6,7 +6,7 @@ python -c "from huggingface_hub import get_full_repo_name; print('success')" pip install build python -m build -pip install "dist/alphadia-1.6.2-py3-none-any.whl[stable]" +pip install "dist/alphadia-1.7.0-py3-none-any.whl[stable]" # Creating the stand-alone pyinstaller folder pip install pyinstaller diff --git a/release/macos/build_pkg_macos.sh b/release/macos/build_pkg_macos.sh index f8254a55..d56e06b3 100755 --- a/release/macos/build_pkg_macos.sh +++ b/release/macos/build_pkg_macos.sh @@ -2,7 +2,7 @@ # Set up package name and version PACKAGE_NAME="alphadia" -PACKAGE_VERSION="1.6.2" +PACKAGE_VERSION="1.7.0" ARCH=$(uname -m) if [ "$ARCH" == "x86_64" ]; then diff --git a/release/macos/build_zip_macos.sh b/release/macos/build_zip_macos.sh index 886b5b24..6a636b54 100755 --- a/release/macos/build_zip_macos.sh +++ b/release/macos/build_zip_macos.sh @@ -2,7 +2,7 @@ # Set up package name and version PACKAGE_NAME="alphadia" -PACKAGE_VERSION="1.6.2" +PACKAGE_VERSION="1.7.0" ARCH=$(uname -m) if [ "$ARCH" == "x86_64" ]; then diff --git a/release/macos/distribution.xml b/release/macos/distribution.xml index 095ceb88..73121631 100644 --- a/release/macos/distribution.xml +++ b/release/macos/distribution.xml @@ -1,6 +1,6 @@ - alphaDIA 1.6.2 + alphaDIA 1.7.0 diff --git a/release/macos/info.plist b/release/macos/info.plist index 2d5e4643..0930aa58 100644 --- a/release/macos/info.plist +++ b/release/macos/info.plist @@ -9,9 +9,9 @@ CFBundleIconFile alphadia.icns CFBundleIdentifier - alphadia.1.6.2 + alphadia.1.7.0 CFBundleShortVersionString - 1.6.2 + 1.7.0 CFBundleInfoDictionaryVersion 6.0 CFBundleName diff --git a/release/windows/alphadia_innoinstaller.iss b/release/windows/alphadia_innoinstaller.iss index ad02b87c..10e3359c 100644 --- 
a/release/windows/alphadia_innoinstaller.iss +++ b/release/windows/alphadia_innoinstaller.iss @@ -2,7 +2,7 @@ ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! #define MyAppName "AlphaDIA" -#define MyAppVersion "1.6.2" +#define MyAppVersion "1.7.0" #define MyAppPublisher "Max Planck Institute of Biochemistry, Mann Labs" #define MyAppURL "https://github.com/MannLabs/alphadia" #define MyAppExeName "alphadia-gui.exe" @@ -25,7 +25,7 @@ LicenseFile=..\..\LICENSE.txt PrivilegesRequired=lowest PrivilegesRequiredOverridesAllowed=dialog OutputDir=..\..\dist -OutputBaseFilename=alphadia-1.6.2-win-x64 +OutputBaseFilename=alphadia-1.7.0-win-x64 SetupIconFile=..\logos\alphadia.ico Compression=lzma SolidCompression=yes diff --git a/release/windows/build_backend.ps1 b/release/windows/build_backend.ps1 index dae15414..07705399 100644 --- a/release/windows/build_backend.ps1 +++ b/release/windows/build_backend.ps1 @@ -6,7 +6,7 @@ python -c 'from huggingface_hub import get_full_repo_name; print("success")' pip install build python -m build -pip install "dist/alphadia-1.6.2-py3-none-any.whl[stable]" +pip install "dist/alphadia-1.7.0-py3-none-any.whl[stable]" # Creating the stand-alone pyinstaller folder pip install pyinstaller tbb