From 1fb223812d8284223d6c3c2cba4f620d32c050f7 Mon Sep 17 00:00:00 2001
From: Vincenth Brennsteiner
 <vincenthbrennsteiner@Vincenths-MacBook-Pro-2.local>
Date: Mon, 11 Mar 2024 00:09:58 +0100
Subject: [PATCH 01/48] open notebook to compare grouping between search
 engines

---
 nbs/debug/dev_grouping_comparison.ipynb | 92 +++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 nbs/debug/dev_grouping_comparison.ipynb

diff --git a/nbs/debug/dev_grouping_comparison.ipynb b/nbs/debug/dev_grouping_comparison.ipynb
new file mode 100644
index 00000000..de8d1f51
--- /dev/null
+++ b/nbs/debug/dev_grouping_comparison.ipynb
@@ -0,0 +1,92 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Main Notebook Aim"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The aim of this notebook is to delineate differences in protein inference between AlphaDIA, Spectronaut and DIANN. Heuristic and Strict Parsimony grouping of each of these engines shall be evaluated on a HeLa QC dataset (Orbitrap Astral). The aims are the following:\n",
+    "\n",
+    "- Explain apparent differences in protein inference between AlphaDIA - DIANN: Strict Parsimony\n",
+    "- Explain apparent differences in protein inference between AlphaDIA - Spectronaut: Strict Parsimony\n",
+    "- Explain apparent differences in protein inference between AlphaDIA - DIANN: Highly Heuristic\n",
+    "- Explain apparent differences in protein inference between AlphaDIA - Spectronaut: Highly Heuristic\n",
+    "\n",
+    "Special focus lies on edge cases (subsumable, circular, non-deterministic) grouping situations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Inspect QC file and generate an overview of the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load AlphaDIA results for both grouping methods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load Spectronaut results for both grouping methods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load DIA-NN results for both grouping methods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Visualize differences between the datasets for each method"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Closer investigation of divergent grouping results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From 9147995b6e2b07d12580d0223e51515adb1897ce Mon Sep 17 00:00:00 2001
From: Vincenth Brennsteiner
 <vincenthbrennsteiner@Vincenths-MacBook-Pro-2.local>
Date: Thu, 4 Apr 2024 16:08:59 +0200
Subject: [PATCH 02/48] formulate approach to compare grouping between
 different search engines

---
 nbs/debug/dev_grouping_comparison.ipynb | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/nbs/debug/dev_grouping_comparison.ipynb b/nbs/debug/dev_grouping_comparison.ipynb
index de8d1f51..f29b54ee 100644
--- a/nbs/debug/dev_grouping_comparison.ipynb
+++ b/nbs/debug/dev_grouping_comparison.ipynb
@@ -18,7 +18,14 @@
     "- Explain apparent differences in protein inference between AlphaDIA - DIANN: Highly Heuristic\n",
     "- Explain apparent differences in protein inference between AlphaDIA - Spectronaut: Highly Heuristic\n",
     "\n",
-    "Special focus lies on edge cases (subsumable, circular, non-deterministic) grouping situations."
+    "Special focus lies on edge-case grouping situations (subsumable, circular, non-deterministic).\n",
+    "\n",
+    "Comparing the grouping algorithms directly is challenging, since the search engines operate with different, closed-source codebases. Instead, we opt for a post-hoc approach:\n",
+    "\n",
+    "1. From each search engine, obtain a peptide-level and a protein-group-level output file for the same raw files processed with the same FASTA/spectral library\n",
+    "2. Parse outputs such that each precursor is associated with its genes, with rows fanned out (one row per gene) for peptides associated with more than one gene\n",
+    "3. Iterate over each gene in each result table and select those with exactly identical precursor sets. Mark these genes as \"shared\", with the definition: \"A shared gene is a gene whose associated precursors are identical across all search engines\"\n",
+    "4. Then, select groups that consist only of shared genes. While the gene-precursor association is clearly determined by the FASTA, the manner in which genes are combined into groups may differ between search engines. Using these cases, we can examine how different search engines handle grouping starting from exactly the same precursors."
    ]
   },
   {
@@ -27,7 +34,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Inspect QC file and generate an overview of the data"
+    "# Inspect QC file and generate an overview of the data "
    ]
   },
   {

From eb393cdc532e32ad3dfea6c4abec7285bf23e08b Mon Sep 17 00:00:00 2001
From: Vincenth Brennsteiner
 <vincenthbrennsteiner@Vincenths-MacBook-Pro-2.local>
Date: Tue, 9 Apr 2024 15:49:03 +0200
Subject: [PATCH 03/48] file loading for post-hoc comparison of AlphaDIA,
 DiaNN, Spectronaut protein inference

---
 nbs/debug/dev_grouping_comparison.ipynb | 1410 ++++++++++++++++++++++-
 1 file changed, 1407 insertions(+), 3 deletions(-)

diff --git a/nbs/debug/dev_grouping_comparison.ipynb b/nbs/debug/dev_grouping_comparison.ipynb
index f29b54ee..fa381c51 100644
--- a/nbs/debug/dev_grouping_comparison.ipynb
+++ b/nbs/debug/dev_grouping_comparison.ipynb
@@ -30,11 +30,1401 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Inspect QC file and generate an overview of the data "
+    "# Inspect QC files and generate an overview of the data \n",
+    "\n",
+    "import os \n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Utility functions\n",
+    "# to be sure and to enable parsing without user input, infer search engine and table level from the file's column headers\n",
+    "def infer_engine(\n",
+    "        headers : list,\n",
+    "        alphadia_pe_columns : list = ['base_width_mobility', 'base_width_rt', 'mono_ms1_intensity'],\n",
+    "        alphadia_pg_columns : list = ['pg'],\n",
+    "        diann_pe_columns : list = ['File.Name', 'Run', 'Protein.Group', 'Protein.Ids'],\n",
+    "        diann_pg_columns : list = ['First.Protein.Description'],\n",
+    "        spectronaut_pe_columns : list = ['PG.ProteinGroups', 'PG.ProteinAccessions', 'PG.Genes', 'PG.UniProtIds'],\n",
+    "        spectronaut_pg_columns : list = ['PG.NrOfStrippedSequencesIdentified (Experiment-wide)', 'PG.NrOfPrecursorsIdentified (Experiment-wide)'],\n",
+    "):\n",
+    "    engine = []\n",
+    "    level = []\n",
+    "    if set(alphadia_pe_columns).issubset(headers):\n",
+    "        engine.append('Alphadia')\n",
+    "        level.append('peptide')\n",
+    "    elif set (alphadia_pg_columns).issubset(headers):\n",
+    "        engine.append('Alphadia')\n",
+    "        level.append('protein_group')\n",
+    "    elif set(diann_pe_columns).issubset(headers):\n",
+    "        engine.append('DiaNN')\n",
+    "        level.append('peptide')\n",
+    "    elif set(diann_pg_columns).issubset(headers):\n",
+    "        engine.append('DiaNN')\n",
+    "        level.append('protein_group')\n",
+    "    elif set(spectronaut_pe_columns).issubset(headers):\n",
+    "        engine.append('Spectronaut')\n",
+    "        level.append('peptide')\n",
+    "    elif set(spectronaut_pg_columns).issubset(headers):\n",
+    "        engine.append('Spectronaut')\n",
+    "        level.append('protein_group')\n",
+    "    else:\n",
+    "        raise ValueError('Could not infer search engine from column names')\n",
+    "\n",
+    "    if len(engine) > 1:\n",
+    "        raise ValueError('More than one search engine detected')\n",
+    "\n",
+    "    return engine[0], level[0]\n",
+    "\n",
+    "# parse result files to usable format\n",
+    "\n",
+    "def parse_alphadia_pe_table(\n",
+    "        pe_table : pd.DataFrame,\n",
+    "):\n",
+    "    return pe_table\n",
+    "\n",
+    "def parse_alphadia_pg_table(\n",
+    "        pg_table : pd.DataFrame,\n",
+    "):\n",
+    "    return pg_table\n",
+    "\n",
+    "def parse_diann_pe_table(\n",
+    "        pe_table : pd.DataFrame,\n",
+    "):\n",
+    "    return pe_table\n",
+    "\n",
+    "def parse_diann_pg_table(\n",
+    "        pg_table : pd.DataFrame,\n",
+    "):\n",
+    "    return pg_table\n",
+    "\n",
+    "def parse_spectronaut_pe_table(\n",
+    "        pe_table : pd.DataFrame,\n",
+    "):\n",
+    "    return pe_table\n",
+    "\n",
+    "def parse_spectronaut_pg_table(\n",
+    "        pg_table : pd.DataFrame,\n",
+    "):\n",
+    "    return pg_table\n",
+    "\n",
+    "# Higher level wrapper to read and parse peptide and protein group level results table\n",
+    "\n",
+    "def read_and_parse_peptide_table(\n",
+    "        input_table_path : str,\n",
+    "        input_table_filename : str,\n",
+    ") -> pd.DataFrame:\n",
+    "    \"\"\"Read and parse results table from respective search engine output. First step in analysing QC data.\n",
+    "    \n",
+    "    Parameters:\n",
+    "    ----------\n",
+    "\n",
+    "    input_table_path : str\n",
+    "        Directory containing the results table\n",
+    "\n",
+    "    input_table_filename : str\n",
+    "        Filename of the tab-separated results table (peptide or protein-group level)\n",
+    "\n",
+    "    Returns:\n",
+    "    -------\n",
+    "\n",
+    "    out_table, engine : tuple of (pd.DataFrame, str)\n",
+    "        Parsed results table and the name of the inferred search engine\n",
+    "    \n",
+    "    \"\"\"\n",
+    "\n",
+    "    # read peptide level table\n",
+    "    input_table = pd.read_csv(\n",
+    "        os.path.join(input_table_path, input_table_filename),\n",
+    "        sep = '\\t'\n",
+    "    )\n",
+    "    \n",
+    "    engine, level = infer_engine(input_table.columns.tolist())\n",
+    "\n",
+    "    if level == 'peptide':\n",
+    "        if engine == 'Alphadia':\n",
+    "            out_table = parse_alphadia_pe_table(input_table)\n",
+    "        elif engine == 'DiaNN':\n",
+    "            out_table = parse_diann_pe_table(input_table)\n",
+    "        elif engine == 'Spectronaut':\n",
+    "            out_table = parse_spectronaut_pe_table(input_table)\n",
+    "    elif level == 'protein_group':\n",
+    "        if engine == 'Alphadia':\n",
+    "            out_table = parse_alphadia_pg_table(input_table)\n",
+    "        elif engine == 'DiaNN':\n",
+    "            out_table = parse_diann_pg_table(input_table)\n",
+    "        elif engine == 'Spectronaut':\n",
+    "            out_table = parse_spectronaut_pg_table(input_table)\n",
+    "\n",
+    "    return out_table, engine"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>base_width_mobility</th>\n",
+       "      <th>base_width_rt</th>\n",
+       "      <th>rt_observed</th>\n",
+       "      <th>mobility_observed</th>\n",
+       "      <th>mono_ms1_intensity</th>\n",
+       "      <th>top_ms1_intensity</th>\n",
+       "      <th>sum_ms1_intensity</th>\n",
+       "      <th>weighted_ms1_intensity</th>\n",
+       "      <th>weighted_mass_deviation</th>\n",
+       "      <th>weighted_mass_error</th>\n",
+       "      <th>...</th>\n",
+       "      <th>_candidate_idx</th>\n",
+       "      <th>valid</th>\n",
+       "      <th>candidate_idx</th>\n",
+       "      <th>run</th>\n",
+       "      <th>mod_seq_hash</th>\n",
+       "      <th>mod_seq_charge_hash</th>\n",
+       "      <th>pg_master</th>\n",
+       "      <th>pg</th>\n",
+       "      <th>pg_qval</th>\n",
+       "      <th>intensity</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>10.728760</td>\n",
+       "      <td>297.96973</td>\n",
+       "      <td>0.000001</td>\n",
+       "      <td>5833644.50</td>\n",
+       "      <td>5833644.50</td>\n",
+       "      <td>13318241.0</td>\n",
+       "      <td>4630847.0</td>\n",
+       "      <td>0.046064</td>\n",
+       "      <td>0.046064</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3810240</td>\n",
+       "      <td>True</td>\n",
+       "      <td>3810240</td>\n",
+       "      <td>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...</td>\n",
+       "      <td>6684877181925296581</td>\n",
+       "      <td>6684877181925296583</td>\n",
+       "      <td>O43237</td>\n",
+       "      <td>O43237</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.027229e+06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>10.723328</td>\n",
+       "      <td>288.75705</td>\n",
+       "      <td>0.000001</td>\n",
+       "      <td>13404818.00</td>\n",
+       "      <td>13404818.00</td>\n",
+       "      <td>33200900.0</td>\n",
+       "      <td>11027869.0</td>\n",
+       "      <td>-0.187766</td>\n",
+       "      <td>0.187766</td>\n",
+       "      <td>...</td>\n",
+       "      <td>6107025</td>\n",
+       "      <td>True</td>\n",
+       "      <td>6107025</td>\n",
+       "      <td>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...</td>\n",
+       "      <td>6676861052106421843</td>\n",
+       "      <td>6676861052106421845</td>\n",
+       "      <td>Q04323</td>\n",
+       "      <td>Q04323</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.041206e+06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>16.940490</td>\n",
+       "      <td>432.81763</td>\n",
+       "      <td>0.000001</td>\n",
+       "      <td>1033554.44</td>\n",
+       "      <td>1033554.44</td>\n",
+       "      <td>3246347.8</td>\n",
+       "      <td>1095333.5</td>\n",
+       "      <td>-0.850223</td>\n",
+       "      <td>0.850223</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4568806</td>\n",
+       "      <td>True</td>\n",
+       "      <td>4568806</td>\n",
+       "      <td>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...</td>\n",
+       "      <td>2677431574892707069</td>\n",
+       "      <td>2677431574892707071</td>\n",
+       "      <td>O60264</td>\n",
+       "      <td>O60264</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>4.600186e+06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>10.726410</td>\n",
+       "      <td>425.16693</td>\n",
+       "      <td>0.000001</td>\n",
+       "      <td>2843770.00</td>\n",
+       "      <td>2843770.00</td>\n",
+       "      <td>7460814.0</td>\n",
+       "      <td>2434136.2</td>\n",
+       "      <td>-0.793379</td>\n",
+       "      <td>0.793379</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4384664</td>\n",
+       "      <td>True</td>\n",
+       "      <td>4384664</td>\n",
+       "      <td>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...</td>\n",
+       "      <td>4619871039799467150</td>\n",
+       "      <td>4619871039799467152</td>\n",
+       "      <td>Q8IWE2</td>\n",
+       "      <td>Q8IWE2</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2.069930e+05</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>38.312042</td>\n",
+       "      <td>481.91810</td>\n",
+       "      <td>0.000001</td>\n",
+       "      <td>4392369.00</td>\n",
+       "      <td>4392369.00</td>\n",
+       "      <td>14179063.0</td>\n",
+       "      <td>4668329.5</td>\n",
+       "      <td>0.074734</td>\n",
+       "      <td>0.074734</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3785744</td>\n",
+       "      <td>True</td>\n",
+       "      <td>3785744</td>\n",
+       "      <td>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...</td>\n",
+       "      <td>14520620509815008418</td>\n",
+       "      <td>14520620509815008420</td>\n",
+       "      <td>Q14978</td>\n",
+       "      <td>Q14978</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.119783e+06</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 91 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   base_width_mobility  base_width_rt  rt_observed  mobility_observed  \\\n",
+       "0                  0.0      10.728760    297.96973           0.000001   \n",
+       "1                  0.0      10.723328    288.75705           0.000001   \n",
+       "2                  0.0      16.940490    432.81763           0.000001   \n",
+       "3                  0.0      10.726410    425.16693           0.000001   \n",
+       "4                  0.0      38.312042    481.91810           0.000001   \n",
+       "\n",
+       "   mono_ms1_intensity  top_ms1_intensity  sum_ms1_intensity  \\\n",
+       "0          5833644.50         5833644.50         13318241.0   \n",
+       "1         13404818.00        13404818.00         33200900.0   \n",
+       "2          1033554.44         1033554.44          3246347.8   \n",
+       "3          2843770.00         2843770.00          7460814.0   \n",
+       "4          4392369.00         4392369.00         14179063.0   \n",
+       "\n",
+       "   weighted_ms1_intensity  weighted_mass_deviation  weighted_mass_error  ...  \\\n",
+       "0               4630847.0                 0.046064             0.046064  ...   \n",
+       "1              11027869.0                -0.187766             0.187766  ...   \n",
+       "2               1095333.5                -0.850223             0.850223  ...   \n",
+       "3               2434136.2                -0.793379             0.793379  ...   \n",
+       "4               4668329.5                 0.074734             0.074734  ...   \n",
+       "\n",
+       "   _candidate_idx  valid  candidate_idx  \\\n",
+       "0         3810240   True        3810240   \n",
+       "1         6107025   True        6107025   \n",
+       "2         4568806   True        4568806   \n",
+       "3         4384664   True        4384664   \n",
+       "4         3785744   True        3785744   \n",
+       "\n",
+       "                                                 run          mod_seq_hash  \\\n",
+       "0  20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...   6684877181925296581   \n",
+       "1  20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...   6676861052106421843   \n",
+       "2  20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...   2677431574892707069   \n",
+       "3  20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...   4619871039799467150   \n",
+       "4  20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...  14520620509815008418   \n",
+       "\n",
+       "    mod_seq_charge_hash  pg_master      pg  pg_qval     intensity  \n",
+       "0   6684877181925296583     O43237  O43237      0.0  1.027229e+06  \n",
+       "1   6676861052106421845     Q04323  Q04323      0.0  1.041206e+06  \n",
+       "2   2677431574892707071     O60264  O60264      0.0  4.600186e+06  \n",
+       "3   4619871039799467152     Q8IWE2  Q8IWE2      0.0  2.069930e+05  \n",
+       "4  14520620509815008420     Q14978  Q14978      0.0  3.119783e+06  \n",
+       "\n",
+       "[5 rows x 91 columns]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>pg</th>\n",
+       "      <th>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14</th>\n",
+       "      <th>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15</th>\n",
+       "      <th>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16</th>\n",
+       "      <th>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A0A024RBG1</td>\n",
+       "      <td>27294.380913</td>\n",
+       "      <td>28892.140941</td>\n",
+       "      <td>27849.403037</td>\n",
+       "      <td>29332.977053</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>A0A096LP49</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>13362.565748</td>\n",
+       "      <td>18468.761155</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>A0A0B4J2D5</td>\n",
+       "      <td>304680.336728</td>\n",
+       "      <td>334097.520531</td>\n",
+       "      <td>294064.475715</td>\n",
+       "      <td>265216.624922</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>A0A0B4J2F0</td>\n",
+       "      <td>267612.886812</td>\n",
+       "      <td>298607.689343</td>\n",
+       "      <td>320374.321906</td>\n",
+       "      <td>326374.101157</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>A0A0B4J2F2</td>\n",
+       "      <td>76495.977143</td>\n",
+       "      <td>82719.929049</td>\n",
+       "      <td>63831.158999</td>\n",
+       "      <td>80179.393809</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           pg  20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14  \\\n",
+       "0  A0A024RBG1                                       27294.380913           \n",
+       "1  A0A096LP49                                           0.000000           \n",
+       "2  A0A0B4J2D5                                      304680.336728           \n",
+       "3  A0A0B4J2F0                                      267612.886812           \n",
+       "4  A0A0B4J2F2                                       76495.977143           \n",
+       "\n",
+       "   20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15  \\\n",
+       "0                                       28892.140941           \n",
+       "1                                       13362.565748           \n",
+       "2                                      334097.520531           \n",
+       "3                                      298607.689343           \n",
+       "4                                       82719.929049           \n",
+       "\n",
+       "   20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16  \\\n",
+       "0                                       27849.403037           \n",
+       "1                                       18468.761155           \n",
+       "2                                      294064.475715           \n",
+       "3                                      320374.321906           \n",
+       "4                                       63831.158999           \n",
+       "\n",
+       "   20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17  \n",
+       "0                                       29332.977053          \n",
+       "1                                           0.000000          \n",
+       "2                                      265216.624922          \n",
+       "3                                      326374.101157          \n",
+       "4                                       80179.393809          "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>File.Name</th>\n",
+       "      <th>Run</th>\n",
+       "      <th>Protein.Group</th>\n",
+       "      <th>Protein.Ids</th>\n",
+       "      <th>Protein.Names</th>\n",
+       "      <th>Genes</th>\n",
+       "      <th>PG.Quantity</th>\n",
+       "      <th>PG.Normalised</th>\n",
+       "      <th>PG.MaxLFQ</th>\n",
+       "      <th>Genes.Quantity</th>\n",
+       "      <th>...</th>\n",
+       "      <th>Decoy.Evidence</th>\n",
+       "      <th>Decoy.CScore</th>\n",
+       "      <th>Fragment.Quant.Raw</th>\n",
+       "      <th>Fragment.Quant.Corrected</th>\n",
+       "      <th>Fragment.Correlations</th>\n",
+       "      <th>MS2.Scan</th>\n",
+       "      <th>IM</th>\n",
+       "      <th>iIM</th>\n",
+       "      <th>Predicted.IM</th>\n",
+       "      <th>Predicted.iIM</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...</td>\n",
+       "      <td>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...</td>\n",
+       "      <td>P37108</td>\n",
+       "      <td>P37108</td>\n",
+       "      <td>SRP14_HUMAN</td>\n",
+       "      <td>SRP14</td>\n",
+       "      <td>2458760.0</td>\n",
+       "      <td>2407790.0</td>\n",
+       "      <td>2372520.0</td>\n",
+       "      <td>2458760.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.04939</td>\n",
+       "      <td>1.144500e-01</td>\n",
+       "      <td>0;4799.62;0;0;2219.65;401.484;2770;0;0;3364.71...</td>\n",
+       "      <td>0;4799.62;0;0;2219.65;401.484;2770;0;0;3364.71...</td>\n",
+       "      <td>0;0.85516;0;0;0.930818;0.0392701;0.629045;0;0;...</td>\n",
+       "      <td>133304</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...</td>\n",
+       "      <td>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...</td>\n",
+       "      <td>P37108</td>\n",
+       "      <td>P37108</td>\n",
+       "      <td>SRP14_HUMAN</td>\n",
+       "      <td>SRP14</td>\n",
+       "      <td>2429120.0</td>\n",
+       "      <td>2332930.0</td>\n",
+       "      <td>2412170.0</td>\n",
+       "      <td>2429120.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.00000</td>\n",
+       "      <td>-1.000000e+07</td>\n",
+       "      <td>1517.72;1104.9;1813.46;0;3090.83;0;1185.63;0;0...</td>\n",
+       "      <td>1517.72;1104.9;1813.46;0;3090.83;0;1185.63;0;0...</td>\n",
+       "      <td>0.512347;0.511267;0.755782;0;0.635567;0;0.3431...</td>\n",
+       "      <td>133004</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...</td>\n",
+       "      <td>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...</td>\n",
+       "      <td>P37108</td>\n",
+       "      <td>P37108</td>\n",
+       "      <td>SRP14_HUMAN</td>\n",
+       "      <td>SRP14</td>\n",
+       "      <td>2255580.0</td>\n",
+       "      <td>2329290.0</td>\n",
+       "      <td>2182340.0</td>\n",
+       "      <td>2255580.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.00000</td>\n",
+       "      <td>-1.000000e+07</td>\n",
+       "      <td>2287.07;458.234;1840.52;0;757.653;0;1665.65;27...</td>\n",
+       "      <td>2287.07;458.234;1840.52;0;757.653;0;1665.65;27...</td>\n",
+       "      <td>0.232198;0.258243;0.633966;0;0.942524;0;0.6651...</td>\n",
+       "      <td>133904</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...</td>\n",
+       "      <td>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...</td>\n",
+       "      <td>P37108</td>\n",
+       "      <td>P37108</td>\n",
+       "      <td>SRP14_HUMAN</td>\n",
+       "      <td>SRP14</td>\n",
+       "      <td>2217790.0</td>\n",
+       "      <td>2293100.0</td>\n",
+       "      <td>2402530.0</td>\n",
+       "      <td>2217790.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.00000</td>\n",
+       "      <td>-1.000000e+07</td>\n",
+       "      <td>814.006;2186.53;236.486;0;3221.79;559.551;2718...</td>\n",
+       "      <td>814.006;2186.53;236.486;0;3221.79;559.551;2718...</td>\n",
+       "      <td>0.290409;0.931915;0.699088;0;0.42669;0.18216;0...</td>\n",
+       "      <td>133904</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...</td>\n",
+       "      <td>20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...</td>\n",
+       "      <td>P37108</td>\n",
+       "      <td>P37108</td>\n",
+       "      <td>SRP14_HUMAN</td>\n",
+       "      <td>SRP14</td>\n",
+       "      <td>2458760.0</td>\n",
+       "      <td>2407790.0</td>\n",
+       "      <td>2372520.0</td>\n",
+       "      <td>2458760.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.04939</td>\n",
+       "      <td>2.612290e-01</td>\n",
+       "      <td>3107.44;2308.01;1727.22;10473;228.697;330.737;...</td>\n",
+       "      <td>3107.44;2308.01;1727.22;10473;228.697;330.737;...</td>\n",
+       "      <td>0.848145;0.850794;0.679977;0.591514;0.403484;0...</td>\n",
+       "      <td>132955</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 57 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                           File.Name  \\\n",
+       "0  Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...   \n",
+       "1  Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...   \n",
+       "2  Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...   \n",
+       "3  Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...   \n",
+       "4  Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240...   \n",
+       "\n",
+       "                                                 Run Protein.Group  \\\n",
+       "0  20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...        P37108   \n",
+       "1  20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...        P37108   \n",
+       "2  20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...        P37108   \n",
+       "3  20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...        P37108   \n",
+       "4  20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng...        P37108   \n",
+       "\n",
+       "  Protein.Ids Protein.Names  Genes  PG.Quantity  PG.Normalised  PG.MaxLFQ  \\\n",
+       "0      P37108   SRP14_HUMAN  SRP14    2458760.0      2407790.0  2372520.0   \n",
+       "1      P37108   SRP14_HUMAN  SRP14    2429120.0      2332930.0  2412170.0   \n",
+       "2      P37108   SRP14_HUMAN  SRP14    2255580.0      2329290.0  2182340.0   \n",
+       "3      P37108   SRP14_HUMAN  SRP14    2217790.0      2293100.0  2402530.0   \n",
+       "4      P37108   SRP14_HUMAN  SRP14    2458760.0      2407790.0  2372520.0   \n",
+       "\n",
+       "   Genes.Quantity  ...  Decoy.Evidence  Decoy.CScore  \\\n",
+       "0       2458760.0  ...         1.04939  1.144500e-01   \n",
+       "1       2429120.0  ...         0.00000 -1.000000e+07   \n",
+       "2       2255580.0  ...         0.00000 -1.000000e+07   \n",
+       "3       2217790.0  ...         0.00000 -1.000000e+07   \n",
+       "4       2458760.0  ...         1.04939  2.612290e-01   \n",
+       "\n",
+       "                                  Fragment.Quant.Raw  \\\n",
+       "0  0;4799.62;0;0;2219.65;401.484;2770;0;0;3364.71...   \n",
+       "1  1517.72;1104.9;1813.46;0;3090.83;0;1185.63;0;0...   \n",
+       "2  2287.07;458.234;1840.52;0;757.653;0;1665.65;27...   \n",
+       "3  814.006;2186.53;236.486;0;3221.79;559.551;2718...   \n",
+       "4  3107.44;2308.01;1727.22;10473;228.697;330.737;...   \n",
+       "\n",
+       "                            Fragment.Quant.Corrected  \\\n",
+       "0  0;4799.62;0;0;2219.65;401.484;2770;0;0;3364.71...   \n",
+       "1  1517.72;1104.9;1813.46;0;3090.83;0;1185.63;0;0...   \n",
+       "2  2287.07;458.234;1840.52;0;757.653;0;1665.65;27...   \n",
+       "3  814.006;2186.53;236.486;0;3221.79;559.551;2718...   \n",
+       "4  3107.44;2308.01;1727.22;10473;228.697;330.737;...   \n",
+       "\n",
+       "                               Fragment.Correlations MS2.Scan  IM  iIM  \\\n",
+       "0  0;0.85516;0;0;0.930818;0.0392701;0.629045;0;0;...   133304   0    0   \n",
+       "1  0.512347;0.511267;0.755782;0;0.635567;0;0.3431...   133004   0    0   \n",
+       "2  0.232198;0.258243;0.633966;0;0.942524;0;0.6651...   133904   0    0   \n",
+       "3  0.290409;0.931915;0.699088;0;0.42669;0.18216;0...   133904   0    0   \n",
+       "4  0.848145;0.850794;0.679977;0.591514;0.403484;0...   132955   0    0   \n",
+       "\n",
+       "   Predicted.IM  Predicted.iIM  \n",
+       "0             0              0  \n",
+       "1             0              0  \n",
+       "2             0              0  \n",
+       "3             0              0  \n",
+       "4             0              0  \n",
+       "\n",
+       "[5 rows x 57 columns]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Protein.Group</th>\n",
+       "      <th>Protein.Ids</th>\n",
+       "      <th>Protein.Names</th>\n",
+       "      <th>Genes</th>\n",
+       "      <th>First.Protein.Description</th>\n",
+       "      <th>Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14.mzML</th>\n",
+       "      <th>Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15.mzML</th>\n",
+       "      <th>Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16.mzML</th>\n",
+       "      <th>Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17.mzML</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A0A024R1R8;Q9Y2S6</td>\n",
+       "      <td>Q9Y2S6;A0A024R1R8</td>\n",
+       "      <td>TMA7B_HUMAN;TMA7_HUMAN</td>\n",
+       "      <td>TMA7;TMA7B</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2824960.00</td>\n",
+       "      <td>2864450.0</td>\n",
+       "      <td>2950430.0</td>\n",
+       "      <td>2851180.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q3BBV...</td>\n",
+       "      <td>Q3BBV0;P0DPF3;Q5TAG4;Q8N660;A0A087WUL8;B4DH59;...</td>\n",
+       "      <td>NBPF8_HUMAN;NBPF9_HUMAN;NBPFA_HUMAN;NBPFE_HUMA...</td>\n",
+       "      <td>NBPF10;NBPF14;NBPF19;NBPF20;NBPF26;NBPF8;NBPF9</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>14483.4</td>\n",
+       "      <td>18018.2</td>\n",
+       "      <td>12017.10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TI2...</td>\n",
+       "      <td>P0DPF3;Q5TAG4;Q86T75;Q8N660;A0A087WUL8;B4DH59;...</td>\n",
+       "      <td>NBPF9_HUMAN;NBPFA_HUMAN;NBPFE_HUMAN;NBPFJ_HUMA...</td>\n",
+       "      <td>NBPF10;NBPF14;NBPF19;NBPF20;NBPF26;NBPF9</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6904.08</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>16465.6</td>\n",
+       "      <td>11101.30</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>A0A096LP01</td>\n",
+       "      <td>A0A096LP01</td>\n",
+       "      <td>SIM26_HUMAN</td>\n",
+       "      <td>SMIM26</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>4283.08</td>\n",
+       "      <td>10357.3</td>\n",
+       "      <td>6473.1</td>\n",
+       "      <td>8930.01</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>CC187_HUMAN</td>\n",
+       "      <td>CCDC187</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>25736.90</td>\n",
+       "      <td>25175.7</td>\n",
+       "      <td>27476.6</td>\n",
+       "      <td>36951.80</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                       Protein.Group  \\\n",
+       "0                                  A0A024R1R8;Q9Y2S6   \n",
+       "1  A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q3BBV...   \n",
+       "2  A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TI2...   \n",
+       "3                                         A0A096LP01   \n",
+       "4                            A0A096LP49;A0A096LP49-2   \n",
+       "\n",
+       "                                         Protein.Ids  \\\n",
+       "0                                  Q9Y2S6;A0A024R1R8   \n",
+       "1  Q3BBV0;P0DPF3;Q5TAG4;Q8N660;A0A087WUL8;B4DH59;...   \n",
+       "2  P0DPF3;Q5TAG4;Q86T75;Q8N660;A0A087WUL8;B4DH59;...   \n",
+       "3                                         A0A096LP01   \n",
+       "4                            A0A096LP49;A0A096LP49-2   \n",
+       "\n",
+       "                                       Protein.Names  \\\n",
+       "0                             TMA7B_HUMAN;TMA7_HUMAN   \n",
+       "1  NBPF8_HUMAN;NBPF9_HUMAN;NBPFA_HUMAN;NBPFE_HUMA...   \n",
+       "2  NBPF9_HUMAN;NBPFA_HUMAN;NBPFE_HUMAN;NBPFJ_HUMA...   \n",
+       "3                                        SIM26_HUMAN   \n",
+       "4                                        CC187_HUMAN   \n",
+       "\n",
+       "                                            Genes  First.Protein.Description  \\\n",
+       "0                                      TMA7;TMA7B                        NaN   \n",
+       "1  NBPF10;NBPF14;NBPF19;NBPF20;NBPF26;NBPF8;NBPF9                        NaN   \n",
+       "2        NBPF10;NBPF14;NBPF19;NBPF20;NBPF26;NBPF9                        NaN   \n",
+       "3                                          SMIM26                        NaN   \n",
+       "4                                         CCDC187                        NaN   \n",
+       "\n",
+       "   Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14.mzML  \\\n",
+       "0                                         2824960.00                                                         \n",
+       "1                                                NaN                                                         \n",
+       "2                                            6904.08                                                         \n",
+       "3                                            4283.08                                                         \n",
+       "4                                           25736.90                                                         \n",
+       "\n",
+       "   Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15.mzML  \\\n",
+       "0                                          2864450.0                                                         \n",
+       "1                                            14483.4                                                         \n",
+       "2                                                NaN                                                         \n",
+       "3                                            10357.3                                                         \n",
+       "4                                            25175.7                                                         \n",
+       "\n",
+       "   Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16.mzML  \\\n",
+       "0                                          2950430.0                                                         \n",
+       "1                                            18018.2                                                         \n",
+       "2                                            16465.6                                                         \n",
+       "3                                             6473.1                                                         \n",
+       "4                                            27476.6                                                         \n",
+       "\n",
+       "   Y:\\Vincenth\\astral_lfq_test\\HeLa_qc_data\\20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17.mzML  \n",
+       "0                                         2851180.00                                                        \n",
+       "1                                           12017.10                                                        \n",
+       "2                                           11101.30                                                        \n",
+       "3                                            8930.01                                                        \n",
+       "4                                           36951.80                                                        "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>PG.ProteinGroups</th>\n",
+       "      <th>PG.ProteinAccessions</th>\n",
+       "      <th>PG.Genes</th>\n",
+       "      <th>PG.UniProtIds</th>\n",
+       "      <th>PG.ProteinNames</th>\n",
+       "      <th>PG.IsCandidate</th>\n",
+       "      <th>PG.Completeness</th>\n",
+       "      <th>PG.ProteinLabel</th>\n",
+       "      <th>PEP.GroupingKey</th>\n",
+       "      <th>PEP.GroupingKeyType</th>\n",
+       "      <th>...</th>\n",
+       "      <th>PEP.IsProteotypic</th>\n",
+       "      <th>PEP.PeptidePosition</th>\n",
+       "      <th>PEP.IsProteinGroupSpecific</th>\n",
+       "      <th>PEP.IsGeneSpecific</th>\n",
+       "      <th>PEP.AllOccurringProteinAccessions</th>\n",
+       "      <th>EG.PrecursorId</th>\n",
+       "      <th>[1] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14.raw.EG.TotalQuantity (Settings)</th>\n",
+       "      <th>[2] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15.raw.EG.TotalQuantity (Settings)</th>\n",
+       "      <th>[3] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16.raw.EG.TotalQuantity (Settings)</th>\n",
+       "      <th>[4] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17.raw.EG.TotalQuantity (Settings)</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A0A024R1R8;Q9Y2S6</td>\n",
+       "      <td>A0A024R1R8;Q9Y2S6</td>\n",
+       "      <td>TMA7B;TMA7</td>\n",
+       "      <td>A0A024R1R8;Q9Y2S6</td>\n",
+       "      <td>TMA7B_HUMAN;TMA7_HUMAN</td>\n",
+       "      <td>False</td>\n",
+       "      <td>100</td>\n",
+       "      <td>A0A024R1R8;Q9Y2S6</td>\n",
+       "      <td>GPLATGGIK</td>\n",
+       "      <td>Stripped Sequence</td>\n",
+       "      <td>...</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>51;51</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>_GPLATGGIK_.2</td>\n",
+       "      <td>91116.1953125</td>\n",
+       "      <td>87792.84375</td>\n",
+       "      <td>95424.9609375</td>\n",
+       "      <td>92398.78125</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>A0A024RBG1;Q9NZJ9-2</td>\n",
+       "      <td>A0A024RBG1;Q9NZJ9-2</td>\n",
+       "      <td>NUDT4B;NUDT4</td>\n",
+       "      <td>A0A024RBG1;Q9NZJ9-2</td>\n",
+       "      <td>NUD4B_HUMAN;NUDT4_HUMAN</td>\n",
+       "      <td>False</td>\n",
+       "      <td>100</td>\n",
+       "      <td>A0A024RBG1;Q9NZJ9-2</td>\n",
+       "      <td>LLGIFEQNQDR</td>\n",
+       "      <td>Stripped Sequence</td>\n",
+       "      <td>...</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>80;80</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>_LLGIFEQNQDR_.2</td>\n",
+       "      <td>1104.52001953125</td>\n",
+       "      <td>926.7722778320312</td>\n",
+       "      <td>1012.015380859375</td>\n",
+       "      <td>1105.1077880859375</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...</td>\n",
+       "      <td>A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...</td>\n",
+       "      <td>NBPF19;NBPF26;NBPF20;NBPF9;NBPF9;NBPF12;NBPF14...</td>\n",
+       "      <td>A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...</td>\n",
+       "      <td>NBPFJ_HUMAN;NBPFP_HUMAN;NBPFK_HUMAN;NBPF9_HUMA...</td>\n",
+       "      <td>False</td>\n",
+       "      <td>100</td>\n",
+       "      <td>A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...</td>\n",
+       "      <td>SAFYVLEQQR</td>\n",
+       "      <td>Stripped Sequence</td>\n",
+       "      <td>...</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>387;318;43,287,531,775,1019,1263,1507,1751,199...</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>_SAFYVLEQQR_.2</td>\n",
+       "      <td>128.38462829589844</td>\n",
+       "      <td>88.06173706054688</td>\n",
+       "      <td>259.7120361328125</td>\n",
+       "      <td>145.58016967773438</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>CCDC187</td>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>CC187_HUMAN</td>\n",
+       "      <td>False</td>\n",
+       "      <td>100</td>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>QAQLQALETTAK</td>\n",
+       "      <td>Stripped Sequence</td>\n",
+       "      <td>...</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>821;723</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>_QAQLQALETTAK_.2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Filtered</td>\n",
+       "      <td>331.3834533691406</td>\n",
+       "      <td>154.49806213378906</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>CCDC187</td>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>CC187_HUMAN</td>\n",
+       "      <td>False</td>\n",
+       "      <td>100</td>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>EAEHLGTSSSLHLR</td>\n",
+       "      <td>Stripped Sequence</td>\n",
+       "      <td>...</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>805;707</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>Unknown</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>_EAEHLGTSSSLHLR_.4</td>\n",
+       "      <td>56.887428283691406</td>\n",
+       "      <td>56.48419189453125</td>\n",
+       "      <td>88.43661499023438</td>\n",
+       "      <td>108.95454406738281</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 21 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                    PG.ProteinGroups  \\\n",
+       "0                                  A0A024R1R8;Q9Y2S6   \n",
+       "1                                A0A024RBG1;Q9NZJ9-2   \n",
+       "2  A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...   \n",
+       "3                            A0A096LP49;A0A096LP49-2   \n",
+       "4                            A0A096LP49;A0A096LP49-2   \n",
+       "\n",
+       "                                PG.ProteinAccessions  \\\n",
+       "0                                  A0A024R1R8;Q9Y2S6   \n",
+       "1                                A0A024RBG1;Q9NZJ9-2   \n",
+       "2  A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...   \n",
+       "3                            A0A096LP49;A0A096LP49-2   \n",
+       "4                            A0A096LP49;A0A096LP49-2   \n",
+       "\n",
+       "                                            PG.Genes  \\\n",
+       "0                                         TMA7B;TMA7   \n",
+       "1                                       NUDT4B;NUDT4   \n",
+       "2  NBPF19;NBPF26;NBPF20;NBPF9;NBPF9;NBPF12;NBPF14...   \n",
+       "3                                            CCDC187   \n",
+       "4                                            CCDC187   \n",
+       "\n",
+       "                                       PG.UniProtIds  \\\n",
+       "0                                  A0A024R1R8;Q9Y2S6   \n",
+       "1                                A0A024RBG1;Q9NZJ9-2   \n",
+       "2  A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...   \n",
+       "3                            A0A096LP49;A0A096LP49-2   \n",
+       "4                            A0A096LP49;A0A096LP49-2   \n",
+       "\n",
+       "                                     PG.ProteinNames  PG.IsCandidate  \\\n",
+       "0                             TMA7B_HUMAN;TMA7_HUMAN           False   \n",
+       "1                            NUD4B_HUMAN;NUDT4_HUMAN           False   \n",
+       "2  NBPFJ_HUMAN;NBPFP_HUMAN;NBPFK_HUMAN;NBPF9_HUMA...           False   \n",
+       "3                                        CC187_HUMAN           False   \n",
+       "4                                        CC187_HUMAN           False   \n",
+       "\n",
+       "   PG.Completeness                                    PG.ProteinLabel  \\\n",
+       "0              100                                  A0A024R1R8;Q9Y2S6   \n",
+       "1              100                                A0A024RBG1;Q9NZJ9-2   \n",
+       "2              100  A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...   \n",
+       "3              100                            A0A096LP49;A0A096LP49-2   \n",
+       "4              100                            A0A096LP49;A0A096LP49-2   \n",
+       "\n",
+       "  PEP.GroupingKey PEP.GroupingKeyType  ... PEP.IsProteotypic  \\\n",
+       "0       GPLATGGIK   Stripped Sequence  ...           Unknown   \n",
+       "1     LLGIFEQNQDR   Stripped Sequence  ...           Unknown   \n",
+       "2      SAFYVLEQQR   Stripped Sequence  ...           Unknown   \n",
+       "3    QAQLQALETTAK   Stripped Sequence  ...           Unknown   \n",
+       "4  EAEHLGTSSSLHLR   Stripped Sequence  ...           Unknown   \n",
+       "\n",
+       "                                 PEP.PeptidePosition  \\\n",
+       "0                                              51;51   \n",
+       "1                                              80;80   \n",
+       "2  387;318;43,287,531,775,1019,1263,1507,1751,199...   \n",
+       "3                                            821;723   \n",
+       "4                                            805;707   \n",
+       "\n",
+       "  PEP.IsProteinGroupSpecific PEP.IsGeneSpecific  \\\n",
+       "0                    Unknown            Unknown   \n",
+       "1                    Unknown            Unknown   \n",
+       "2                    Unknown            Unknown   \n",
+       "3                    Unknown            Unknown   \n",
+       "4                    Unknown            Unknown   \n",
+       "\n",
+       "  PEP.AllOccurringProteinAccessions      EG.PrecursorId  \\\n",
+       "0                               NaN       _GPLATGGIK_.2   \n",
+       "1                               NaN     _LLGIFEQNQDR_.2   \n",
+       "2                               NaN      _SAFYVLEQQR_.2   \n",
+       "3                               NaN    _QAQLQALETTAK_.2   \n",
+       "4                               NaN  _EAEHLGTSSSLHLR_.4   \n",
+       "\n",
+       "  [1] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14.raw.EG.TotalQuantity (Settings)  \\\n",
+       "0                                      91116.1953125                                              \n",
+       "1                                   1104.52001953125                                              \n",
+       "2                                 128.38462829589844                                              \n",
+       "3                                                  1                                              \n",
+       "4                                 56.887428283691406                                              \n",
+       "\n",
+       "  [2] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15.raw.EG.TotalQuantity (Settings)  \\\n",
+       "0                                        87792.84375                                              \n",
+       "1                                  926.7722778320312                                              \n",
+       "2                                  88.06173706054688                                              \n",
+       "3                                           Filtered                                              \n",
+       "4                                  56.48419189453125                                              \n",
+       "\n",
+       "  [3] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16.raw.EG.TotalQuantity (Settings)  \\\n",
+       "0                                      95424.9609375                                              \n",
+       "1                                  1012.015380859375                                              \n",
+       "2                                  259.7120361328125                                              \n",
+       "3                                  331.3834533691406                                              \n",
+       "4                                  88.43661499023438                                              \n",
+       "\n",
+       "  [4] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17.raw.EG.TotalQuantity (Settings)  \n",
+       "0                                        92398.78125                                             \n",
+       "1                                 1105.1077880859375                                             \n",
+       "2                                 145.58016967773438                                             \n",
+       "3                                 154.49806213378906                                             \n",
+       "4                                 108.95454406738281                                             \n",
+       "\n",
+       "[5 rows x 21 columns]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>PG.ProteinGroups</th>\n",
+       "      <th>PG.GroupLabel</th>\n",
+       "      <th>PG.ProteinAccessions</th>\n",
+       "      <th>PG.Genes</th>\n",
+       "      <th>PG.UniProtIds</th>\n",
+       "      <th>PG.ProteinNames</th>\n",
+       "      <th>PG.NrOfStrippedSequencesIdentified (Experiment-wide)</th>\n",
+       "      <th>PG.NrOfPrecursorsIdentified (Experiment-wide)</th>\n",
+       "      <th>PG.Sequence Version</th>\n",
+       "      <th>PG.FASTAName</th>\n",
+       "      <th>[1] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14.raw.PG.Quantity</th>\n",
+       "      <th>[2] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15.raw.PG.Quantity</th>\n",
+       "      <th>[3] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16.raw.PG.Quantity</th>\n",
+       "      <th>[4] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17.raw.PG.Quantity</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A0A024R1R8;Q9Y2S6</td>\n",
+       "      <td>A0A024R1R8;Q9Y2S6</td>\n",
+       "      <td>A0A024R1R8;Q9Y2S6</td>\n",
+       "      <td>TMA7B;TMA7</td>\n",
+       "      <td>A0A024R1R8;Q9Y2S6</td>\n",
+       "      <td>TMA7B_HUMAN;TMA7_HUMAN</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>vb_uniprotkb_human_AND_reviewed_true_AND_m_202...</td>\n",
+       "      <td>91116.195312</td>\n",
+       "      <td>87792.843750</td>\n",
+       "      <td>95424.960938</td>\n",
+       "      <td>92398.781250</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>A0A024RBG1;Q9NZJ9-2</td>\n",
+       "      <td>A0A024RBG1;Q9NZJ9-2</td>\n",
+       "      <td>A0A024RBG1;Q9NZJ9-2</td>\n",
+       "      <td>NUDT4B;NUDT4</td>\n",
+       "      <td>A0A024RBG1;Q9NZJ9-2</td>\n",
+       "      <td>NUD4B_HUMAN;NUDT4_HUMAN</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1;</td>\n",
+       "      <td>vb_uniprotkb_human_AND_reviewed_true_AND_m_202...</td>\n",
+       "      <td>1104.520020</td>\n",
+       "      <td>926.772278</td>\n",
+       "      <td>1012.015381</td>\n",
+       "      <td>1105.107788</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...</td>\n",
+       "      <td>A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...</td>\n",
+       "      <td>A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...</td>\n",
+       "      <td>NBPF19;NBPF26;NBPF20;NBPF9;NBPF9;NBPF12;NBPF14...</td>\n",
+       "      <td>A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...</td>\n",
+       "      <td>NBPFJ_HUMAN;NBPFP_HUMAN;NBPFK_HUMAN;NBPF9_HUMA...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1;1;1;1;;3;2;3;3;2</td>\n",
+       "      <td>vb_uniprotkb_human_AND_reviewed_true_AND_m_202...</td>\n",
+       "      <td>128.384628</td>\n",
+       "      <td>88.061737</td>\n",
+       "      <td>259.712036</td>\n",
+       "      <td>145.580170</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>CCDC187</td>\n",
+       "      <td>A0A096LP49;A0A096LP49-2</td>\n",
+       "      <td>CC187_HUMAN</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1;</td>\n",
+       "      <td>vb_uniprotkb_human_AND_reviewed_true_AND_m_202...</td>\n",
+       "      <td>47.604214</td>\n",
+       "      <td>90.190895</td>\n",
+       "      <td>44.830376</td>\n",
+       "      <td>78.878120</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>A0A0B4J2D5;P0DPI2</td>\n",
+       "      <td>A0A0B4J2D5;P0DPI2</td>\n",
+       "      <td>A0A0B4J2D5;P0DPI2</td>\n",
+       "      <td>GATD3B;GATD3</td>\n",
+       "      <td>A0A0B4J2D5;P0DPI2</td>\n",
+       "      <td>GAL3B_HUMAN;GAL3A_HUMAN</td>\n",
+       "      <td>6</td>\n",
+       "      <td>12</td>\n",
+       "      <td>1</td>\n",
+       "      <td>vb_uniprotkb_human_AND_reviewed_true_AND_m_202...</td>\n",
+       "      <td>3114.341797</td>\n",
+       "      <td>3309.541016</td>\n",
+       "      <td>3181.658203</td>\n",
+       "      <td>2948.111816</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                    PG.ProteinGroups  \\\n",
+       "0                                  A0A024R1R8;Q9Y2S6   \n",
+       "1                                A0A024RBG1;Q9NZJ9-2   \n",
+       "2  A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...   \n",
+       "3                            A0A096LP49;A0A096LP49-2   \n",
+       "4                                  A0A0B4J2D5;P0DPI2   \n",
+       "\n",
+       "                                       PG.GroupLabel  \\\n",
+       "0                                  A0A024R1R8;Q9Y2S6   \n",
+       "1                                A0A024RBG1;Q9NZJ9-2   \n",
+       "2  A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...   \n",
+       "3                            A0A096LP49;A0A096LP49-2   \n",
+       "4                                  A0A0B4J2D5;P0DPI2   \n",
+       "\n",
+       "                                PG.ProteinAccessions  \\\n",
+       "0                                  A0A024R1R8;Q9Y2S6   \n",
+       "1                                A0A024RBG1;Q9NZJ9-2   \n",
+       "2  A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...   \n",
+       "3                            A0A096LP49;A0A096LP49-2   \n",
+       "4                                  A0A0B4J2D5;P0DPI2   \n",
+       "\n",
+       "                                            PG.Genes  \\\n",
+       "0                                         TMA7B;TMA7   \n",
+       "1                                       NUDT4B;NUDT4   \n",
+       "2  NBPF19;NBPF26;NBPF20;NBPF9;NBPF9;NBPF12;NBPF14...   \n",
+       "3                                            CCDC187   \n",
+       "4                                       GATD3B;GATD3   \n",
+       "\n",
+       "                                       PG.UniProtIds  \\\n",
+       "0                                  A0A024R1R8;Q9Y2S6   \n",
+       "1                                A0A024RBG1;Q9NZJ9-2   \n",
+       "2  A0A087WUL8;B4DH59;P0DPF2;P0DPF3;P0DPF3-2;Q5TAG...   \n",
+       "3                            A0A096LP49;A0A096LP49-2   \n",
+       "4                                  A0A0B4J2D5;P0DPI2   \n",
+       "\n",
+       "                                     PG.ProteinNames  \\\n",
+       "0                             TMA7B_HUMAN;TMA7_HUMAN   \n",
+       "1                            NUD4B_HUMAN;NUDT4_HUMAN   \n",
+       "2  NBPFJ_HUMAN;NBPFP_HUMAN;NBPFK_HUMAN;NBPF9_HUMA...   \n",
+       "3                                        CC187_HUMAN   \n",
+       "4                            GAL3B_HUMAN;GAL3A_HUMAN   \n",
+       "\n",
+       "   PG.NrOfStrippedSequencesIdentified (Experiment-wide)  \\\n",
+       "0                                                  1      \n",
+       "1                                                  1      \n",
+       "2                                                  1      \n",
+       "3                                                  2      \n",
+       "4                                                  6      \n",
+       "\n",
+       "   PG.NrOfPrecursorsIdentified (Experiment-wide) PG.Sequence Version  \\\n",
+       "0                                              1                   1   \n",
+       "1                                              1                  1;   \n",
+       "2                                              1  1;1;1;1;;3;2;3;3;2   \n",
+       "3                                              3                  1;   \n",
+       "4                                             12                   1   \n",
+       "\n",
+       "                                        PG.FASTAName  \\\n",
+       "0  vb_uniprotkb_human_AND_reviewed_true_AND_m_202...   \n",
+       "1  vb_uniprotkb_human_AND_reviewed_true_AND_m_202...   \n",
+       "2  vb_uniprotkb_human_AND_reviewed_true_AND_m_202...   \n",
+       "3  vb_uniprotkb_human_AND_reviewed_true_AND_m_202...   \n",
+       "4  vb_uniprotkb_human_AND_reviewed_true_AND_m_202...   \n",
+       "\n",
+       "   [1] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_14.raw.PG.Quantity  \\\n",
+       "0                                       91116.195312                               \n",
+       "1                                        1104.520020                               \n",
+       "2                                         128.384628                               \n",
+       "3                                          47.604214                               \n",
+       "4                                        3114.341797                               \n",
+       "\n",
+       "   [2] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_15.raw.PG.Quantity  \\\n",
+       "0                                       87792.843750                               \n",
+       "1                                         926.772278                               \n",
+       "2                                          88.061737                               \n",
+       "3                                          90.190895                               \n",
+       "4                                        3309.541016                               \n",
+       "\n",
+       "   [3] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_16.raw.PG.Quantity  \\\n",
+       "0                                       95424.960938                               \n",
+       "1                                        1012.015381                               \n",
+       "2                                         259.712036                               \n",
+       "3                                          44.830376                               \n",
+       "4                                        3181.658203                               \n",
+       "\n",
+       "   [4] 20240321_OA2_Evo1_21min_TiHe_ADIAMA_HeLa_200ng_F-40_iO_17.raw.PG.Quantity  \n",
+       "0                                       92398.781250                              \n",
+       "1                                        1105.107788                              \n",
+       "2                                         145.580170                              \n",
+       "3                                          78.878120                              \n",
+       "4                                        2948.111816                              "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# test alphadia PE table\n",
+    "pe_table_path = \"./dev_grouping_comparison_data/alphadia/precursor_level\"\n",
+    "pe_table_filename = \"precursors.tsv\"\n",
+    "pe_table, engine = read_and_parse_peptide_table(pe_table_path, pe_table_filename)\n",
+    "assert engine == 'Alphadia'\n",
+    "display(pe_table.head())\n",
+    "\n",
+    "# test alphadia PG table\n",
+    "pg_table_path = \"./dev_grouping_comparison_data/alphadia/group_level\"\n",
+    "pg_table_filename = \"pg.matrix.tsv\"\n",
+    "pg_table, engine = read_and_parse_peptide_table(pg_table_path, pg_table_filename)\n",
+    "assert engine == 'Alphadia'\n",
+    "display(pg_table.head())\n",
+    "\n",
+    "# test diann PE table\n",
+    "pe_table_path = \"./dev_grouping_comparison_data/diann/precursor_level\"\n",
+    "pe_table_filename = \"report.tsv\"\n",
+    "pe_table, engine = read_and_parse_peptide_table(pe_table_path, pe_table_filename)\n",
+    "assert engine == 'DiaNN'\n",
+    "display(pe_table.head())\n",
+    "\n",
+    "# test diann PG table\n",
+    "pg_table_path = \"./dev_grouping_comparison_data/diann/group_level\"\n",
+    "pg_table_filename = \"report.pg_matrix.tsv\"\n",
+    "pg_table, engine = read_and_parse_peptide_table(pg_table_path, pg_table_filename)\n",
+    "assert engine == 'DiaNN'\n",
+    "display(pg_table.head())\n",
+    "        \n",
+    "# test spectronaut PE table\n",
+    "pe_table_path = \"./dev_grouping_comparison_data/spectronaut/precursor_level\"\n",
+    "pe_table_filename = \"HeLa_QC_PE_20240409_140530_20240321_Report.tsv\"\n",
+    "pe_table, engine = read_and_parse_peptide_table(pe_table_path, pe_table_filename)\n",
+    "assert engine == 'Spectronaut'\n",
+    "display(pe_table.head())\n",
+    "\n",
+    "# test spectronaut PG table\n",
+    "pg_table_path = \"./dev_grouping_comparison_data/spectronaut/group_level\"\n",
+    "pg_table_filename = \"HeLa_QC_PG_20240409_140824_20240321_Report.tsv\"\n",
+    "pg_table, engine = read_and_parse_peptide_table(pg_table_path, pg_table_filename)\n",
+    "assert engine == 'Spectronaut'\n",
+    "display(pg_table.head())"
    ]
   },
   {
@@ -89,8 +1479,22 @@
   }
  ],
  "metadata": {
+  "kernelspec": {
+   "display_name": "alphaverse",
+   "language": "python",
+   "name": "python3"
+  },
   "language_info": {
-   "name": "python"
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.18"
   },
   "orig_nbformat": 4
  },

From 4b2bf92410b62b28bfe9ba62c40132fa4a7d891f Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Sat, 27 Apr 2024 17:20:25 +0200
Subject: [PATCH 04/48] update plotting

---
 alphadia/plotting/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/alphadia/plotting/utils.py b/alphadia/plotting/utils.py
index 5fbe49d4..eba62983 100644
--- a/alphadia/plotting/utils.py
+++ b/alphadia/plotting/utils.py
@@ -45,6 +45,7 @@ def density_scatter(
     x: typing.Union[np.ndarray, pd.Series, pd.DataFrame],
     y: typing.Union[np.ndarray, pd.Series, pd.DataFrame],
     axis: plt.Axes = None,
+    bw_method=None,
     s: float = 1,
     **kwargs,
 ):
@@ -100,7 +101,7 @@ def density_scatter(
 
     # Calculate the point density
     xy = np.vstack([x, y])
-    z = gaussian_kde(xy)(xy)
+    z = gaussian_kde(xy, bw_method=bw_method)(xy)
 
     # Sort the points by density, so that the densest points are plotted last
     idx = z.argsort()

From 07d98c26dbbb56b7e877fb6d3675cb23660ce6ac Mon Sep 17 00:00:00 2001
From: Vincenth Brennsteiner
 <vincenthbrennsteiner@141-61-112-83.biochem.mpg.de>
Date: Wed, 22 May 2024 14:39:16 +0200
Subject: [PATCH 05/48] MicroCommit: update gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 83dfd498..172f9b21 100644
--- a/.gitignore
+++ b/.gitignore
@@ -139,6 +139,7 @@ dmypy.json
 
 # Data
 testdata/
+nbs/debug/dev_grouping_comparison_data/
 
 ######################
 # OS generated files #

From ddbfe4d8e40b8bc812bbb9cd099fcd8911376ac3 Mon Sep 17 00:00:00 2001
From: Vincenth Brennsteiner
 <vincenthbrennsteiner@141-61-112-83.biochem.mpg.de>
Date: Wed, 22 May 2024 19:50:28 +0200
Subject: [PATCH 06/48] Refactor function docstrings in grouping.py, add check
 for equal return_dict keys and initial precursor_idx list.

---
 alphadia/grouping.py | 51 +++++++++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/alphadia/grouping.py b/alphadia/grouping.py
index e6966a8c..67458ee7 100644
--- a/alphadia/grouping.py
+++ b/alphadia/grouping.py
@@ -15,26 +15,19 @@ def group_and_parsimony(
     precursor_idx: NDArray[np.int64],
     precursor_ids: NDArray[Any],
 ):
-    """
-    Function to group ids based on precursor indices and return groups & master ids as lists
-
-    Parameters
-    ----------
-
-        precursor_idx : np.array[int]
-            array containing unique integer indices corresponding to each peptide precursor
+    """Function to group ids based on precursor indices and return groups & master ids as lists
 
-        precursor_ids : np.array[str]
-            array of variable length semicolon separated str belonging to a given peptide precursor id
+    Args:
+        precursor_idx (np.array[int]): array containing unique integer indices corresponding 
+            to each peptide precursor
+        precursor_ids (np.array[str]): array of variable length semicolon separated str belonging 
+            to a given peptide precursor id
 
     Returns
-    -------
-
-        ids : list[str]
-            list of ids linked to a given peptide precursor, such that each precursor only belongs to one id. This list is ordered by precursor_idx.
-
-        groups : list[str]
-            list of semicolon separated ids belonging to a given peptide precursor, such that each precursor only belongs to one group. This list is ordered by precursor_idx.
+        ids (list[str]): list of ids linked to a given peptide precursor, such that each 
+            precursor only belongs to one id. This list is ordered by precursor_idx.
+        groups (list[str]): list of semicolon separated ids belonging to a given peptide precursor, 
+            such that each precursor only belongs to one group. This list is ordered by precursor_idx.
 
     """
 
@@ -53,11 +46,12 @@ def group_and_parsimony(
 
     # loop bounds max iterations
     for _ in range(len(id_dict)):
-        # remove longest set from dict as query & remove query peptided from all other sets
+        # remove longest set from dict as query & remove query peptide from all other sets
         query_id = max(id_dict.keys(), key=lambda x: len(id_dict[x]))
         query_peptides = id_dict.pop(query_id)
         query_group = [query_id]
 
+        # break if query is empty: the query is the largest remaining set, so all other sets are empty too
         if len(query_peptides) == 0:
             break
 
@@ -67,6 +61,8 @@ def group_and_parsimony(
                 continue
             new_subject_set = subject_peptides - query_peptides
             id_dict[subject_protein] = new_subject_set
+            # With the following lines commented out, the query will only eliminate peptides from 
+            # respective subject proteins, but we will not add them to the query group
             # if len(new_subject_set) == 0:
             #    query_group.append(subject_protein)
 
@@ -90,13 +86,18 @@ def group_and_parsimony(
             f"Not all precursors were found in the output of the grouping function. {len(return_dict)} precursors were found, but {len(precursor_idx)} were expected."
         )
 
-    # order by precursor index
+    # check that all return_dict keys are unique. Assume same length and unique keys constitutes match to precursor_idx
+    if len(return_dict) != len(set(return_dict.keys())):
+        raise ValueError(
+            "Not all precursors were found in the output of the grouping function. Duplicate precursors were found."
+        )
+
+    # order by precursor index and return as lists
     return_dict_ordered = {key: return_dict[key] for key in precursor_idx}
     ids, groups = zip(*return_dict_ordered.values())
 
     return ids, groups
 
-
 def perform_grouping(
     psm: pd.DataFrame,
     genes_or_proteins: str = "proteins",
@@ -105,8 +106,14 @@ def perform_grouping(
 ):
     """Highest level function for grouping proteins in precursor table
 
-    Parameters:
-        gene_or_protein (str, optional): Column to group proteins by. Defaults to "proteins".
+    Args:
+        psm (pd.DataFrame) : Precursor table with columns "precursor_idx" and protein & decoy columns.
+        genes_or_proteins (str, optional) : Column to group proteins by. Defaults to "proteins".
+        decoy_column (str, optional) : Column to use for decoy annotation. Defaults to "decoy".
+        group (bool, optional) : Whether to group proteins. Defaults to True.
+
+    Returns:
+        pd.DataFrame: Precursor table with grouped proteins
 
     """
 

From dccb69bf52ca187fffba8d8720115f402cbcbf1a Mon Sep 17 00:00:00 2001
From: Vincenth Brennsteiner <brennsteiner@biochem.mpg.de>
Date: Fri, 24 May 2024 11:10:46 +0200
Subject: [PATCH 07/48] adapt grouping tutorial notebook to heuristic grouping,
 add comments

---
 alphadia/grouping.py                          |  23 +-
 .../protein_grouping_tutorial.ipynb           | 342 +++++++++---------
 tests/unit_tests/test_grouping.py             |  44 ++-
 3 files changed, 223 insertions(+), 186 deletions(-)

diff --git a/alphadia/grouping.py b/alphadia/grouping.py
index 67458ee7..f0ba1588 100644
--- a/alphadia/grouping.py
+++ b/alphadia/grouping.py
@@ -75,6 +75,7 @@ def group_and_parsimony(
     id_group = [";".join(x) for x in id_group]
 
     # reshape output data and align with precursor dataframe input. Use dictionary for efficient ordering
+    # TODO consider iterating over precursor_idx directly
     return_dict = {}
     for i, peptide_set in enumerate(precursor_set):
         for key in peptide_set:
@@ -83,16 +84,18 @@ def group_and_parsimony(
     # check that all precursors are found again
     if len(return_dict) != len(precursor_idx):
         raise ValueError(
-            f"Not all precursors were found in the output of the grouping function. {len(return_dict)} precursors were found, but {len(precursor_idx)} were expected."
+            f"""Not all precursors were found in the output of the grouping function. {len(return_dict)} precursors were found, but {len(precursor_idx)} were expected."""
         )
 
     # check that all return_dict keys are unique. Assume same length and unique keys constitutes match to precursor_idx
     if len(return_dict) != len(set(return_dict.keys())):
         raise ValueError(
-            "Not all precursors were found in the output of the grouping function. Duplicate precursors were found."
+            """Not all precursors were found in the output of the grouping function. 
+            Duplicate precursors were found."""
         )
 
     # order by precursor index and return as lists
+    # TODO look above, order by precursor_idx directly?
     return_dict_ordered = {key: return_dict[key] for key in precursor_idx}
     ids, groups = zip(*return_dict_ordered.values())
 
@@ -122,17 +125,20 @@ def perform_grouping(
 
     # create non-duplicated view of precursor table
     duplicate_mask = ~psm.duplicated(subset=["precursor_idx"], keep="first")
-    # make sure column is string
+    
+    # make sure column is string and subset to relevant columns
     psm[genes_or_proteins] = psm[genes_or_proteins].astype(str)
     upsm = psm.loc[duplicate_mask, ["precursor_idx", genes_or_proteins, decoy_column]]
 
     # check if duplicate precursors exist
+    # TODO: consider removing check for duplicates since duplicate masking is implemented above
     if upsm.duplicated(subset=["precursor_idx"]).any():
         raise ValueError(
-            "The same precursor was found annotated to different proteins. Please make sure all precursors were searched with the same library."
+            """The same precursor was found annotated to different proteins. 
+            Please make sure all precursors were searched with the same library."""
         )
 
-    # handle case with only one decoy class:
+    # greedy set cover on all proteins if there is only one decoy class
     unique_decoys = upsm[decoy_column].unique()
     if len(unique_decoys) == 1:
         upsm[decoy_column] = -1
@@ -141,15 +147,19 @@ def perform_grouping(
         )
         upsm = upsm[["precursor_idx", "pg_master", "pg", genes_or_proteins]]
     else:
+        # handle case with multiple decoy classes
         target_mask = upsm[decoy_column] == 0
         decoy_mask = upsm[decoy_column] == 1
 
+        # greedy set cover on targets
         t_df = upsm[target_mask].copy()
+        # TODO: consider directly assigning to t_df["pg_master"], t_df["pg"] = group_and_parsimony(...)
         new_columns = group_and_parsimony(
             t_df.precursor_idx.values, t_df[genes_or_proteins].values
         )
         t_df["pg_master"], t_df["pg"] = new_columns
 
+        # greedy set cover on decoys
         d_df = upsm[decoy_mask].copy()
         new_columns = group_and_parsimony(
             d_df.precursor_idx.values, d_df[genes_or_proteins].values
@@ -160,7 +170,10 @@ def perform_grouping(
             ["precursor_idx", "pg_master", "pg", genes_or_proteins]
         ]
 
+    # heuristic grouping: from each initial precursor's protein ID set, filter out proteins that 
+    # are never master proteins
     if group:
+        # select all master protein groups
         allowed_pg = upsm["pg"].str.split(";", expand=True)[0].unique()
         allowed_set_pg = set(allowed_pg)
 
diff --git a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
index 0b36cfe2..fd07c12b 100644
--- a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
+++ b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
@@ -25,7 +25,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -37,7 +37,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -64,7 +64,7 @@
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
        "      <th>genes</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "      <th>pg_master</th>\n",
        "      <th>pg</th>\n",
        "    </tr>\n",
@@ -77,7 +77,7 @@
        "      <td>P1;P2;P3;P4</td>\n",
        "      <td>0</td>\n",
        "      <td>P1</td>\n",
-       "      <td>P1;P2;P3</td>\n",
+       "      <td>P1;P4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -86,7 +86,7 @@
        "      <td>P1;P2;P3;P4</td>\n",
        "      <td>0</td>\n",
        "      <td>P1</td>\n",
-       "      <td>P1;P2;P3</td>\n",
+       "      <td>P1;P4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -95,7 +95,7 @@
        "      <td>P1;P2</td>\n",
        "      <td>0</td>\n",
        "      <td>P1</td>\n",
-       "      <td>P1;P2;P3</td>\n",
+       "      <td>P1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -104,7 +104,7 @@
        "      <td>P1;P2</td>\n",
        "      <td>0</td>\n",
        "      <td>P1</td>\n",
-       "      <td>P1;P2;P3</td>\n",
+       "      <td>P1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -131,7 +131,7 @@
        "      <td>P4;P5</td>\n",
        "      <td>1</td>\n",
        "      <td>P4</td>\n",
-       "      <td>P4;P5</td>\n",
+       "      <td>P4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
@@ -140,7 +140,7 @@
        "      <td>P4;P5</td>\n",
        "      <td>1</td>\n",
        "      <td>P4</td>\n",
-       "      <td>P4;P5</td>\n",
+       "      <td>P4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
@@ -165,20 +165,20 @@
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx     proteins        genes  _decoy pg_master        pg\n",
-       "0              0  P1;P2;P3;P4  P1;P2;P3;P4       0        P1  P1;P2;P3\n",
-       "1              0  P1;P2;P3;P4  P1;P2;P3;P4       0        P1  P1;P2;P3\n",
-       "2              1        P1;P2        P1;P2       0        P1  P1;P2;P3\n",
-       "3              1        P1;P2        P1;P2       0        P1  P1;P2;P3\n",
-       "4              2           P4           P4       0        P4        P4\n",
-       "5              2           P4           P4       1        P4        P4\n",
-       "6              3        P4;P5        P4;P5       1        P4     P4;P5\n",
-       "7              3        P4;P5        P4;P5       1        P4     P4;P5\n",
-       "8              4           P6           P6       1        P6        P6\n",
-       "9              4           P6           P6       1        P6        P6"
+       "   precursor_idx     proteins        genes  decoy pg_master     pg\n",
+       "0              0  P1;P2;P3;P4  P1;P2;P3;P4      0        P1  P1;P4\n",
+       "1              0  P1;P2;P3;P4  P1;P2;P3;P4      0        P1  P1;P4\n",
+       "2              1        P1;P2        P1;P2      0        P1     P1\n",
+       "3              1        P1;P2        P1;P2      0        P1     P1\n",
+       "4              2           P4           P4      0        P4     P4\n",
+       "5              2           P4           P4      1        P4     P4\n",
+       "6              3        P4;P5        P4;P5      1        P4     P4\n",
+       "7              3        P4;P5        P4;P5      1        P4     P4\n",
+       "8              4           P6           P6      1        P6     P6\n",
+       "9              4           P6           P6      1        P6     P6"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -216,7 +216,7 @@
     "        \"precursor_idx\": precursor_idx,\n",
     "        \"proteins\": proteins,\n",
     "        \"genes\": genes,\n",
-    "        \"_decoy\": decoy,\n",
+    "        \"decoy\": decoy,\n",
     "    }\n",
     ")\n",
     "\n",
@@ -251,7 +251,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -284,7 +284,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -317,11 +317,11 @@
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy\n",
-       "0              1        A       0\n",
-       "1              2        A       0\n",
-       "2              3        B       0\n",
-       "3              4        B       0"
+       "   precursor_idx proteins  decoy\n",
+       "0              1        A      0\n",
+       "1              2        A      0\n",
+       "2              3        B      0\n",
+       "3              4        B      0"
       ]
      },
      "metadata": {},
@@ -350,7 +350,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "      <th>pg_master</th>\n",
        "      <th>pg</th>\n",
        "    </tr>\n",
@@ -393,11 +393,11 @@
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy pg_master pg\n",
-       "0              1        A       0         A  A\n",
-       "1              2        A       0         A  A\n",
-       "2              3        B       0         B  B\n",
-       "3              4        B       0         B  B"
+       "   precursor_idx proteins  decoy pg_master pg\n",
+       "0              1        A      0         A  A\n",
+       "1              2        A      0         A  A\n",
+       "2              3        B      0         B  B\n",
+       "3              4        B      0         B  B"
       ]
      },
      "metadata": {},
@@ -433,7 +433,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -466,11 +466,11 @@
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy\n",
-       "0              1        A       0\n",
-       "1              2      A;B       0\n",
-       "2              3      A;B       0\n",
-       "3              4        B       0"
+       "   precursor_idx proteins  decoy\n",
+       "0              1        A      0\n",
+       "1              2      A;B      0\n",
+       "2              3      A;B      0\n",
+       "3              4        B      0"
       ]
      },
      "metadata": {},
@@ -499,7 +499,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "      <th>pg_master</th>\n",
        "      <th>pg</th>\n",
        "    </tr>\n",
@@ -519,7 +519,7 @@
        "      <td>A;B</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A</td>\n",
+       "      <td>A;B</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -527,7 +527,7 @@
        "      <td>A;B</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A</td>\n",
+       "      <td>A;B</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -542,11 +542,11 @@
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy pg_master pg\n",
-       "0              1        A       0         A  A\n",
-       "1              2      A;B       0         A  A\n",
-       "2              3      A;B       0         A  A\n",
-       "3              4        B       0         B  B"
+       "   precursor_idx proteins  decoy pg_master   pg\n",
+       "0              1        A      0         A    A\n",
+       "1              2      A;B      0         A  A;B\n",
+       "2              3      A;B      0         A  A;B\n",
+       "3              4        B      0         B    B"
       ]
      },
      "metadata": {},
@@ -582,7 +582,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -615,11 +615,11 @@
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy\n",
-       "0              1      A;B       0\n",
-       "1              2      A;B       0\n",
-       "2              3      A;B       0\n",
-       "3              4      A;B       0"
+       "   precursor_idx proteins  decoy\n",
+       "0              1      A;B      0\n",
+       "1              2      A;B      0\n",
+       "2              3      A;B      0\n",
+       "3              4      A;B      0"
       ]
      },
      "metadata": {},
@@ -648,7 +648,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "      <th>pg_master</th>\n",
        "      <th>pg</th>\n",
        "    </tr>\n",
@@ -660,7 +660,7 @@
        "      <td>A;B</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;B</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -668,7 +668,7 @@
        "      <td>A;B</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;B</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -676,7 +676,7 @@
        "      <td>A;B</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;B</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -684,18 +684,18 @@
        "      <td>A;B</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;B</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy pg_master   pg\n",
-       "0              1      A;B       0         A  A;B\n",
-       "1              2      A;B       0         A  A;B\n",
-       "2              3      A;B       0         A  A;B\n",
-       "3              4      A;B       0         A  A;B"
+       "   precursor_idx proteins  decoy pg_master pg\n",
+       "0              1      A;B      0         A  A\n",
+       "1              2      A;B      0         A  A\n",
+       "2              3      A;B      0         A  A\n",
+       "3              4      A;B      0         A  A"
       ]
      },
      "metadata": {},
@@ -731,7 +731,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -764,11 +764,11 @@
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy\n",
-       "0              1        A       0\n",
-       "1              2      A;B       0\n",
-       "2              3      A;B       0\n",
-       "3              4      A;B       0"
+       "   precursor_idx proteins  decoy\n",
+       "0              1        A      0\n",
+       "1              2      A;B      0\n",
+       "2              3      A;B      0\n",
+       "3              4      A;B      0"
       ]
      },
      "metadata": {},
@@ -797,7 +797,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "      <th>pg_master</th>\n",
        "      <th>pg</th>\n",
        "    </tr>\n",
@@ -809,7 +809,7 @@
        "      <td>A</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;B</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -817,7 +817,7 @@
        "      <td>A;B</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;B</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -825,7 +825,7 @@
        "      <td>A;B</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;B</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -833,18 +833,18 @@
        "      <td>A;B</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;B</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy pg_master   pg\n",
-       "0              1        A       0         A  A;B\n",
-       "1              2      A;B       0         A  A;B\n",
-       "2              3      A;B       0         A  A;B\n",
-       "3              4      A;B       0         A  A;B"
+       "   precursor_idx proteins  decoy pg_master pg\n",
+       "0              1        A      0         A  A\n",
+       "1              2      A;B      0         A  A\n",
+       "2              3      A;B      0         A  A\n",
+       "3              4      A;B      0         A  A"
       ]
      },
      "metadata": {},
@@ -880,7 +880,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -913,11 +913,11 @@
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy\n",
-       "0              1        A       0\n",
-       "1              2      A;B       0\n",
-       "2              3      B;C       0\n",
-       "3              4        C       0"
+       "   precursor_idx proteins  decoy\n",
+       "0              1        A      0\n",
+       "1              2      A;B      0\n",
+       "2              3      B;C      0\n",
+       "3              4        C      0"
       ]
      },
      "metadata": {},
@@ -946,7 +946,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "      <th>pg_master</th>\n",
        "      <th>pg</th>\n",
        "    </tr>\n",
@@ -974,7 +974,7 @@
        "      <td>B;C</td>\n",
        "      <td>0</td>\n",
        "      <td>C</td>\n",
-       "      <td>C;B</td>\n",
+       "      <td>C</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -982,18 +982,18 @@
        "      <td>C</td>\n",
        "      <td>0</td>\n",
        "      <td>C</td>\n",
-       "      <td>C;B</td>\n",
+       "      <td>C</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy pg_master   pg\n",
-       "0              1        A       0         A    A\n",
-       "1              2      A;B       0         A    A\n",
-       "2              3      B;C       0         C  C;B\n",
-       "3              4        C       0         C  C;B"
+       "   precursor_idx proteins  decoy pg_master pg\n",
+       "0              1        A      0         A  A\n",
+       "1              2      A;B      0         A  A\n",
+       "2              3      B;C      0         C  C\n",
+       "3              4        C      0         C  C"
       ]
      },
      "metadata": {},
@@ -1029,7 +1029,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -1062,11 +1062,11 @@
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy\n",
-       "0              1      A;B       0\n",
-       "1              2    A;B;C       0\n",
-       "2              3    A;B;C       0\n",
-       "3              4      A;C       0"
+       "   precursor_idx proteins  decoy\n",
+       "0              1      A;B      0\n",
+       "1              2    A;B;C      0\n",
+       "2              3    A;B;C      0\n",
+       "3              4      A;C      0"
       ]
      },
      "metadata": {},
@@ -1095,7 +1095,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "      <th>pg_master</th>\n",
        "      <th>pg</th>\n",
        "    </tr>\n",
@@ -1107,7 +1107,7 @@
        "      <td>A;B</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;B;C</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -1115,7 +1115,7 @@
        "      <td>A;B;C</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;B;C</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -1123,7 +1123,7 @@
        "      <td>A;B;C</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;B;C</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -1131,18 +1131,18 @@
        "      <td>A;C</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;B;C</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy pg_master     pg\n",
-       "0              1      A;B       0         A  A;B;C\n",
-       "1              2    A;B;C       0         A  A;B;C\n",
-       "2              3    A;B;C       0         A  A;B;C\n",
-       "3              4      A;C       0         A  A;B;C"
+       "   precursor_idx proteins  decoy pg_master pg\n",
+       "0              1      A;B      0         A  A\n",
+       "1              2    A;B;C      0         A  A\n",
+       "2              3    A;B;C      0         A  A\n",
+       "3              4      A;C      0         A  A"
       ]
      },
      "metadata": {},
@@ -1178,7 +1178,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -1211,11 +1211,11 @@
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy\n",
-       "0              1    A;B;C       0\n",
-       "1              2    B;C;D       0\n",
-       "2              3    C;D;E       0\n",
-       "3              4    D;E;A       0"
+       "   precursor_idx proteins  decoy\n",
+       "0              1    A;B;C      0\n",
+       "1              2    B;C;D      0\n",
+       "2              3    C;D;E      0\n",
+       "3              4    D;E;A      0"
       ]
      },
      "metadata": {},
@@ -1244,7 +1244,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "      <th>pg_master</th>\n",
        "      <th>pg</th>\n",
        "    </tr>\n",
@@ -1256,7 +1256,7 @@
        "      <td>A;B;C</td>\n",
        "      <td>0</td>\n",
        "      <td>C</td>\n",
-       "      <td>C;B</td>\n",
+       "      <td>A;C</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -1264,7 +1264,7 @@
        "      <td>B;C;D</td>\n",
        "      <td>0</td>\n",
        "      <td>C</td>\n",
-       "      <td>C;B</td>\n",
+       "      <td>C</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -1272,7 +1272,7 @@
        "      <td>C;D;E</td>\n",
        "      <td>0</td>\n",
        "      <td>C</td>\n",
-       "      <td>C;B</td>\n",
+       "      <td>C</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -1280,18 +1280,18 @@
        "      <td>D;E;A</td>\n",
        "      <td>0</td>\n",
        "      <td>A</td>\n",
-       "      <td>A;D;E</td>\n",
+       "      <td>A</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx proteins  _decoy pg_master     pg\n",
-       "0              1    A;B;C       0         C    C;B\n",
-       "1              2    B;C;D       0         C    C;B\n",
-       "2              3    C;D;E       0         C    C;B\n",
-       "3              4    D;E;A       0         A  A;D;E"
+       "   precursor_idx proteins  decoy pg_master   pg\n",
+       "0              1    A;B;C      0         C  A;C\n",
+       "1              2    B;C;D      0         C    C\n",
+       "2              3    C;D;E      0         C    C\n",
+       "3              4    D;E;A      0         A    A"
       ]
      },
      "metadata": {},
@@ -1327,7 +1327,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -1360,11 +1360,11 @@
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx     proteins  _decoy\n",
-       "0              0  P1;P2;P3;P4       0\n",
-       "1              1        P1;P4       0\n",
-       "2              2           P2       0\n",
-       "3              3        P2;P5       0"
+       "   precursor_idx     proteins  decoy\n",
+       "0              0  P1;P2;P3;P4      0\n",
+       "1              1        P1;P4      0\n",
+       "2              2           P2      0\n",
+       "3              3        P2;P5      0"
       ]
      },
      "metadata": {},
@@ -1393,7 +1393,7 @@
        "      <th></th>\n",
        "      <th>precursor_idx</th>\n",
        "      <th>proteins</th>\n",
-       "      <th>_decoy</th>\n",
+       "      <th>decoy</th>\n",
        "      <th>pg_master</th>\n",
        "      <th>pg</th>\n",
        "    </tr>\n",
@@ -1405,7 +1405,7 @@
        "      <td>P1;P2;P3;P4</td>\n",
        "      <td>0</td>\n",
        "      <td>P2</td>\n",
-       "      <td>P2;P3;P5</td>\n",
+       "      <td>P1;P2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -1413,7 +1413,7 @@
        "      <td>P1;P4</td>\n",
        "      <td>0</td>\n",
        "      <td>P1</td>\n",
-       "      <td>P1;P4</td>\n",
+       "      <td>P1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -1421,7 +1421,7 @@
        "      <td>P2</td>\n",
        "      <td>0</td>\n",
        "      <td>P2</td>\n",
-       "      <td>P2;P3;P5</td>\n",
+       "      <td>P2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -1429,18 +1429,18 @@
        "      <td>P2;P5</td>\n",
        "      <td>0</td>\n",
        "      <td>P2</td>\n",
-       "      <td>P2;P3;P5</td>\n",
+       "      <td>P2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "   precursor_idx     proteins  _decoy pg_master        pg\n",
-       "0              0  P1;P2;P3;P4       0        P2  P2;P3;P5\n",
-       "1              1        P1;P4       0        P1     P1;P4\n",
-       "2              2           P2       0        P2  P2;P3;P5\n",
-       "3              3        P2;P5       0        P2  P2;P3;P5"
+       "   precursor_idx     proteins  decoy pg_master     pg\n",
+       "0              0  P1;P2;P3;P4      0        P2  P1;P2\n",
+       "1              1        P1;P4      0        P1     P1\n",
+       "2              2           P2      0        P2     P2\n",
+       "3              3        P2;P5      0        P2     P2"
       ]
      },
      "metadata": {},
@@ -1455,7 +1455,7 @@
     "        {\n",
     "            \"precursor_idx\": [1, 2, 3, 4],\n",
     "            \"proteins\": [\"A\", \"A\", \"B\", \"B\"],\n",
-    "            \"_decoy\": [0, 0, 0, 0],\n",
+    "            \"decoy\": [0, 0, 0, 0],\n",
     "        }\n",
     "    )\n",
     "    print(\"distinct proteins\")\n",
@@ -1466,9 +1466,10 @@
     "    ) == {\n",
     "        \"precursor_idx\": [1, 2, 3, 4],\n",
     "        \"proteins\": [\"A\", \"A\", \"B\", \"B\"],\n",
-    "        \"_decoy\": [0, 0, 0, 0],\n",
+    "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"A\", \"A\", \"B\", \"B\"],\n",
-    "        \"pg\": [\"A\", \"A\", \"B\", \"B\"],\n",
+    "        # \"pg\": [\"A\", \"A\", \"B\", \"B\"], # parsimonious grouping\n",
+    "        \"pg\" : [\"A\", \"A\", \"B\", \"B\"]\n",
     "    }\n",
     "\n",
     "    # 2. differentiable proteins: shared peptides go to one protein\n",
@@ -1476,7 +1477,7 @@
     "        {\n",
     "            \"precursor_idx\": [1, 2, 3, 4],\n",
     "            \"proteins\": [\"A\", \"A;B\", \"A;B\", \"B\"],\n",
-    "            \"_decoy\": [0, 0, 0, 0],\n",
+    "            \"decoy\": [0, 0, 0, 0],\n",
     "        }\n",
     "    )\n",
     "    print(\"differentiable proteins\")\n",
@@ -1487,9 +1488,10 @@
     "    ).to_dict(orient=\"list\") == {\n",
     "        \"precursor_idx\": [1, 2, 3, 4],\n",
     "        \"proteins\": [\"A\", \"A;B\", \"A;B\", \"B\"],\n",
-    "        \"_decoy\": [0, 0, 0, 0],\n",
+    "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"A\", \"A\", \"A\", \"B\"],\n",
-    "        \"pg\": [\"A\", \"A\", \"A\", \"B\"],\n",
+    "        # \"pg\": [\"A\", \"A\", \"A\", \"B\"], # parsimonious grouping\n",
+    "        \"pg\" : [\"A\", \"A;B\", \"A;B\", \"B\"]\n",
     "    }\n",
     "\n",
     "    # 3. indistinguishable proteins: one is totally removed\n",
@@ -1497,7 +1499,7 @@
     "        {\n",
     "            \"precursor_idx\": [1, 2, 3, 4],\n",
     "            \"proteins\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"],\n",
-    "            \"_decoy\": [0, 0, 0, 0],\n",
+    "            \"decoy\": [0, 0, 0, 0],\n",
     "        }\n",
     "    )\n",
     "    print(\"indistinguishable proteins\")\n",
@@ -1508,9 +1510,10 @@
     "    ).to_dict(orient=\"list\") == {\n",
     "        \"precursor_idx\": [1, 2, 3, 4],\n",
     "        \"proteins\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"],\n",
-    "        \"_decoy\": [0, 0, 0, 0],\n",
+    "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"A\", \"A\", \"A\", \"A\"],\n",
-    "        \"pg\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"],\n",
+    "        # \"pg\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"], # parsimonious grouping\n",
+    "        \"pg\" : [\"A\", \"A\", \"A\", \"A\"]\n",
     "    }\n",
     "\n",
     "    # 4. subset protein: subsetted protein is removed\n",
@@ -1518,7 +1521,7 @@
     "        {\n",
     "            \"precursor_idx\": [1, 2, 3, 4],\n",
     "            \"proteins\": [\"A\", \"A;B\", \"A;B\", \"A;B\"],\n",
-    "            \"_decoy\": [0, 0, 0, 0],\n",
+    "            \"decoy\": [0, 0, 0, 0],\n",
     "        }\n",
     "    )\n",
     "    print(\"subset proteins\")\n",
@@ -1529,9 +1532,10 @@
     "    ) == {\n",
     "        \"precursor_idx\": [1, 2, 3, 4],\n",
     "        \"proteins\": [\"A\", \"A;B\", \"A;B\", \"A;B\"],\n",
-    "        \"_decoy\": [0, 0, 0, 0],\n",
+    "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"A\", \"A\", \"A\", \"A\"],\n",
-    "        \"pg\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"],\n",
+    "        # \"pg\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"], # parsimonious grouping\n",
+    "        \"pg\" : [\"A\", \"A\", \"A\", \"A\"]\n",
     "    }\n",
     "\n",
     "    # 5. subsumable proteins --> there are two possible outcomes depending on whether one starts with the middle or either end of the chain\n",
@@ -1539,7 +1543,7 @@
     "        {\n",
     "            \"precursor_idx\": [1, 2, 3, 4],\n",
     "            \"proteins\": [\"A\", \"A;B\", \"B;C\", \"C\"],\n",
-    "            \"_decoy\": [0, 0, 0, 0],\n",
+    "            \"decoy\": [0, 0, 0, 0],\n",
     "        }\n",
     "    )\n",
     "    print(\"subsumable proteins\")\n",
@@ -1550,9 +1554,10 @@
     "    ) == {\n",
     "        \"precursor_idx\": [1, 2, 3, 4],\n",
     "        \"proteins\": [\"A\", \"A;B\", \"B;C\", \"C\"],\n",
-    "        \"_decoy\": [0, 0, 0, 0],\n",
+    "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"A\", \"A\", \"C\", \"C\"],\n",
-    "        \"pg\": [\"A\", \"A\", \"C;B\", \"C;B\"],\n",
+    "        # \"pg\": [\"A\", \"A\", \"C;B\", \"C;B\"], # parsimonious grouping\n",
+    "        \"pg\" : [\"A\", \"A\", \"C\", \"C\"]\n",
     "    }\n",
     "\n",
     "    # 6. a group of proteins identified by shared peptides only\n",
@@ -1560,7 +1565,7 @@
     "        {\n",
     "            \"precursor_idx\": [1, 2, 3, 4],\n",
     "            \"proteins\": [\"A;B\", \"A;B;C\", \"A;B;C\", \"A;C\"],\n",
-    "            \"_decoy\": [0, 0, 0, 0],\n",
+    "            \"decoy\": [0, 0, 0, 0],\n",
     "        }\n",
     "    )\n",
     "    print(\"shared only\")\n",
@@ -1571,9 +1576,10 @@
     "    ) == {\n",
     "        \"precursor_idx\": [1, 2, 3, 4],\n",
     "        \"proteins\": [\"A;B\", \"A;B;C\", \"A;B;C\", \"A;C\"],\n",
-    "        \"_decoy\": [0, 0, 0, 0],\n",
+    "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"A\", \"A\", \"A\", \"A\"],\n",
-    "        \"pg\": [\"A;B;C\", \"A;B;C\", \"A;B;C\", \"A;B;C\"],\n",
+    "        # \"pg\": [\"A;B;C\", \"A;B;C\", \"A;B;C\", \"A;B;C\"], # parsimonious grouping\n",
+    "        \"pg\" : [\"A\", \"A\", \"A\", \"A\"]\n",
     "    }\n",
     "\n",
     "    # 7. circular proteins\n",
@@ -1581,7 +1587,7 @@
     "        {\n",
     "            \"precursor_idx\": [1, 2, 3, 4],\n",
     "            \"proteins\": [\"A;B;C\", \"B;C;D\", \"C;D;E\", \"D;E;A\"],\n",
-    "            \"_decoy\": [0, 0, 0, 0],\n",
+    "            \"decoy\": [0, 0, 0, 0],\n",
     "        }\n",
     "    )\n",
     "    print(\"circular\")\n",
@@ -1592,9 +1598,10 @@
     "    ) == {\n",
     "        \"precursor_idx\": [1, 2, 3, 4],\n",
     "        \"proteins\": [\"A;B;C\", \"B;C;D\", \"C;D;E\", \"D;E;A\"],\n",
-    "        \"_decoy\": [0, 0, 0, 0],\n",
+    "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"C\", \"C\", \"C\", \"A\"],\n",
-    "        \"pg\": [\"C;B\", \"C;B\", \"C;B\", \"A;D;E\"],\n",
+    "        # \"pg\": [\"C;B\", \"C;B\", \"C;B\", \"A;D;E\"], # parsimonious grouping\n",
+    "        \"pg\" : [\"A;C\", \"C\", \"C\", \"A\"]\n",
     "    }\n",
     "\n",
     "    # 8. Complex example --> depending on which of the equivalent proteins P1 and P4 is chosen first, the grouping will be different\n",
@@ -1602,7 +1609,7 @@
     "        {\n",
     "            \"precursor_idx\": [0, 1, 2, 3],\n",
     "            \"proteins\": [\"P1;P2;P3;P4\", \"P1;P4\", \"P2\", \"P2;P5\"],\n",
-    "            \"_decoy\": [0, 0, 0, 0],\n",
+    "            \"decoy\": [0, 0, 0, 0],\n",
     "        }\n",
     "    )\n",
     "    print(\"complex example\")\n",
@@ -1613,9 +1620,10 @@
     "    ) == {\n",
     "        \"precursor_idx\": [0, 1, 2, 3],\n",
     "        \"proteins\": [\"P1;P2;P3;P4\", \"P1;P4\", \"P2\", \"P2;P5\"],\n",
-    "        \"_decoy\": [0, 0, 0, 0],\n",
+    "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"P2\", \"P1\", \"P2\", \"P2\"],\n",
-    "        \"pg\": [\"P2;P3;P5\", \"P1;P4\", \"P2;P3;P5\", \"P2;P3;P5\"],\n",
+    "        # \"pg\": [\"P2;P3;P5\", \"P1;P4\", \"P2;P3;P5\", \"P2;P3;P5\"], # parsimonious grouping\n",
+    "        \"pg\" : [\"P1;P2\", \"P1\", \"P2\", \"P2\"]\n",
     "    }\n",
     "\n",
     "\n",
diff --git a/tests/unit_tests/test_grouping.py b/tests/unit_tests/test_grouping.py
index cbb63180..cd7c9c4b 100644
--- a/tests/unit_tests/test_grouping.py
+++ b/tests/unit_tests/test_grouping.py
@@ -20,7 +20,7 @@ def construct_test_cases():
         "proteins": ["A", "A", "B", "B"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["A", "A", "B", "B"],
-        "pg": ["A", "A", "B", "B"],
+        "pg": ["A", "A", "B", "B"], # heuristic grouping
     }
 
     differentiable_proteins_input = {
@@ -33,7 +33,7 @@ def construct_test_cases():
         "proteins": ["A", "A;B", "A;B", "B"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["A", "A", "A", "B"],
-        "pg": ["A", "A;B", "A;B", "B"],
+        "pg": ["A", "A;B", "A;B", "B"], # heuristic grouping
     }
 
     indistinguishable_proteins_input = {
@@ -46,7 +46,7 @@ def construct_test_cases():
         "proteins": ["A;B", "A;B", "A;B", "A;B"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["A", "A", "A", "A"],
-        "pg": ["A", "A", "A", "A"],
+        "pg": ["A", "A", "A", "A"], # heuristic grouping
     }
 
     subset_proteins_input = {
@@ -59,33 +59,33 @@ def construct_test_cases():
         "proteins": ["A", "A;B", "A;B", "A;B"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["A", "A", "A", "A"],
-        "pg": ["A", "A", "A", "A"],
+        "pg": ["A", "A", "A", "A"], # heuristic grouping
     }
 
     subsumable_proteins_input = {
         "precursor_idx": [1, 2, 3, 4],
         "proteins": ["A", "A;B", "B;C", "C"],
-        "decoy": [0, 0, 0, 0],
+        "decoy": [0, 0, 0, 0], # heuristic grouping
     }
     subsumable_proteins_expected = {
         "precursor_idx": [1, 2, 3, 4],
         "proteins": ["A", "A;B", "B;C", "C"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["A", "A", "C", "C"],
-        "pg": ["A", "A", "C", "C"],
+        "pg": ["A", "A", "C", "C"], # heuristic grouping
     }
 
     shared_only_proteins_input = {
         "precursor_idx": [1, 2, 3, 4],
         "proteins": ["A;B", "A;B;C", "A;B;C", "A;C"],
-        "decoy": [0, 0, 0, 0],
+        "decoy": [0, 0, 0, 0], # heuristic grouping
     }
     shared_only_proteins_expected = {
         "precursor_idx": [1, 2, 3, 4],
         "proteins": ["A;B", "A;B;C", "A;B;C", "A;C"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["A", "A", "A", "A"],
-        "pg": ["A", "A", "A", "A"],
+        "pg": ["A", "A", "A", "A"], # heuristic grouping
     }
 
     circular_proteins_input = {
@@ -98,7 +98,7 @@ def construct_test_cases():
         "proteins": ["A;B;C", "B;C;D", "C;D;E", "D;E;A"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["C", "C", "C", "A"],
-        "pg": ["A;C", "C", "C", "A"],
+        "pg": ["A;C", "C", "C", "A"], # heuristic grouping
     }
 
     complex_example_proteins_input = {
@@ -111,11 +111,15 @@ def construct_test_cases():
         "proteins": ["P1;P2;P3;P4", "P1;P4", "P2", "P2;P5"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["P2", "P1", "P2", "P2"],
-        "pg": ["P1;P2", "P1", "P2", "P2"],
+        "pg": ["P1;P2", "P1", "P2", "P2"], # heuristic grouping
     }
 
     test_cases = [
-        ("distinct_proteins", distinct_proteins_input, distinct_proteins_expected),
+        (
+            "distinct_proteins", 
+            distinct_proteins_input, 
+            distinct_proteins_expected
+        ),
         (
             "differentiable proteins",
             differentiable_proteins_input,
@@ -126,14 +130,26 @@ def construct_test_cases():
             indistinguishable_proteins_input,
             indistinguishable_proteins_expected,
         ),
-        ("subset proteins", subset_proteins_input, subset_proteins_expected),
+        (
+            "subset proteins", 
+            subset_proteins_input, 
+            subset_proteins_expected
+        ),
         (
             "subsumable proteins",
             subsumable_proteins_input,
             subsumable_proteins_expected,
         ),
-        ("shared only", shared_only_proteins_input, shared_only_proteins_expected),
-        ("circular", circular_proteins_input, circular_proteins_expected),
+        (
+            "shared only", 
+            shared_only_proteins_input, 
+            shared_only_proteins_expected
+        ),
+        (
+            "circular", 
+            circular_proteins_input, 
+            circular_proteins_expected
+        ),
         (
             "complex example",
             complex_example_proteins_input,

From b60f877540789021a29c655050daae14b87d3371 Mon Sep 17 00:00:00 2001
From: Vincenth Brennsteiner <brennsteiner@biochem.mpg.de>
Date: Fri, 24 May 2024 11:37:55 +0200
Subject: [PATCH 08/48] add 'return_parsimony_groups' option to
 perform_grouping and 'return_groups' option to group_and_parsimony in order
 to obtain maximum parsimony derived protein groups. This does not affect the
 current output of the maximum_parsimony and heuristic mode.

---
 alphadia/grouping.py                             | 14 ++++++++------
 nbs/tutorial_nbs/protein_grouping_tutorial.ipynb | 11 +++++++----
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/alphadia/grouping.py b/alphadia/grouping.py
index f0ba1588..b832776e 100644
--- a/alphadia/grouping.py
+++ b/alphadia/grouping.py
@@ -14,6 +14,7 @@
 def group_and_parsimony(
     precursor_idx: NDArray[np.int64],
     precursor_ids: NDArray[Any],
+    return_groups: bool = False,
 ):
     """Function to group ids based on precursor indices and return groups & master ids as lists
 
@@ -63,8 +64,8 @@ def group_and_parsimony(
             id_dict[subject_protein] = new_subject_set
             # With the following lines commented out, the query will only eliminate peptides from 
             # respective subject proteins, but we will not add them to the query group
-            # if len(new_subject_set) == 0:
-            #    query_group.append(subject_protein)
+            if return_groups and len(new_subject_set) == 0:
+               query_group.append(subject_protein)
 
         # save query to output lists
         id_group.append(query_group)
@@ -106,6 +107,7 @@ def perform_grouping(
     genes_or_proteins: str = "proteins",
     decoy_column: str = "decoy",
     group: bool = True,
+    return_parsimony_groups: bool = False,
 ):
     """Highest level function for grouping proteins in precursor table
 
@@ -143,7 +145,7 @@ def perform_grouping(
     if len(unique_decoys) == 1:
         upsm[decoy_column] = -1
         upsm["pg_master"], upsm["pg"] = group_and_parsimony(
-            upsm.precursor_idx.values, upsm[genes_or_proteins].values
+            upsm.precursor_idx.values, upsm[genes_or_proteins].values, return_parsimony_groups
         )
         upsm = upsm[["precursor_idx", "pg_master", "pg", genes_or_proteins]]
     else:
@@ -155,14 +157,14 @@ def perform_grouping(
         t_df = upsm[target_mask].copy()
         # TODO: consider directly assigning to t_df["pg_master"], t_df["pg"] = group_and_parsimony(...)
         new_columns = group_and_parsimony(
-            t_df.precursor_idx.values, t_df[genes_or_proteins].values
+            t_df.precursor_idx.values, t_df[genes_or_proteins].values, return_parsimony_groups
         )
         t_df["pg_master"], t_df["pg"] = new_columns
 
         # greedy set cover on decoys
         d_df = upsm[decoy_mask].copy()
         new_columns = group_and_parsimony(
-            d_df.precursor_idx.values, d_df[genes_or_proteins].values
+            d_df.precursor_idx.values, d_df[genes_or_proteins].values, return_parsimony_groups
         )
         d_df["pg_master"], d_df["pg"] = new_columns
 
@@ -173,7 +175,7 @@ def perform_grouping(
     # heuristic grouping: from each initial precursor's protein ID set, filter out proteins that 
     # are never master proteins
     if group:
-        # select all master protein groups
+        # select all master protein groups, which are the first in the semicolon separated list
         allowed_pg = upsm["pg"].str.split(";", expand=True)[0].unique()
         allowed_set_pg = set(allowed_pg)
 
diff --git a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
index fd07c12b..12d80446 100644
--- a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
+++ b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
@@ -25,10 +25,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
     "import pandas as pd\n",
     "import numpy as np\n",
     "import matplotlib.pyplot as plt\n",
@@ -37,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -178,7 +181,7 @@
        "9              4           P6           P6      1        P6     P6"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -251,7 +254,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {

From d9a21b6ff223ad04b3038cc28a9bea57a8fc3818 Mon Sep 17 00:00:00 2001
From: github-actions <github-actions@github.com>
Date: Fri, 24 May 2024 09:54:45 +0000
Subject: [PATCH 09/48] Apply Black formatting

---
 alphadia/grouping.py                          |  29 ++--
 nbs/debug/dev_grouping_comparison.ipynb       | 125 ++++++++++--------
 .../protein_grouping_tutorial.ipynb           |  16 +--
 tests/unit_tests/test_grouping.py             |  44 ++----
 4 files changed, 113 insertions(+), 101 deletions(-)

diff --git a/alphadia/grouping.py b/alphadia/grouping.py
index b832776e..5d64d499 100644
--- a/alphadia/grouping.py
+++ b/alphadia/grouping.py
@@ -19,15 +19,15 @@ def group_and_parsimony(
     """Function to group ids based on precursor indices and return groups & master ids as lists
 
     Args:
-        precursor_idx (np.array[int]): array containing unique integer indices corresponding 
+        precursor_idx (np.array[int]): array containing unique integer indices corresponding
             to each peptide precursor
-        precursor_ids (np.array[str]): array of variable length semicolon separated str belonging 
+        precursor_ids (np.array[str]): array of variable length semicolon separated str belonging
             to a given peptide precursor id
 
     Returns
-        ids (list[str]): list of ids linked to a given peptide precursor, such that each 
+        ids (list[str]): list of ids linked to a given peptide precursor, such that each
             precursor only belongs to one id. This list is ordered by precursor_idx.
-        groups (list[str]): list of semicolon separated ids belonging to a given peptide precursor, 
+        groups (list[str]): list of semicolon separated ids belonging to a given peptide precursor,
             such that each precursor only belongs to one group. This list is ordered by precursor_idx.
 
     """
@@ -62,10 +62,10 @@ def group_and_parsimony(
                 continue
             new_subject_set = subject_peptides - query_peptides
             id_dict[subject_protein] = new_subject_set
-            # With the following lines commented out, the query will only eliminate peptides from 
+            # With the following lines commented out, the query will only eliminate peptides from
             # respective subject proteins, but we will not add them to the query group
             if return_groups and len(new_subject_set) == 0:
-               query_group.append(subject_protein)
+                query_group.append(subject_protein)
 
         # save query to output lists
         id_group.append(query_group)
@@ -102,6 +102,7 @@ def group_and_parsimony(
 
     return ids, groups
 
+
 def perform_grouping(
     psm: pd.DataFrame,
     genes_or_proteins: str = "proteins",
@@ -127,7 +128,7 @@ def perform_grouping(
 
     # create non-duplicated view of precursor table
     duplicate_mask = ~psm.duplicated(subset=["precursor_idx"], keep="first")
-    
+
     # make sure column is string and subset to relevant columns
     psm[genes_or_proteins] = psm[genes_or_proteins].astype(str)
     upsm = psm.loc[duplicate_mask, ["precursor_idx", genes_or_proteins, decoy_column]]
@@ -145,7 +146,9 @@ def perform_grouping(
     if len(unique_decoys) == 1:
         upsm[decoy_column] = -1
         upsm["pg_master"], upsm["pg"] = group_and_parsimony(
-            upsm.precursor_idx.values, upsm[genes_or_proteins].values, return_parsimony_groups
+            upsm.precursor_idx.values,
+            upsm[genes_or_proteins].values,
+            return_parsimony_groups,
         )
         upsm = upsm[["precursor_idx", "pg_master", "pg", genes_or_proteins]]
     else:
@@ -157,14 +160,18 @@ def perform_grouping(
         t_df = upsm[target_mask].copy()
         # TODO: consider directly assigning to t_df["pg_master"], t_df["pg"] = group_and_parsimony(...)
         new_columns = group_and_parsimony(
-            t_df.precursor_idx.values, t_df[genes_or_proteins].values, return_parsimony_groups
+            t_df.precursor_idx.values,
+            t_df[genes_or_proteins].values,
+            return_parsimony_groups,
         )
         t_df["pg_master"], t_df["pg"] = new_columns
 
         # greedy set cover on decoys
         d_df = upsm[decoy_mask].copy()
         new_columns = group_and_parsimony(
-            d_df.precursor_idx.values, d_df[genes_or_proteins].values, return_parsimony_groups
+            d_df.precursor_idx.values,
+            d_df[genes_or_proteins].values,
+            return_parsimony_groups,
         )
         d_df["pg_master"], d_df["pg"] = new_columns
 
@@ -172,7 +179,7 @@ def perform_grouping(
             ["precursor_idx", "pg_master", "pg", genes_or_proteins]
         ]
 
-    # heuristic grouping: from each initial precursor's protein ID set, filter out proteins that 
+    # heuristic grouping: from each initial precursor's protein ID set, filter out proteins that
     # are never master proteins
     if group:
         # select all master protein groups, which are the first in the semicolon separated list
diff --git a/nbs/debug/dev_grouping_comparison.ipynb b/nbs/debug/dev_grouping_comparison.ipynb
index fa381c51..0d9c0c37 100644
--- a/nbs/debug/dev_grouping_comparison.ipynb
+++ b/nbs/debug/dev_grouping_comparison.ipynb
@@ -34,92 +34,114 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Inspect QC files and generate an overview of the data \n",
+    "# Inspect QC files and generate an overview of the data\n",
     "\n",
-    "import os \n",
+    "import os\n",
     "import numpy as np\n",
     "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
+    "\n",
     "# Utility functions\n",
     "# to be sure and to enable parsing without user input, infer cohort from file itself\n",
     "def infer_engine(\n",
-    "        headers : list,\n",
-    "        alphadia_pe_columns : list = ['base_width_mobility', 'base_width_rt', 'mono_ms1_intensity'],\n",
-    "        alphadia_pg_columns : list = ['pg'],\n",
-    "        diann_pe_columns : list = ['File.Name', 'Run', 'Protein.Group', 'Protein.Ids'],\n",
-    "        diann_pg_columns : list = ['First.Protein.Description'],\n",
-    "        spectronaut_pe_columns : list = ['PG.ProteinGroups', 'PG.ProteinAccessions', 'PG.Genes', 'PG.UniProtIds'],\n",
-    "        spectronaut_pg_columns : list = ['PG.NrOfStrippedSequencesIdentified (Experiment-wide)', 'PG.NrOfPrecursorsIdentified (Experiment-wide)'],\n",
+    "    headers: list,\n",
+    "    alphadia_pe_columns: list = [\n",
+    "        \"base_width_mobility\",\n",
+    "        \"base_width_rt\",\n",
+    "        \"mono_ms1_intensity\",\n",
+    "    ],\n",
+    "    alphadia_pg_columns: list = [\"pg\"],\n",
+    "    diann_pe_columns: list = [\"File.Name\", \"Run\", \"Protein.Group\", \"Protein.Ids\"],\n",
+    "    diann_pg_columns: list = [\"First.Protein.Description\"],\n",
+    "    spectronaut_pe_columns: list = [\n",
+    "        \"PG.ProteinGroups\",\n",
+    "        \"PG.ProteinAccessions\",\n",
+    "        \"PG.Genes\",\n",
+    "        \"PG.UniProtIds\",\n",
+    "    ],\n",
+    "    spectronaut_pg_columns: list = [\n",
+    "        \"PG.NrOfStrippedSequencesIdentified (Experiment-wide)\",\n",
+    "        \"PG.NrOfPrecursorsIdentified (Experiment-wide)\",\n",
+    "    ],\n",
     "):\n",
     "    engine = []\n",
     "    level = []\n",
     "    if set(alphadia_pe_columns).issubset(headers):\n",
-    "        engine.append('Alphadia')\n",
-    "        level.append('peptide')\n",
-    "    elif set (alphadia_pg_columns).issubset(headers):\n",
-    "        engine.append('Alphadia')\n",
-    "        level.append('protein_group')\n",
+    "        engine.append(\"Alphadia\")\n",
+    "        level.append(\"peptide\")\n",
+    "    elif set(alphadia_pg_columns).issubset(headers):\n",
+    "        engine.append(\"Alphadia\")\n",
+    "        level.append(\"protein_group\")\n",
     "    elif set(diann_pe_columns).issubset(headers):\n",
-    "        engine.append('DiaNN')\n",
-    "        level.append('peptide')\n",
+    "        engine.append(\"DiaNN\")\n",
+    "        level.append(\"peptide\")\n",
     "    elif set(diann_pg_columns).issubset(headers):\n",
-    "        engine.append('DiaNN')\n",
-    "        level.append('protein_group')\n",
+    "        engine.append(\"DiaNN\")\n",
+    "        level.append(\"protein_group\")\n",
     "    elif set(spectronaut_pe_columns).issubset(headers):\n",
-    "        engine.append('Spectronaut')\n",
-    "        level.append('peptide')\n",
+    "        engine.append(\"Spectronaut\")\n",
+    "        level.append(\"peptide\")\n",
     "    elif set(spectronaut_pg_columns).issubset(headers):\n",
-    "        engine.append('Spectronaut')\n",
-    "        level.append('protein_group')\n",
+    "        engine.append(\"Spectronaut\")\n",
+    "        level.append(\"protein_group\")\n",
     "    else:\n",
-    "        raise ValueError('Could not infer search engine from column names')\n",
+    "        raise ValueError(\"Could not infer search engine from column names\")\n",
     "\n",
     "    if len(engine) > 1:\n",
-    "        raise ValueError('More than one search engine detected')\n",
+    "        raise ValueError(\"More than one search engine detected\")\n",
     "\n",
     "    return engine[0], level[0]\n",
     "\n",
+    "\n",
     "# parse result files to usable format\n",
     "\n",
+    "\n",
     "def parse_alphadia_pe_table(\n",
-    "        pe_table : pd.DataFrame,\n",
+    "    pe_table: pd.DataFrame,\n",
     "):\n",
     "    return pe_table\n",
     "\n",
+    "\n",
     "def parse_alphadia_pg_table(\n",
-    "        pg_table : pd.DataFrame,\n",
+    "    pg_table: pd.DataFrame,\n",
     "):\n",
     "    return pg_table\n",
     "\n",
+    "\n",
     "def parse_diann_pe_table(\n",
-    "        pe_table : pd.DataFrame,\n",
+    "    pe_table: pd.DataFrame,\n",
     "):\n",
     "    return pe_table\n",
     "\n",
+    "\n",
     "def parse_diann_pg_table(\n",
-    "        pg_table : pd.DataFrame,\n",
+    "    pg_table: pd.DataFrame,\n",
     "):\n",
     "    return pg_table\n",
     "\n",
+    "\n",
     "def parse_spectronaut_pe_table(\n",
-    "        pe_table : pd.DataFrame,\n",
+    "    pe_table: pd.DataFrame,\n",
     "):\n",
     "    return pe_table\n",
     "\n",
+    "\n",
     "def parse_spectronaut_pg_table(\n",
-    "        pg_table : pd.DataFrame,\n",
+    "    pg_table: pd.DataFrame,\n",
     "):\n",
     "    return pg_table\n",
     "\n",
+    "\n",
     "# Higher level wrapper to read and parse peptide and protein group level results table\n",
     "\n",
+    "\n",
     "def read_and_parse_peptide_table(\n",
-    "        input_table_path : str,\n",
-    "        input_table_filename : str,\n",
+    "    input_table_path: str,\n",
+    "    input_table_filename: str,\n",
     ") -> pd.DataFrame:\n",
     "    \"\"\"Read and parse results table from respective search engine output. First step in analysing QC data.\n",
-    "    \n",
+    "\n",
     "    Parameters:\n",
     "    ----------\n",
     "\n",
@@ -134,30 +156,29 @@
     "\n",
     "    out_table : pd.DataFrame\n",
     "        DataFrame containing the peptide level results\n",
-    "    \n",
+    "\n",
     "    \"\"\"\n",
     "\n",
     "    # read peptide level table\n",
     "    input_table = pd.read_csv(\n",
-    "        os.path.join(input_table_path, input_table_filename),\n",
-    "        sep = '\\t'\n",
+    "        os.path.join(input_table_path, input_table_filename), sep=\"\\t\"\n",
     "    )\n",
-    "    \n",
+    "\n",
     "    engine, level = infer_engine(input_table.columns.tolist())\n",
     "\n",
-    "    if level == 'peptide':\n",
-    "        if engine == 'Alphadia':\n",
+    "    if level == \"peptide\":\n",
+    "        if engine == \"Alphadia\":\n",
     "            out_table = parse_alphadia_pe_table(input_table)\n",
-    "        elif engine == 'DiaNN':\n",
+    "        elif engine == \"DiaNN\":\n",
     "            out_table = parse_diann_pe_table(input_table)\n",
-    "        elif engine == 'Spectronaut':\n",
+    "        elif engine == \"Spectronaut\":\n",
     "            out_table = parse_spectronaut_pe_table(input_table)\n",
-    "    elif level == 'protein_group':\n",
-    "        if engine == 'Alphadia':\n",
+    "    elif level == \"protein_group\":\n",
+    "        if engine == \"Alphadia\":\n",
     "            out_table = parse_alphadia_pg_table(input_table)\n",
-    "        elif engine == 'DiaNN':\n",
+    "        elif engine == \"DiaNN\":\n",
     "            out_table = parse_diann_pg_table(input_table)\n",
-    "        elif engine == 'Spectronaut':\n",
+    "        elif engine == \"Spectronaut\":\n",
     "            out_table = parse_spectronaut_pg_table(input_table)\n",
     "\n",
     "    return out_table, engine"
@@ -1388,42 +1409,42 @@
     "pe_table_path = \"./dev_grouping_comparison_data/alphadia/precursor_level\"\n",
     "pe_table_filename = \"precursors.tsv\"\n",
     "pe_table, engine = read_and_parse_peptide_table(pe_table_path, pe_table_filename)\n",
-    "assert engine == 'Alphadia'\n",
+    "assert engine == \"Alphadia\"\n",
     "display(pe_table.head())\n",
     "\n",
     "# test alphadia PG table\n",
     "pg_table_path = \"./dev_grouping_comparison_data/alphadia/group_level\"\n",
     "pg_table_filename = \"pg.matrix.tsv\"\n",
     "pg_table, engine = read_and_parse_peptide_table(pg_table_path, pg_table_filename)\n",
-    "assert engine == 'Alphadia'\n",
+    "assert engine == \"Alphadia\"\n",
     "display(pg_table.head())\n",
     "\n",
     "# test diann PE table\n",
     "pe_table_path = \"./dev_grouping_comparison_data/diann/precursor_level\"\n",
     "pe_table_filename = \"report.tsv\"\n",
     "pe_table, engine = read_and_parse_peptide_table(pe_table_path, pe_table_filename)\n",
-    "assert engine == 'DiaNN'\n",
+    "assert engine == \"DiaNN\"\n",
     "display(pe_table.head())\n",
     "\n",
     "# test diann PG table\n",
     "pg_table_path = \"./dev_grouping_comparison_data/diann/group_level\"\n",
     "pg_table_filename = \"report.pg_matrix.tsv\"\n",
     "pg_table, engine = read_and_parse_peptide_table(pg_table_path, pg_table_filename)\n",
-    "assert engine == 'DiaNN'\n",
+    "assert engine == \"DiaNN\"\n",
     "display(pg_table.head())\n",
-    "        \n",
+    "\n",
     "# test spectronaut PE table\n",
     "pe_table_path = \"./dev_grouping_comparison_data/spectronaut/precursor_level\"\n",
     "pe_table_filename = \"HeLa_QC_PE_20240409_140530_20240321_Report.tsv\"\n",
     "pe_table, engine = read_and_parse_peptide_table(pe_table_path, pe_table_filename)\n",
-    "assert engine == 'Spectronaut'\n",
+    "assert engine == \"Spectronaut\"\n",
     "display(pe_table.head())\n",
     "\n",
     "# test spectronaut PG table\n",
     "pg_table_path = \"./dev_grouping_comparison_data/spectronaut/group_level\"\n",
     "pg_table_filename = \"HeLa_QC_PG_20240409_140824_20240321_Report.tsv\"\n",
     "pg_table, engine = read_and_parse_peptide_table(pg_table_path, pg_table_filename)\n",
-    "assert engine == 'Spectronaut'\n",
+    "assert engine == \"Spectronaut\"\n",
     "display(pg_table.head())"
    ]
   },
diff --git a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
index 12d80446..780d4fb3 100644
--- a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
+++ b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
@@ -1472,7 +1472,7 @@
     "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"A\", \"A\", \"B\", \"B\"],\n",
     "        # \"pg\": [\"A\", \"A\", \"B\", \"B\"], # parsimonious grouping\n",
-    "        \"pg\" : [\"A\", \"A\", \"B\", \"B\"]\n",
+    "        \"pg\": [\"A\", \"A\", \"B\", \"B\"],\n",
     "    }\n",
     "\n",
     "    # 2. differentiable proteins: shared peptides go to one protein\n",
@@ -1494,7 +1494,7 @@
     "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"A\", \"A\", \"A\", \"B\"],\n",
     "        # \"pg\": [\"A\", \"A\", \"A\", \"B\"], # parsimonious grouping\n",
-    "        \"pg\" : [\"A\", \"A;B\", \"A;B\", \"B\"]\n",
+    "        \"pg\": [\"A\", \"A;B\", \"A;B\", \"B\"],\n",
     "    }\n",
     "\n",
     "    # 3. indistinguishable proteins: one is totally removed\n",
@@ -1516,7 +1516,7 @@
     "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"A\", \"A\", \"A\", \"A\"],\n",
     "        # \"pg\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"], # parsimonious grouping\n",
-    "        \"pg\" : [\"A\", \"A\", \"A\", \"A\"]\n",
+    "        \"pg\": [\"A\", \"A\", \"A\", \"A\"],\n",
     "    }\n",
     "\n",
     "    # 4. subset protein: subsetted protein is removed\n",
@@ -1538,7 +1538,7 @@
     "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"A\", \"A\", \"A\", \"A\"],\n",
     "        # \"pg\": [\"A;B\", \"A;B\", \"A;B\", \"A;B\"], # parsimonious grouping\n",
-    "        \"pg\" : [\"A\", \"A\", \"A\", \"A\"]\n",
+    "        \"pg\": [\"A\", \"A\", \"A\", \"A\"],\n",
     "    }\n",
     "\n",
     "    # 5. subsumable proteins --> there are two possible outcomes depending on whether one starts with the middle or either end of the chain\n",
@@ -1560,7 +1560,7 @@
     "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"A\", \"A\", \"C\", \"C\"],\n",
     "        # \"pg\": [\"A\", \"A\", \"C;B\", \"C;B\"], # parsimonious grouping\n",
-    "        \"pg\" : [\"A\", \"A\", \"C\", \"C\"]\n",
+    "        \"pg\": [\"A\", \"A\", \"C\", \"C\"],\n",
     "    }\n",
     "\n",
     "    # 6. a group of proteins identified by shared peptides only\n",
@@ -1582,7 +1582,7 @@
     "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"A\", \"A\", \"A\", \"A\"],\n",
     "        # \"pg\": [\"A;B;C\", \"A;B;C\", \"A;B;C\", \"A;B;C\"], # parsimonious grouping\n",
-    "        \"pg\" : [\"A\", \"A\", \"A\", \"A\"]\n",
+    "        \"pg\": [\"A\", \"A\", \"A\", \"A\"],\n",
     "    }\n",
     "\n",
     "    # 7. circular proteins\n",
@@ -1604,7 +1604,7 @@
     "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"C\", \"C\", \"C\", \"A\"],\n",
     "        # \"pg\": [\"C;B\", \"C;B\", \"C;B\", \"A;D;E\"], # parsimonious grouping\n",
-    "        \"pg\" : [\"A;C\", \"C\", \"C\", \"A\"]\n",
+    "        \"pg\": [\"A;C\", \"C\", \"C\", \"A\"],\n",
     "    }\n",
     "\n",
     "    # 8. Complex example --> depending on which of the equivalent proteins P1 and P4 is chosen first, the grouping will be different\n",
@@ -1626,7 +1626,7 @@
     "        \"decoy\": [0, 0, 0, 0],\n",
     "        \"pg_master\": [\"P2\", \"P1\", \"P2\", \"P2\"],\n",
     "        # \"pg\": [\"P2;P3;P5\", \"P1;P4\", \"P2;P3;P5\", \"P2;P3;P5\"], # parsimonious grouping\n",
-    "        \"pg\" : [\"P1;P2\", \"P1\", \"P2\", \"P2\"]\n",
+    "        \"pg\": [\"P1;P2\", \"P1\", \"P2\", \"P2\"],\n",
     "    }\n",
     "\n",
     "\n",
diff --git a/tests/unit_tests/test_grouping.py b/tests/unit_tests/test_grouping.py
index cd7c9c4b..c9c5a12f 100644
--- a/tests/unit_tests/test_grouping.py
+++ b/tests/unit_tests/test_grouping.py
@@ -20,7 +20,7 @@ def construct_test_cases():
         "proteins": ["A", "A", "B", "B"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["A", "A", "B", "B"],
-        "pg": ["A", "A", "B", "B"], # heuristic grouping
+        "pg": ["A", "A", "B", "B"],  # heuristic grouping
     }
 
     differentiable_proteins_input = {
@@ -33,7 +33,7 @@ def construct_test_cases():
         "proteins": ["A", "A;B", "A;B", "B"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["A", "A", "A", "B"],
-        "pg": ["A", "A;B", "A;B", "B"], # heuristic grouping
+        "pg": ["A", "A;B", "A;B", "B"],  # heuristic grouping
     }
 
     indistinguishable_proteins_input = {
@@ -46,7 +46,7 @@ def construct_test_cases():
         "proteins": ["A;B", "A;B", "A;B", "A;B"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["A", "A", "A", "A"],
-        "pg": ["A", "A", "A", "A"], # heuristic grouping
+        "pg": ["A", "A", "A", "A"],  # heuristic grouping
     }
 
     subset_proteins_input = {
@@ -59,33 +59,33 @@ def construct_test_cases():
         "proteins": ["A", "A;B", "A;B", "A;B"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["A", "A", "A", "A"],
-        "pg": ["A", "A", "A", "A"], # heuristic grouping
+        "pg": ["A", "A", "A", "A"],  # heuristic grouping
     }
 
     subsumable_proteins_input = {
         "precursor_idx": [1, 2, 3, 4],
         "proteins": ["A", "A;B", "B;C", "C"],
-        "decoy": [0, 0, 0, 0], # heuristic grouping
+        "decoy": [0, 0, 0, 0],  # heuristic grouping
     }
     subsumable_proteins_expected = {
         "precursor_idx": [1, 2, 3, 4],
         "proteins": ["A", "A;B", "B;C", "C"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["A", "A", "C", "C"],
-        "pg": ["A", "A", "C", "C"], # heuristic grouping
+        "pg": ["A", "A", "C", "C"],  # heuristic grouping
     }
 
     shared_only_proteins_input = {
         "precursor_idx": [1, 2, 3, 4],
         "proteins": ["A;B", "A;B;C", "A;B;C", "A;C"],
-        "decoy": [0, 0, 0, 0], # heuristic grouping
+        "decoy": [0, 0, 0, 0],  # heuristic grouping
     }
     shared_only_proteins_expected = {
         "precursor_idx": [1, 2, 3, 4],
         "proteins": ["A;B", "A;B;C", "A;B;C", "A;C"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["A", "A", "A", "A"],
-        "pg": ["A", "A", "A", "A"], # heuristic grouping
+        "pg": ["A", "A", "A", "A"],  # heuristic grouping
     }
 
     circular_proteins_input = {
@@ -98,7 +98,7 @@ def construct_test_cases():
         "proteins": ["A;B;C", "B;C;D", "C;D;E", "D;E;A"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["C", "C", "C", "A"],
-        "pg": ["A;C", "C", "C", "A"], # heuristic grouping
+        "pg": ["A;C", "C", "C", "A"],  # heuristic grouping
     }
 
     complex_example_proteins_input = {
@@ -111,15 +111,11 @@ def construct_test_cases():
         "proteins": ["P1;P2;P3;P4", "P1;P4", "P2", "P2;P5"],
         "decoy": [0, 0, 0, 0],
         "pg_master": ["P2", "P1", "P2", "P2"],
-        "pg": ["P1;P2", "P1", "P2", "P2"], # heuristic grouping
+        "pg": ["P1;P2", "P1", "P2", "P2"],  # heuristic grouping
     }
 
     test_cases = [
-        (
-            "distinct_proteins", 
-            distinct_proteins_input, 
-            distinct_proteins_expected
-        ),
+        ("distinct_proteins", distinct_proteins_input, distinct_proteins_expected),
         (
             "differentiable proteins",
             differentiable_proteins_input,
@@ -130,26 +126,14 @@ def construct_test_cases():
             indistinguishable_proteins_input,
             indistinguishable_proteins_expected,
         ),
-        (
-            "subset proteins", 
-            subset_proteins_input, 
-            subset_proteins_expected
-        ),
+        ("subset proteins", subset_proteins_input, subset_proteins_expected),
         (
             "subsumable proteins",
             subsumable_proteins_input,
             subsumable_proteins_expected,
         ),
-        (
-            "shared only", 
-            shared_only_proteins_input, 
-            shared_only_proteins_expected
-        ),
-        (
-            "circular", 
-            circular_proteins_input, 
-            circular_proteins_expected
-        ),
+        ("shared only", shared_only_proteins_input, shared_only_proteins_expected),
+        ("circular", circular_proteins_input, circular_proteins_expected),
         (
             "complex example",
             complex_example_proteins_input,

From 303f0c4cf17da27a5678fe638b133aff76852d23 Mon Sep 17 00:00:00 2001
From: mschwoerer <82171591+mschwoer@users.noreply.github.com>
Date: Fri, 24 May 2024 18:07:23 +0200
Subject: [PATCH 10/48] #140: hack to have history shown in neptune

---
 tests/e2e_tests/calc_metrics.py | 67 +++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/tests/e2e_tests/calc_metrics.py b/tests/e2e_tests/calc_metrics.py
index b5ce1b3f..72f5fedc 100644
--- a/tests/e2e_tests/calc_metrics.py
+++ b/tests/e2e_tests/calc_metrics.py
@@ -11,6 +11,9 @@
 import pandas as pd
 import neptune
 
+import matplotlib.pyplot as plt
+from datetime import datetime
+
 from tests.e2e_tests.prepare_test_data import get_test_case, OUTPUT_DIR_NAME
 
 NEPTUNE_PROJECT_NAME = os.environ.get("NEPTUNE_PROJECT_NAME")
@@ -118,6 +121,62 @@ def _calc(self):
             self._metrics[f"{self._name}/{col}_std"] = df[col].std()
 
 
+def _basic_plot(df: pd.DataFrame, test_case: str, metric: str, metric_std: str = None):
+    """Draw a basic line plot of `metric` for `test_case` over time."""
+
+    df = (
+        df[df["test_case"] == test_case]
+        .sort_index(ascending=False)
+        .reset_index(drop=True)
+    )
+
+    fig, ax = plt.subplots()
+    ax.scatter(x=df.index, y=df[metric])
+    if metric_std:
+        ax.errorbar(x=df.index, y=df[metric], yerr=df[metric_std])
+
+    ax.set_title(f"test_case: {test_case}, metric: {metric}")
+    ax.set_ylabel(metric)
+    ax.set_xlabel("test runs")
+
+    labels = []
+    for x, y, z in zip(
+        df["sys/creation_time"],
+        df["branch_name"],
+        df["short_sha"],
+    ):
+        fmt = "%Y-%m-%d %H:%M:%S.%f"
+        dt = datetime.strptime(str(x), fmt)
+        x = dt.strftime("%Y%m%d_%H:%M:%S")
+
+        labels.append(f"{x}:\n{y} [{z}]")
+
+    ax.set_xticks(df.index, labels, rotation=66)
+
+    return fig
+
+
+def _get_history_plot(test_results: dict):
+    """Get all past runs from neptune, add the current one and create plots."""
+
+    test_results = test_results.copy()
+    test_results["sys/creation_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
+    test_results_df = pd.DataFrame(test_results, index=[0])
+
+    project = neptune.init_project(project=NEPTUNE_PROJECT_NAME, mode="read-only")
+    runs_table_df = project.fetch_runs_table().to_pandas()
+
+    df = pd.concat([runs_table_df, test_results_df])
+
+    test_case_name = test_results["test_case"]
+    # TODO do this for all metrics
+    fig = _basic_plot(
+        df, test_case_name, "BasicStats/proteins_mean", "BasicStats/proteins_std"
+    )
+
+    return [("BasicStats/proteins_mean", fig)]
+
+
 if __name__ == "__main__":
     test_case_name = sys.argv[1]
     run_time_minutes = int(sys.argv[2]) / 60
@@ -167,4 +226,12 @@ def _calc(self):
             if os.path.exists(file_path):
                 neptune_run["output/" + file_name].track_files(file_path)
 
+        try:
+            history_plots = _get_history_plot(test_results)
+
+            for name, plot in history_plots:
+                neptune_run[f"plots/{name}"].upload(plot)
+        except Exception as e:
+            print(f"no plots today: {e}")
+
         neptune_run.stop()

From 5c22f12bf8845151e6ca8d187e07fd595b43cccb Mon Sep 17 00:00:00 2001
From: mschwoerer <82171591+mschwoer@users.noreply.github.com>
Date: Fri, 24 May 2024 18:12:52 +0200
Subject: [PATCH 11/48] #140: hack to have history shown in neptune

---
 tests/e2e_tests/calc_metrics.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/tests/e2e_tests/calc_metrics.py b/tests/e2e_tests/calc_metrics.py
index 72f5fedc..a723f9c7 100644
--- a/tests/e2e_tests/calc_metrics.py
+++ b/tests/e2e_tests/calc_metrics.py
@@ -156,7 +156,7 @@ def _basic_plot(df: pd.DataFrame, test_case: str, metric: str, metric_std: str =
     return fig
 
 
-def _get_history_plot(test_results: dict):
+def _get_history_plots(test_results: dict, metrics_classes: list):
     """Get all past runs from neptune, add the current one and create plots."""
 
     test_results = test_results.copy()
@@ -169,12 +169,15 @@ def _get_history_plot(test_results: dict):
     df = pd.concat([runs_table_df, test_results_df])
 
     test_case_name = test_results["test_case"]
-    # TODO do this for all metrics
-    fig = _basic_plot(
-        df, test_case_name, "BasicStats/proteins_mean", "BasicStats/proteins_std"
-    )
 
-    return [("BasicStats/proteins_mean", fig)]
+    figs = []
+    for metrics_class in [cls.__name__ for cls in metrics_classes]:
+        # TODO find a smarter way to get the metrics
+        for metric in [k for k in test_results.keys() if k.startswith(metrics_class)]:
+            fig = _basic_plot(df, test_case_name, metric)
+            figs.append((metric, fig))
+
+    return figs
 
 
 if __name__ == "__main__":
@@ -227,7 +230,7 @@ def _get_history_plot(test_results: dict):
                 neptune_run["output/" + file_name].track_files(file_path)
 
         try:
-            history_plots = _get_history_plot(test_results)
+            history_plots = _get_history_plots(test_results, metrics_classes)
 
             for name, plot in history_plots:
                 neptune_run[f"plots/{name}"].upload(plot)

From 7354250d04d27c59762e449635837d639ac0a7b0 Mon Sep 17 00:00:00 2001
From: Vincenth Brennsteiner <brennsteiner@biochem.mpg.de>
Date: Fri, 24 May 2024 18:13:50 +0200
Subject: [PATCH 12/48] add comparison of benchmark HeLa results with heuristic
 or maximum_parsimony grouping to dev_grouping_comparison.ipynb notebook

---
 nbs/debug/dev_grouping_comparison.ipynb | 111 +++++++++++++++++++++++-
 1 file changed, 108 insertions(+), 3 deletions(-)

diff --git a/nbs/debug/dev_grouping_comparison.ipynb b/nbs/debug/dev_grouping_comparison.ipynb
index fa381c51..d15e0d3d 100644
--- a/nbs/debug/dev_grouping_comparison.ipynb
+++ b/nbs/debug/dev_grouping_comparison.ipynb
@@ -30,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1472,10 +1472,115 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "# read and parse peptide level results\n",
+    "heuristic_data_path = \"./dev_grouping_comparison_data/ad_benchmark_heuristic/\"\n",
+    "parsimony_data_path = \"./dev_grouping_comparison_data/ad_benchmark_parsimony/\"\n",
+    "\n",
+    "# load peptide and protein group level results\n",
+    "data_tables = {\n",
+    "    \"pe_heuristic\" : read_and_parse_peptide_table(heuristic_data_path, \"precursors.tsv\")[0],\n",
+    "    \"pg_heuristic\" : read_and_parse_peptide_table(heuristic_data_path, \"pg.matrix.tsv\")[0],\n",
+    "    \"pe_parsimony\" : read_and_parse_peptide_table(parsimony_data_path, \"precursors.tsv\")[0],\n",
+    "    \"pg_parsimony\" : read_and_parse_peptide_table(parsimony_data_path, \"pg.matrix.tsv\")[0],\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnUAAAIdCAYAAABbUItjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAACO/0lEQVR4nOzdeXxM1/8/8Ndkm+wji2xExBaJxBZK0CaWRFQs1ZY2GqKK2oJQrSqCEmsoilbt+6e11NJGYm8qsYSUkKLEksqIEhOCrOf3h1/u10hCQmKSyev5eMzj0Xvue868772Z07d777kjE0IIEBEREVGlpqPpBIiIiIjo9bGoIyIiItICLOqIiIiItACLOiIiIiItwKKOiIiISAuwqCMiIiLSAizqiIiIiLQAizoiIiIiLcCijoiIiEgLsKjTUmvWrIFMJoOhoSGuX79eaL2Pjw/c3d01kBlw+PBhyGQy/PLLLxr5/NK6du0aunbtCktLS8hkMowePVrTKRXp2LFjCAsLw/379wut8/HxgY+Pz0v7uHbtGmQyGdasWVPm+ZF24lhTdirqWLNp0yYsXLjwtfoo6RhEr0dP0wlQ+crKysI333yD9evXazqVSmvMmDE4fvw4Vq1aBTs7O9jb22s6pSIdO3YMU6dORXBwMKpVq6a2bunSpZpJiqoMjjWvr6KONZs2bUJiYuJrFZkcg94MnqnTcv7+/ti0aRP++usvTafyxj1+/Bhl8dPGiYmJeOutt9CzZ0+0bt0aTk5OZZDdm+Xm5gY3NzdNp0FajGNN5Rlryirf0qhoY9Djx481nUK5YFGn5caPHw8rKyt8+eWXL4x70WU3mUyGsLAwaTksLAwymQxnz57Fhx9+CIVCAUtLS4SGhiI3NxcXL16Ev78/zMzMULt2bcyZM6fIz3zy5AlCQ0NhZ2cHIyMjeHt748yZM4XiTp06he7du8PS0hKGhoZo1qwZ/ve//6nFFFwCioqKwqefforq1avD2NgYWVlZxW7zjRs38Mknn8DGxgZyuRyurq6YP38+8vPzAfzfpZt//vkHv//+O2QyGWQyGa5du1ZsnzKZDCNGjMAPP/yABg0aQC6Xw83NDVu2bCkUq1QqMWTIENSsWRMGBgZwdnbG1KlTkZubK8UUHJc5c+ZgxowZqFWrFgwNDdGiRQscOHBA7Zh88cUXAABnZ2cp18OHDwMo+tLHrVu30Lt3b5iZmUGhUKBPnz5QKpVFbldJjsGjR48wbtw4ODs7w9DQEJaWlmjRogU2b95c7P4i7cGxpmKONS/KNz8/H3PmzEHDhg0hl8thY2ODfv36ISUlRXq/j48P9u7di+vXr0t5yWQyaX12dja+/fZbqY/q1atjwIABuHPnjloez49BBX8H8+bNQ0REBJydnWFqagovLy/ExcUVu93PiomJgZeXFwwNDVGjRg1MmjQJP/30U6F9V7t2bQQEBGD79u1o1qwZDA0NMXXqVABPC+kePXrAwsIChoaGaNq0KdauXVvkPnz+eBQct4JxtmA73d3d8ccff6B169YwMjKScsvLy1N7/7Jly9CkSROYmprCzMwMDRs2xNdff12ibS+WIK20evVqAUCcPHlSfPfddwKAOHDggLTe29tbNGrUSFpOTk4WAMTq1asL9QVATJkyRVqeMmWKACBcXFzE9OnTRXR0tBg/frwAIEaMGCEaNmwoFi1aJKKjo8WAAQMEALFt2zbp/YcOHRIAhKOjo+jRo4fYvXu32LBhg6hXr54wNzcXV65ckWIPHjwoDAwMxNtvvy22bt0qIiMjRXBwcKFcC7a3Ro0aYvDgweL3338Xv/zyi8jNzS1y/6SlpYkaNWqI6tWri+XLl4vIyEgxYsQIAUAMHTpUCCGESqUSsbGxws7OTrRt21bExsaK2NhY8eTJk2L3e8F2ubm5ic2bN4tdu3YJf39/AUD8/PPPUlxqaqpwdHQUTk5O4ocffhD79+8X06dPF3K
5XAQHBxc6Lo6OjqJdu3Zi27Zt4ueffxYtW7YU+vr64tixY0IIIW7evClGjhwpAIjt27dLuapUKul4e3t7S/0+evRIuLq6CoVCIRYvXiz27dsnQkJCRK1atQrt25IegyFDhghjY2MREREhDh06JPbs2SNmzZolFi9eXOz+osqPY03FHmtelO/gwYOlfRkZGSmWL18uqlevLhwdHcWdO3eEEEKcP39etG3bVtjZ2Ul5xcbGCiGEyMvLE/7+/sLExERMnTpVREdHi59++knUqFFDuLm5iUePHqn9HTw7BhX8HdSuXVv4+/uLnTt3ip07dwoPDw9hYWEh7t+/X+y2CyHEX3/9JQwNDUXjxo3Fli1bxK5du8S7774rateuLQCI5ORkKdbJyUnY29uLOnXqiFWrVolDhw6JEydOiL///luYmZmJunXrinXr1om9e/eKjz/+WAAQs2fPLrQPn+1TiP/7+zp06JDadlpZWQkHBwexaNEiaWwFIIYPHy7Fbd68WQAQI0eOFFFRUWL//v1i+fLlIiQk5IXb/TIs6rTUswNtVlaWqFOnjmjRooXIz88XQpTNQDt//ny1uKZNm0pFRYGcnBxRvXp10atXL6mt4IvQvHlzKR8hhLh27ZrQ19cXn332mdTWsGFD0axZM5GTk6P2WQEBAcLe3l7k5eWpbW+/fv1KtH+++uorAUAcP35crX3o0KFCJpOJixcvSm1OTk6ia9euJeoXgDAyMhJKpVJqy83NFQ0bNhT16tWT2oYMGSJMTU3F9evX1d4/b948AUCcP39eCPF/x8XBwUE8fvxYisvIyBCWlpaiU6dOUtvcuXOLHHiEKDygLlu2TAAQv/76q1rcoEGDCv0dlPQYuLu7i549e75kD5G24VjzYpoea4rLNykpSQAQw4YNU2s/fvy4ACC+/vprqa1r167CycmpUA4FhcmzhbQQQpw8eVIAEEuXLpXaiivqPDw81AriEydOCABi8+bNL9z+Dz/8UJiYmEjFpxBPi0w3N7ciizpdXV21fS2EEB999JGQy+Xixo0bau1dunQRxsbGUmFZ2qKuuLFVR0dHGvNHjBghqlWr9sJtfBW8/FoFGBgY4Ntvv8WpU6cKXUp4HQEBAWrLrq6ukMlk6NKli9Smp6eHevXqFTkrLjAwUO00vpOTE9q0aYNDhw4BAP755x/8/fff6Nu3LwAgNzdXer377rtITU3FxYsX1fp8//33S5T7wYMH4ebmhrfeekutPTg4GEIIHDx4sET9FKVjx46wtbWVlnV1ddGnTx/8888/0mWNPXv2oH379nBwcFDbroJ9d+TIEbU+e/XqBUNDQ2nZzMwM3bp1w9GjRwud0i+JQ4cOwczMDN27d1drDwwMVFsuzTF466238Pvvv+Orr77C4cOHtfaeFSoex5rCND3WFJdvwbYHBwertb/11ltwdXVVu72jOHv27EG1atXQrVs3tX3WtGlT2NnZqV2WLE7Xrl2hq6srLTdu3BgAijyOzzpy5Ag6dOgAa2trqU1HRwe9e/cuMr5x48Zo0KCBWtvBgwfRsWNHODo6qrUHBwfj0aNHiI2NfWn+RSlubM3Pz8fRo0cBPN3P9+/fx8cff4xff/0V//333yt91vNY1FURH330EZo3b46JEyciJyenTPq0tLRUWzYwMICxsbFa8VHQ/uTJk0Lvt7OzK7Lt7t27AIDbt28DAMaNGwd9fX2117BhwwCg0BehpLPF7t69W2Ssg4ODtP5VFbddz/Z7+/Zt7N69u9B2NWrUCEDh7Squz+zsbDx8+LDUOd69e1ftfwbFfU5pjsGiRYvw5ZdfYufOnWjfvj0sLS3Rs2dPXL58udT5UeXFsUadpseaAs/nULC+uNxKktft27dx//59GBgYFNpvSqWyRIWKlZWV2rJcLgfw8okMxY1hRbUBRW9neR2bF42tBX0GBQVh1apVuH79Ot5//33Y2NigVatWiI6OfqXPLMBHmlQRMpkMs2fPhq+vL3788cd
C6wsGx+dv9n2dAedliropX6lUSl/ygn+BTZgwAb169SqyDxcXF7XlZ/81/iJWVlZITU0t1H7r1i21z34VxW1XwecW9N+4cWPMmDGjyD4KBpWX9WlgYABTU9NS52hlZYUTJ068NPfSHAMTExNMnToVU6dOxe3bt6Wzdt26dcPff/9d6hypcuJYo07TY02B5/MtWJ+amoqaNWsWyq0keVlbW8PKygqRkZFFrjczM3tpH6/KyspKKsafVdxkr6KOV0mPTXF/s8UVrS/K69njMmDAAAwYMACZmZk4evQopkyZgoCAAFy6dOmVZz7zTF0V0qlTJ/j6+mLatGmFzu7Y2trC0NAQZ8+eVWv/9ddfyy2fzZs3q02rv379Oo4dOybNkHJxcUH9+vXx119/oUWLFkW+XnXQ6NixIy5cuIDTp0+rta9btw4ymQzt27d/5e06cOCA2pc6Ly8PW7duRd26daXBMyAgAImJiahbt26R2/V8Ubd9+3a1MxAPHjzA7t278fbbb0uXLkr6L1wAaN++PR48eIBdu3aptW/atElt+VWPga2tLYKDg/Hxxx/j4sWLePTo0UtzIu3Bseb/aHqsKU6HDh0AABs2bFBrP3nyJJKSktCxY0epTS6XFzmuBAQE4O7du8jLyytynz1fCJclb29vHDx4UK2wys/Px88//1ziPjp27IiDBw9KRVyBdevWwdjYGK1btwbwdPYsgEJ/s8+PnwWKG1t1dHTwzjvvFIo3MTFBly5dMHHiRGRnZ+P8+fMl3obn8UxdFTN79mx4enoiLS1NutQHPP1XzCeffIJVq1ahbt26aNKkCU6cOFHof/JlKS0tDe+99x4GDRoElUqFKVOmwNDQEBMmTJBifvjhB3Tp0gWdO3dGcHAwatSogXv37iEpKQmnT58u1Rf4WWPGjMG6devQtWtXTJs2DU5OTti7dy+WLl2KoUOHFrr3ojSsra3RoUMHTJo0CSYmJli6dCn+/vtvtUcNTJs2DdHR0WjTpg1CQkLg4uKCJ0+e4Nq1a/jtt9+wfPlytUFZV1cXvr6+CA0NRX5+PmbPno2MjAxpWj4AeHh4AAC+++479O/fH/r6+nBxcSnyf0b9+vXDggUL0K9fP8yYMQP169fHb7/9hn379hWKLekxaNWqFQICAtC4cWNYWFggKSkJ69evh5eXF4yNjV95f1LlxLHmKU2PNcVxcXHB4MGDsXjxYujo6KBLly64du0aJk2aBEdHR4wZM0aK9fDwwPbt27Fs2TJ4enpCR0cHLVq0wEcffYSNGzfi3XffxahRo/DWW29BX18fKSkpOHToEHr06IH33nvvlbfvRSZOnIjdu3ejY8eOmDhxIoyMjLB8+XJkZmYCeHp/3ctMmTJFur958uTJsLS0xMaNG7F3717MmTMHCoUCANCyZUu4uLhg3LhxyM3NhYWFBXbs2IGYmJgi+7WyssLQoUNx48YNNGjQAL/99htWrFiBoUOHolatWgCAQYMGwcjICG3btoW9vT2USiXCw8OhUCjQsmXLV98xZT71giqEZ2ekPS8wMFAAUJuRJsTTafWfffaZsLW1FSYmJqJbt27i2rVrxc5Ie3bWkRBC9O/fX5iYmBT6vOdnvxXMGFq/fr0ICQkR1atXF3K5XLz99tvi1KlThd7/119/id69ewsbGxuhr68v7OzsRIcOHcTy5ctLtL3FuX79uggMDBRWVlZCX19fuLi4iLlz50qz3AqUdkba8OHDxdKlS0XdunWFvr6+aNiwodi4cWOh2Dt37oiQkBDh7Ows9PX1haWlpfD09BQTJ04UDx8+FEL83wyx2bNni6lTp4qaNWsKAwMD0axZM7Fv375CfU6YMEE4ODgIHR0dtVlZz888E0KIlJQU8f777wtTU1NhZmYm3n//fXHs2LEiZyaW5Bh89dVXokWLFsLCwkLI5XJRp04dMWbMGPHff/+VaN9R5cSx5uU0Oda8KN+8vDwxe/Zs0aBBA6Gvry+sra3FJ598Im7
evKkWd+/ePfHBBx+IatWqCZlMJp4tHXJycsS8efNEkyZNhKGhoTA1NRUNGzYUQ4YMEZcvX5biipv9Onfu3CK37dm/g+L88ccfolWrVkIulws7OzvxxRdfiNmzZwsAao9EedF+PXfunOjWrZtQKBTCwMBANGnSpMiZ2ZcuXRJ+fn7C3NxcVK9eXYwcOVLs3bu3yNmvjRo1EocPHxYtWrQQcrlc2Nvbi6+//lptZvXatWtF+/btha2trTAwMBAODg6id+/e4uzZsy/d7heRCfGGHytNpMVkMhmGDx+OJUuWlEl/165dg7OzM+bOnYtx48aVSZ9EVPmV9VijLfz8/HDt2jVcunRJI5/v4+OD//77D4mJiRr5fF5+JSIiokonNDQUzZo1g6OjI+7du4eNGzciOjoaK1eu1HRqGsOijoiIiCqdvLw8TJ48GUqlEjKZDG5ubli/fj0++eQTTaemMbz8SkRERKQF+EgTIiIiIi3Aoo7oGdeuXYNMJsOaNWs0nUq52rRpExYuXFiovWD7582bV+45hIWFlfgBrkRUehzPqt54xqKO6Bn29vaIjY1F165dNZ1KuSpuECQi7cHxrOrhRAkqtUePHpXLw2SFEHjy5AmMjIzKvO+Sksvl0lPEiUj7cTwjbcIzdVqk4PTvmTNn0KtXL5ibm0OhUOCTTz7BnTt3CsVv3boVXl5eMDExgampKTp37owzZ86oxQQHB8PU1BTnzp2Dn58fzMzM1H4+pii//vorGjduDLlcjjp16uC7774r8tS0TCbDiBEjsHz5cri6ukIul2Pt2rUAgJiYGHTs2BFmZmYwNjZGmzZtsHfv3iK393lr1qyBTCbDtWvXpLbatWsjICAAO3bsQOPGjWFoaIg6depg0aJFau8t6nJFweecP38eH3/8MRQKBWxtbfHpp59CpVKpvf/+/fsYOHAgLC0tYWpqiq5du+Lq1auQyWQICwt74X47fPgwZDIZNm3ahC+//BL29vYwNTVFt27dcPv2bTx48ACDBw+GtbU1rK2tMWDAgEI/wSSEwNKlS9G0aVMYGRnBwsICH3zwAa5evSrF+Pj4YO/evbh+/TpkMpn0el5ERAScnZ1hamoKLy8vxMXFFYrZtWuX9IsRZmZm8PX1RWxsbKG4vXv3omnTppDL5XB2dn4jl0OocuN49hTHM45npfJajy6mCqXg6etOTk7iiy++EPv27RMRERHCxMRENGvWTGRnZ0uxM2bMEDKZTHz66adiz549Yvv27cLLy0uYmJiI8+fPS3H9+/cX+vr6onbt2iI8PFwcOHCgyF8yKPD7778LHR0d4ePjI3bs2CF+/vln0apVK1G7dm3x/J8bAFGjRg3RuHFjsWnTJnHw4EGRmJgoDh8+LPT19YWnp6fYunWr2Llzp/Dz8xMymUxs2bKl0PY+r+AJ6snJyVKbk5OTqFGjhqhVq5ZYtWqV+O2330Tfvn0LPdG84Cnnzz5RvOBzXFxcxOTJk0V0dLSIiIgQcrlcDBgwQIrLy8sT7dq1E4aGhmLWrFkiKipKTJ06VdSvX79ET0gvePq9k5OTCA4OFpGRkWL58uXC1NRUtG/fXvj6+opx48aJqKgoMXv2bKGrqytGjhyp1segQYOEvr6+GDt2rIiMjBSbNm0SDRs2FLa2tkKpVAohhDh//rxo27atsLOzE7GxsdLr2e2vXbu28Pf3Fzt37hQ7d+4UHh4ewsLCQu0p7Rs3bhQAhJ+fn9i5c6fYunWr8PT0FAYGBuKPP/6Q4vbv3y90dXVFu3btxPbt28XPP/8sWrZsKWrVqlXk8SMSguNZAY5nHM9KQ/MZUJkp+LKOGTNGrb3gj3XDhg1CCCFu3Lgh9PT0Cn2BHjx4IOzs7ETv3r2ltv79+wsAYtWqVSXKoWXLlsLR0VFkZWWp9WtlZVXkIKhQKMS9e/fU2lu3bi1sbGzEgwcPpLbc3Fzh7u4uatasKfLz89W293nFDYIymUwkJCSoxfr6+gpzc3ORmZk
phHjxIDhnzhy19w4bNkwYGhpK+RT8ZMyyZcvU4sLDw0s1CHbr1k2tffTo0QKACAkJUWvv2bOnsLS0lJZjY2MFADF//ny1uJs3bwojIyMxfvx4qa1r167CycmpUA4F2+/h4SFyc3Ol9hMnTggAYvPmzUKIpwO+g4OD8PDwUPupowcPHggbGxvRpk0bqa1Vq1bCwcFBPH78WGrLyMgQlpaWFWIQpIqJ49lTHM84npUGL79qob59+6ot9+7dG3p6ejh06BAAYN++fcjNzUW/fv2Qm5srvQwNDeHt7Y3Dhw8X6vP9999/6edmZmbi1KlT6NmzJwwMDKT2glPuRenQoQMsLCzU+jh+/Dg++OADmJqaSu26uroICgpCSkoKLl68+NJcitKoUSM0adJErS0wMBAZGRk4ffr0S9/fvXt3teXGjRvjyZMnSEtLAwAcOXIEwNP9/ayPP/64VHkGBASoLbu6ugJAoZudXV1dce/ePemSxZ49e6QfS3/2uNrZ2aFJkyZFHtfidO3aFbq6utJy48aNAQDXr18HAFy8eBG3bt1CUFCQ2g9nm5qa4v3330dcXBwePXqEzMxMnDx5Er169YKhoaEUZ2ZmVuzfBNGzOJ4VjePZ4RLnUJXGM06U0EJ2dnZqy3p6erCyssLdu3cBALdv3wYAtGzZssj3P/tHDQDGxsYwNzd/6eemp6dDCAFbW9tC64pqA57Oziqqj+fbAcDBwQEApO0oref3y7NtJenTyspKbVkulwMAHj9+LPWhp6cHS0tLtbjitr04z7+/4H8oxbU/efIEpqamuH37drH7HwDq1KlT4hxKsq1A4eMHPD1O+fn50rHMz89/4b4nehGOZ0XjeMbxrCgs6rSQUqlEjRo1pOXc3FzcvXtX+sO2trYGAPzyyy9wcnJ6aX8lffaOhYUFZDKZNMg+n1NJ+rawsICOjg5SU1MLxd66dQvA/+Vf8C+lrKws6UsKAP/991+Rn1VUDgVtz3/pX4WVlRVyc3Nx7949tQGruG0va9bW1pDJZPjjjz/U9keBotpeVcH+Ku446ejowMLCAkIIyGSyF+57ohfheMbxjONZyfHyqxbauHGj2vL//vc/5ObmwsfHBwDQuXNn6Onp4cqVK2jRokWRr1dhYmKCFi1aYOfOncjOzpbaHz58iD179pS4j1atWmH79u3Sv6IAID8/Hxs2bEDNmjXRoEEDAE9ngAHA2bNn1frYvXt3kX2fP38ef/31l1rbpk2bYGZmhubNm5covxfx9vYG8HQW3rO2bNny2n2XREBAAIQQ+Pfff4s8ph4eHlKsXC5X27+l5eLigho1amDTpk0Qz/zSYGZmJrZt2ybNIDMxMcFbb72F7du348mTJ1LcgwcPij1ORM/ieMbxjONZyfFMnRbavn079PT04Ovri/Pnz2PSpElo0qSJdG9E7dq1MW3aNEycOBFXr16Fv78/LCwscPv2bZw4cQImJiaYOnXqK332tGnT0LVrV3Tu3BmjRo1CXl4e5s6dC1NTU9y7d69EfYSHh8PX1xft27fHuHHjYGBggKVLlyIxMRGbN2+W/jX87rvvwtLSEgMHDsS0adOgp6eHNWvW4ObNm0X26+DggO7duyMsLAz29vbYsGEDoqOjMXv27DJ5TpW/vz/atm2LsWPHIiMjA56enoiNjcW6desAFL4MVNbatm2LwYMHY8CAATh16hTeeecdmJiYIDU1FTExMfDw8MDQoUMBAB4eHti+fTuWLVsGT09P6OjolOp/fjo6OpgzZw769u2LgIAADBkyBFlZWZg7dy7u37+PWbNmSbHTp0+Hv78/fH19MXbsWOTl5WH27NkwMTEp8d8EVV0czziecTwrBU3MzqDyUTCrKT4+XnTr1k2YmpoKMzMz8fHHH4vbt28Xit+5c6do3769MDc3F3K5XDg5OYkPPvhA7N+/X4rp37+/MDExKVUeO3bsEB4eHsLAwEDUqlVLzJo1S4SEhAgLCwu1OABi+PDhRfbxxx9
/iA4dOggTExNhZGQkWrduLXbv3l0o7sSJE6JNmzbCxMRE1KhRQ0yZMkX89NNPRc4W69q1q/jll19Eo0aNhIGBgahdu7aIiIhQ6+9Fs8Xu3LmjFlvUrLR79+6JAQMGiGrVqgljY2Ph6+sr4uLiBADx3XffvXC/FcwW+/nnn4v8nJMnT6q1F5fXqlWrRKtWraR9V7duXdGvXz9x6tQptTw/+OADUa1aNSGTyaRZWwXb/+xjEQqgiBlvO3fuFK1atRKGhobCxMREdOzYUfz555+F3rtr1y7RuHFjtb+J4mb7EQnB8Yzj2VMcz0pHJsQz5xqpUgsLC8PUqVNx584d6T6NiiAnJwdNmzZFjRo1EBUVpZEcateuDXd39xJfNilLmzZtQt++ffHnn3+iTZs2b/zziSojjmfF43hGxeHlVypzAwcOhK+vL+zt7aFUKrF8+XIkJSXhu+++03Rq5W7z5s34999/4eHhAR0dHcTFxWHu3Ll45513OAASVUIczzieVSYs6qjMPXjwAOPGjcOdO3egr6+P5s2b47fffkOnTp00nVq5MzMzw5YtW/Dtt98iMzMT9vb2CA4Oxrfffqvp1IjoFXA843hWmfDyKxEREZEW4CNNiIiIiLQAizoiIiIiLcCijoiIiEgLcKLEG5afn49bt27BzMysxD9XQ0SlJ4TAgwcP4ODgUO4PSq2KOJYRvTklHc9Y1L1ht27dgqOjo6bTIKoybt68iZo1a2o6Da3DsYzozXvZeMai7g0zMzMD8PTAmJubazgbIu2VkZEBR0dH6TtHZYtjGdGbU9LxjEXdG1ZwmcLc3JwDIdEbwEuD5YNjGdGb97LxjDeaEBEREWkBFnVEREREWkCjRd2yZcvQuHFj6fS9l5cXfv/9d2m9EAJhYWFwcHCAkZERfHx8cP78ebU+srKyMHLkSFhbW8PExATdu3dHSkqKWkx6ejqCgoKgUCigUCgQFBSE+/fvq8XcuHED3bp1g4mJCaytrRESEoLs7Gy1mHPnzsHb2xtGRkaoUaMGpk2bBv4gBxEREVUEGi3qatasiVmzZuHUqVM4deoUOnTogB49ekiF25w5cxAREYElS5bg5MmTsLOzg6+vLx48eCD1MXr0aOzYsQNbtmxBTEwMHj58iICAAOTl5UkxgYGBSEhIQGRkJCIjI5GQkICgoCBpfV5eHrp27YrMzEzExMRgy5Yt2LZtG8aOHSvFZGRkwNfXFw4ODjh58iQWL16MefPmISIi4g3sKSIiIqKXEBWMhYWF+Omnn0R+fr6ws7MTs2bNktY9efJEKBQKsXz5ciGEEPfv3xf6+vpiy5YtUsy///4rdHR0RGRkpBBCiAsXLggAIi4uToqJjY0VAMTff/8thBDit99+Ezo6OuLff/+VYjZv3izkcrlQqVRCCCGWLl0qFAqFePLkiRQTHh4uHBwcRH5+fom3T6VSCQBSv0RUPvhdK1/cv0RvTkm/bxXmnrq8vDxs2bIFmZmZ8PLyQnJyMpRKJfz8/KQYuVwOb29vHDt2DAAQHx+PnJwctRgHBwe4u7tLMbGxsVAoFGjVqpUU07p1aygUCrUYd3d3ODg4SDGdO3dGVlYW4uPjpRhvb2/I5XK1mFu3buHatWvFbldWVhYyMjLUXkRERERlTeNF3blz52Bqagq5XI7PP/8cO3bsgJubG5RKJQDA1tZWLd7W1lZap1QqYWBgAAsLixfG2NjYFPpcGxsbtZjnP8fCwgIGBgYvjClYLogpSnh4uHQvn0Kh0MjDOsPDw9GyZUuYmZnBxsYGPXv2xMWLF9Vibt++jeDgYDg4OMDY2Bj+/v64fPmyWsyPP/4IHx8fmJubQyaTFbov8VlZWVlo2rQpZDIZEhISpPa7d+/C398fDg4OkMvlcHR0xIgRI9SK3bCwMMhkskIvExOTMtkflUlFOnYAijwuy5cvL7Kff/75B2ZmZqhWrdqrbDrRK8vNzcU333wDZ2dnGBkZoU6dOpg2bRry8/OlmJJ
8b0pyz/alS5fQo0cPWFtbw9zcHG3btsWhQ4feyHYSPU/jRZ2LiwsSEhIQFxeHoUOHon///rhw4YK0/vlnsgghXvqcludjioovixjx/ydJvCifCRMmQKVSSa+bN2++MPfycOTIEQwfPhxxcXGIjo5Gbm4u/Pz8kJmZCeDpdvTs2RNXr17Fr7/+ijNnzsDJyQmdOnWSYgDg0aNH8Pf3x9dff/3Szxw/frzamc8COjo66NGjB3bt2oVLly5hzZo12L9/Pz7//HMpZty4cUhNTVV7ubm54cMPPyyDvVG5VKRjV2D16tVqx6Z///6FYnJycvDxxx/j7bfffoWtJno9s2fPxvLly7FkyRIkJSVhzpw5mDt3LhYvXgyg5N+bktyz3bVrV+Tm5uLgwYOIj49H06ZNERAQ8MJ/7BOVm3K+DFxqHTt2FIMHDxZXrlwRAMTp06fV1nfv3l3069dPCCHEgQMHBABx7949tZjGjRuLyZMnCyGEWLlypVAoFIU+R6FQiFWrVgkhhJg0aZJo3Lix2vp79+4JAOLgwYNCCCGCgoJE9+7d1WJOnz4tAIirV6+WePsqwn0oaWlpAoA4cuSIEEKIixcvCgAiMTFRisnNzRWWlpZixYoVhd5/6NAhAUCkp6cX2f9vv/0mGjZsKM6fPy8AiDNnzrwwn++++07UrFmz2PUJCQkCgDh69OjLN07LafrYARA7dux4aZ7jx48Xn3zyiVi9enWR3783oSJ817RZRd6/Xbt2FZ9++qlaW69evcQnn3wihCjZ96Yk92zfuXOn0NiUkZEhAIj9+/eX2/ZR1VPp7qkrIIRAVlYWnJ2dYWdnh+joaGlddnY2jhw5gjZt2gAAPD09oa+vrxaTmpqKxMREKcbLywsqlQonTpyQYo4fPw6VSqUWk5iYiNTUVCkmKioKcrkcnp6eUszRo0fVHnMSFRUFBwcH1K5du+x3RDlSqVQAAEtLSwBPLzEAgKGhoRSjq6sLAwMDxMTElKrv27dvY9CgQVi/fj2MjY1fGn/r1i1s374d3t7excb89NNPaNCgAc/6oGIcuxEjRsDa2hotW7bE8uXL1S5pAcDBgwfx888/4/vvvy/V5xOVlXbt2uHAgQO4dOkSAOCvv/5CTEwM3n33XQAl+96U5J5tKysruLq6Yt26dcjMzERubi5++OEH2NraSv/vIHqj3kyNWbQJEyaIo0ePiuTkZHH27Fnx9ddfCx0dHREVFSWEEGLWrFlCoVCI7du3i3PnzomPP/5Y2Nvbi4yMDKmPzz//XNSsWVPs379fnD59WnTo0EE0adJE5ObmSjH+/v6icePGIjY2VsTGxgoPDw8REBAgrc/NzRXu7u6iY8eO4vTp02L//v2iZs2aYsSIEVLM/fv3ha2trfj444/FuXPnxPbt24W5ubmYN29eqbZZ0/+6zc/PF926dRPt2rWT2rKzs4WTk5P48MMPxb1790RWVpYIDw8XAISfn1+hPoo725Ofny/8/f3F9OnThRBCJCcnF3um7qOPPhJGRkYCgOjWrZt4/Phxkfk+efJEWFhYiNmzZ7/6RmuJinDspk+fLo4dOybOnDkj5s2bJ4yNjaX3CCHEf//9JxwdHaUziTxTp70q8v7Nz88XX331lZDJZEJPT0/IZDIxc+ZMaX1JvjcbN24UBgYGhfr29fUVgwcPlpZTUlKEp6enkMlkQldXVzg4OLz06gRRaZX0+6bRou7TTz8VTk5OwsDAQFSvXl107NhRKuiEePrFnDJlirCzsxNyuVy888474ty5c2p9PH78WIwYMUJYWloKIyMjERAQIG7cuKEWc/fuXdG3b19hZmYmzMzMRN++fQv9T+369euia9euwsjISFhaWooRI0aoPb5ECCHOnj0r3n77bSGXy4WdnZ0ICwsr1eNMhND8QDhs2DDh5OQkbt68qdZ+6tQp0aRJEwFA6Orqis6dO4suXbqILl26FOqjuMLgu+++E23atJEK6hcVdampqSIpKUns3Ll
TuLm5iaFDhxaZ76ZNm4Senp5ITU19tQ3WIhXl2D1r3rx5wtzcXFp+7733xJdffikts6jTXhV5/27evFnUrFlTbN68WZw9e1asW7dOWFpaijVr1kgxL/veFFfUderUSQwZMkQI8fT/Ud27dxddunQRMTExIj4+XgwdOlTUqFFD3Lp1681sLFUJlaKoq4o0ORCOGDFC1KxZ84X3AN6/f1+kpaUJIYR46623xLBhwwrFFFcY9OjRQ+jo6AhdXV3pVTBgFtwHWZQ//vhDAChyEOzQoYPo2bNnCbdQe1XUYxcTEyMACKVSKYR4eq/qs33o6OhI/axcufIVtvzVVeSiQxtU5P1bs2ZNsWTJErW26dOnCxcXl0KxxX1vSnLP9v79+4WOjk6hfVCvXj0RHh5eZttDVNLvm145X92lCkAIgZEjR2LHjh04fPgwnJ2di41VKBQAgMuXL+PUqVOYPn16iT9n0aJF+Pbbb6XlW7duoXPnzti6davacwKLyg/4v/tcCiQnJ+PQoUPYtWtXiXPQNhX92J05cwaGhobSY0tiY2PVZgb++uuvmD17No4dO4YaNWqUOB+i1/Ho0SPo6KjfMq6rq1vo/k+g+O/Ns/ds9+7dG8D/3bM9Z84c6XMAFPosHR2dIj+LqLyxqKsChg8fjk2bNuHXX3+FmZmZNNVeoVDAyMgIAPDzzz+jevXqqFWrFs6dO4dRo0ahZ8+eajcJK5VKKJVK/PPPPwCePmPQzMwMtWrVgqWlJWrVqqX2uaampgCAunXrombNmgCA3377Dbdv30bLli1hamqKCxcuYPz48Wjbtm2hCSerVq2Cvb09unTpUi77pTKoSMdu9+7dUCqV8PLygpGREQ4dOoSJEydi8ODB0kO5XV1d1fo5deoUdHR04O7uXg57h6ho3bp1w4wZM1CrVi00atQIZ86cQUREBD799FMp5mXfG4VCgYEDB2Ls2LGwsrKCpaUlxo0bBw8PD3Tq1AnA0wl0FhYW6N+/PyZPngwjIyOsWLECycnJ6Nq1q0a2naq4N3HakP6PJi5ZACjytXr1aimm4LEi+vr6olatWuKbb74RWVlZav1MmTLlpf08q6j7sg4ePCi8vLyEQqEQhoaGon79+uLLL78sdDkwLy9P1KxZU3z99ddltBcqp4p07H7//XfRtGlTYWpqKoyNjYW7u7tYuHChyMnJKTZ/3lOnvSry/s3IyBCjRo0StWrVEoaGhqJOnTpi4sSJat+LknxvSnLP9smTJ4Wfn5+wtLQUZmZmonXr1uK33357I9tJVUdJv28yIf7/tS96IzIyMqBQKKBSqWBubq7pdIi0Fr9r5Yv7l+jNKen3jZdfK7DHuUA2b8soNwY6gFE5fAN43MpfeR07Kh/8TpQ/ficIYFFXYT3OBQ7efnqNjMqHDEAH27IdCHnc3ozyOHZUPvideDP4nSCARV2FlZ3/dBA8cxd4kKvpbLSPmR7QzOrpfjYqw3553MpfeR07Kh/8TpQ/fieoAIu6Cu5BLpCRo+ksqLR43IjU8TtBVP4q3G+/EhEREVHpsagjIiIi0gIs6oiIiIi0AIs6IiIiIi3Aoo6IiIhIC7CoIyIiItICLOqIiIiItACLOiIiIiItwKKOiIiISAuwqCMiIiLSAizqiIiIiLQAizoiIiIiLcCijoiIiEgLsKgjIiIi0gIs6oiIiIi0AIs6IiIiIi3Aoo6IiIhIC7CoIyIiItICLOqIiIiItACLOiIiIiItwKKOiIiISAuwqCMiIiLSAizqiIiIiLQAizoiIiIiLcCijoiIiEgLsKgjIiIi0gIs6oiIiIi0AIs6IiIiIi3Aoo6IiIhIC7CoIyIiItICLOqIiIiItACLOiIiIqoQateuDZlMVug1fPhwAMDt27cRHBwMBwcHGBsbw9/fH5cvX5bef+/ePYwcORIuLi4wNjZGrVq1EBISApVKpfY5p0+fhq+vL6pVqwYrKysMHjwYDx8+fKPbWh5Y1BEREVGFcPLkSaS
mpkqv6OhoAMCHH34IIQR69uyJq1ev4tdff8WZM2fg5OSETp06ITMzEwBw69Yt3Lp1C/PmzcO5c+ewZs0aREZGYuDAgdJn3Lp1C506dUK9evVw/PhxREZG4vz58wgODtbEJpcpPU0nQERERAQA1atXV1ueNWsW6tatC29vb1y+fBlxcXFITExEo0aNAABLly6FjY0NNm/ejM8++wzu7u7Ytm2b9P66detixowZ+OSTT5Cbmws9PT3s2bMH+vr6+P7776Gj8/Tc1vfff49mzZrhn3/+Qb169d7cBpcxnqkjIiKiCic7OxsbNmzAp59+CplMhqysLACAoaGhFKOrqwsDAwPExMQU249KpYK5uTn09J6ex8rKyoKBgYFU0AGAkZERALywn8qARR0RERFVODt37sT9+/ely6INGzaEk5MTJkyYgPT0dGRnZ2PWrFlQKpVITU0tso+7d+9i+vTpGDJkiNTWoUMHKJVKzJ07F9nZ2UhPT8fXX38NAMX2U1mwqCMiqiByc3PxzTffwNnZGUZGRqhTpw6mTZuG/Px8KUYIgbCwMDg4OMDIyAg+Pj44f/68Wj9ZWVkYOXIkrK2tYWJigu7duyMlJUUtJj09HUFBQVAoFFAoFAgKCsL9+/ffxGYSlcjKlSvRpUsXODg4AAD09fWxbds2XLp0CZaWljA2Nsbhw4fRpUsX6OrqFnp/RkYGunbtCjc3N0yZMkVqb9SoEdauXYv58+fD2NgYdnZ2qFOnDmxtbYvspzJhUUdEVEHMnj0by5cvx5IlS5CUlIQ5c+Zg7ty5WLx4sRQzZ84cREREYMmSJTh58iTs7Ozg6+uLBw8eSDGjR4/Gjh07sGXLFsTExODhw4cICAhAXl6eFBMYGIiEhARERkYiMjISCQkJCAoKeqPbS1Sc69evY//+/fjss8/U2j09PZGQkID79+8jNTUVkZGRuHv3LpydndXiHjx4AH9/f5iammLHjh3Q19dXWx8YGAilUol///0Xd+/eRVhYGO7cuVOon8qGEyWIiCqI2NhY9OjRA127dgXw9PEOmzdvxqlTpwA8PUu3cOFCTJw4Eb169QIArF27Fra2tti0aROGDBkClUqFlStXYv369ejUqRMAYMOGDXB0dMT+/fvRuXNnJCUlITIyEnFxcWjVqhUAYMWKFfDy8sLFixfh4uKiga0n+j+rV6+GjY2N9F14nkKhAABcvnwZp06dwvTp06V1GRkZ6Ny5M+RyOXbt2qV2D97zbG1tAQCrVq2CoaEhfH19y3Ar3jyeqSMiqiDatWuHAwcO4NKlSwCAv/76CzExMXj33XcBAMnJyVAqlfDz85PeI5fL4e3tjWPHjgEA4uPjkZOToxbj4OAAd3d3KSY2NhYKhUIq6ACgdevWUCgUUszzsrKykJGRofYiKg/5+flYvXo1+vfvL01uKPDzzz/j8OHD0mNNfH190bNnT+nv/cGDB/Dz80NmZiZWrlyJjIwMKJVKKJVKtTPVS5YswenTp3Hp0iV8//33GDFiBMLDw1GtWrU3ualljmfqiIgqiC+//BIqlQoNGzaErq4u8vLyMGPGDHz88ccAAKVSCeD/zi4UsLW1xfXr16UYAwMDWFhYFIopeL9SqYSNjU2hz7exsZFinhceHo6pU6e+3gYSlcD+/ftx48YNfPrpp4XWpaamIjQ0FLdv34a9vT369euHSZMmSevj4+Nx/PhxACj0aJLk5GTUrl0bAHDixAlMmTIFDx8+RMOGDfHDDz9oxe0HLOqIiCqIrVu3YsOGDdi0aRMaNWqEhIQEjB49Gg4ODujfv78UJ5PJ1N4nhCjU9rznY4qKf1E/EyZMQGhoqLSckZEBR0fHEm0XUWn4+flBCFHkupCQEISEhBT7Xh8fn2Lf+6x169a9cn4VGYs6IqIK4osvvsBXX32Fjz76CADg4eGB69evIzw8HP3794ednR2Ap2fa7O3tpfelpaVJZ+/s7OykxzQ8e7YuLS0Nbdq0kWJu375d6PP
v3LlT6CxgAblcDrlcXjYbSpXG41wgO//lcfTqDHQAozKqxljUERFVEI8ePVJ7ICrw9OGqBY80cXZ2hp2dHaKjo9GsWTMATx/QeuTIEcyePRvA09mB+vr6iI6ORu/evQE8vWSVmJiIOXPmAAC8vLygUqlw4sQJvPXWWwCA48ePQ6VSSYUf0eNc4OBt4OXnveh1yAB0sC2bwo5FHRFRBdGtWzfMmDEDtWrVQqNGjXDmzBlERERI9xbJZDKMHj0aM2fORP369VG/fn3MnDkTxsbGCAwMBPB0VuDAgQMxduxYWFlZwdLSEuPGjYOHh4c0G9bV1RX+/v4YNGgQfvjhBwDA4MGDERAQwJmvJMnOf1rQnbkLPMjVdDbayUwPaGb1dF8blUF/LOqIiCqIxYsXY9KkSRg2bBjS0tLg4OCAIUOGYPLkyVLM+PHj8fjxYwwbNgzp6elo1aoVoqKiYGZmJsUsWLAAenp66N27Nx4/foyOHTtizZo1ag9W3bhxI0JCQqRZg927d8eSJUve3MZSpfEgF8jI0XQWVBIyUZI7CqnMZGRkQKFQSL9FVxxVNvDHHeDobX6ZyoO5PvCOLfB2dUBhUHb98riVv5Ieu5J+1+jVcCyrODieVV5lPZ7xOXVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFtBoURceHo6WLVvCzMwMNjY26NmzJy5evKgWExwcDJlMpvZq3bq1WkxWVhZGjhwJa2trmJiYoHv37khJSVGLSU9PR1BQEBQKBRQKBYKCgnD//n21mBs3bqBbt24wMTGBtbU1QkJCkJ2drRZz7tw5eHt7w8jICDVq1MC0adNK9DtzREREROVJo0XdkSNHMHz4cMTFxSE6Ohq5ubnw8/NDZmamWpy/vz9SU1Ol12+//aa2fvTo0dixYwe2bNmCmJgYPHz4EAEBAcjLy5NiAgMDkZCQgMjISERGRiIhIQFBQUHS+ry8PHTt2hWZmZmIiYnBli1bsG3bNowdO1aKycjIgK+vLxwcHHDy5EksXrwY8+bNQ0RERDntISIiIqKS0egvSkRGRqotr169GjY2NoiPj8c777wjtcvlcumHrJ+nUqmwcuVKrF+/XvoJnA0bNsDR0RH79+9H586dkZSUhMjISMTFxaFVq1YAgBUrVsDLywsXL16Ei4sLoqKicOHCBdy8eRMODg4AgPnz5yM4OBgzZsyAubk5Nm7ciCdPnmDNmjWQy+Vwd3fHpUuXEBERgdDQUMhksvLYTUREREQvVaHuqVOpVAAAS0tLtfbDhw/DxsYGDRo0wKBBg5CWliati4+PR05OjvRTNwDg4OAAd3d3HDt2DAAQGxsLhUIhFXQA0Lp1aygUCrUYd3d3qaADgM6dOyMrKwvx8fFSjLe3N+RyuVrMrVu3cO3atSK3KSsrCxkZGWovIiIiorJWYYo6IQRCQ0PRrl07uLu7S+1dunTBxo0bcfDgQcyfPx8nT55Ehw4dkJWVBQBQKpUwMDCAhYWFWn+2trZQKpVSjI2NTaHPtLGxUYuxtbVVW29hYQEDA4MXxhQsF8Q8Lzw8XLqPT6FQwNHRscT7hIiIiKikNHr59VkjRozA2bNnERMTo9bep08f6b/d3d3RokULODk5Ye/evejVq1ex/Qkh1C6HFnVptCxiCiZJFHfpdcKECQgNDZWWMzIyWNgRERFRmasQZ+pGjhyJXbt24dChQ6hZs+YLY+3t7eHk5ITLly8DAOzs7JCdnY309HS1uLS0NOksmp2dHW7fvl2orzt37qjFPH+2LT09HTk5OS+MKbgU/PwZvAJyuRzm5uZqLyIiIqKyptGiTgiBESNGYPv27Th48CCcnZ1f+p67d+/i5s2bsLe3BwB4enpCX18f0dHRUkxqaioSExPRpk0bAICXlxdUKhVOnDghxRw/fhwqlUotJjExEampqVJMVFQU5HI5PD09pZijR4+qPeYkKioKDg4OqF279qvvCCI
iIqLXpNGibvjw4diwYQM2bdoEMzMzKJVKKJVKPH78GADw8OFDjBs3DrGxsbh27RoOHz6Mbt26wdraGu+99x4AQKFQYODAgRg7diwOHDiAM2fO4JNPPoGHh4c0G9bV1RX+/v4YNGgQ4uLiEBcXh0GDBiEgIAAuLi4AAD8/P7i5uSEoKAhnzpzBgQMHMG7cOAwaNEg6uxYYGAi5XI7g4GAkJiZix44dmDlzJme+EhERkcZptKhbtmwZVCoVfHx8YG9vL722bt0KANDV1cW5c+fQo0cPNGjQAP3790eDBg0QGxsLMzMzqZ8FCxagZ8+e6N27N9q2bQtjY2Ps3r0burq6UszGjRvh4eEBPz8/+Pn5oXHjxli/fr20XldXF3v37oWhoSHatm2L3r17o2fPnpg3b54Uo1AoEB0djZSUFLRo0QLDhg1DaGio2j1zRERERJqg0YkSL/slBiMjI+zbt++l/RgaGmLx4sVYvHhxsTGWlpbYsGHDC/upVasW9uzZ88IYDw8PHD169KU5EREREb1JFWKiBBERERG9HhZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVERK8hMjISMTEx0vL333+Ppk2bIjAwEOnp6RrMjIiqGhZ1RESv4YsvvkBGRgYA4Ny5cxg7dizeffddXL16FaGhoRrOjoiqEj1NJ0BEVJklJyfDzc0NALBt2zYEBARg5syZOH36NN59910NZ0dEVQnP1BERvQYDAwM8evQIALB//374+fkBACwtLaUzeEREbwLP1BERvYZ27dohNDQUbdu2xYkTJ7B161YAwKVLl1CzZk0NZ0dEVQnP1BERvYYlS5ZAT08Pv/zyC5YtW4YaNWoAAH7//Xf4+/trODsiqkp4po6I6DXUqlULe/bsKdS+YMECDWRDRFUZizoioteUl5eHHTt2ICkpCTKZDA0bNkTPnj2hp8chlojeHI44RESvITExEd27d8ft27fh4uIC4On9dNWrV8euXbvg4eGh4QyJqKrgPXVERK/hs88+g7u7O1JSUnD69GmcPn0aN2/eROPGjTF48GBNp0dEVQjP1BERvYa//voLp06dgoWFhdRmYWGBGTNmoGXLlhrMjIiqGp6pIyJ6DS4uLrh9+3ah9rS0NNSrV08DGRFRVcWijojoNcycORMhISH45ZdfkJKSgpSUFPzyyy8YPXo0Zs+ejYyMDOlFRFSeePmViOg1BAQEAAB69+4NmUwGABBCAAC6desmLctkMuTl5WkmSSKqEljUERG9hkOHDmk6BSIiACzqiIhei7e3t6ZTICICoOF76sLDw9GyZUuYmZnBxsYGPXv2xMWLF9VihBAICwuDg4MDjIyM4OPjg/Pnz6vFZGVlYeTIkbC2toaJiQm6d++OlJQUtZj09HQEBQVBoVBAoVAgKCgI9+/fV4u5ceMGunXrBhMTE1hbWyMkJATZ2dlqMefOnYO3tzeMjIxQo0YNTJs2TbrUQkRVz9GjR1/4IiJ6UzR6pu7IkSMYPnw4WrZsidzcXEycOBF+fn64cOECTExMAABz5sxBREQE1qxZgwYNGuDbb7+Fr68vLl68CDM
zMwDA6NGjsXv3bmzZsgVWVlYYO3YsAgICEB8fD11dXQBAYGAgUlJSEBkZCQAYPHgwgoKCsHv3bgBPnwjftWtXVK9eHTExMbh79y769+8PIQQWL14MAMjIyICvry/at2+PkydP4tKlSwgODoaJiQnGjh37pncfEVUAPj4+hdoK7q0DwPvoiOiN0WhRV1BgFVi9ejVsbGwQHx+Pd955B0IILFy4EBMnTkSvXr0AAGvXroWtrS02bdqEIUOGQKVSYeXKlVi/fj06deoEANiwYQMcHR2xf/9+dO7cGUlJSYiMjERcXBxatWoFAFixYgW8vLxw8eJFuLi4ICoqChcuXMDNmzfh4OAAAJg/fz6Cg4MxY8YMmJubY+PGjXjy5AnWrFkDuVwOd3d3XLp0CREREQgNDVUbyImoakhPT1dbzsnJwZkzZzBp0iTMmDFDQ1kRUVVUoR5polKpAACWlpYAgOTkZCiVSvj5+Ukxcrkc3t7eOHbsGAAgPj4eOTk5ajEODg5wd3eXYmJjY6FQKKSCDgBat24NhUKhFuPu7i4VdADQuXNnZGVlIT4+Xorx9vaGXC5Xi7l16xauXbtW5DZlZWWpPdKAjzUg0i4Ft3QUvKytreHr64s5c+Zg/Pjxmk6PiKqQUhd1jx8/xqNHj6Tl69evY+HChYiKinqtRIQQCA0NRbt27eDu7g4AUCqVAABbW1u1WFtbW2mdUqmEgYGB2tPci4qxsbEp9Jk2NjZqMc9/joWFBQwMDF4YU7BcEPO88PBwtQHf0dHxJXuCiLRB9erVC90jTERUnkpd1PXo0QPr1q0DANy/fx+tWrXC/Pnz0aNHDyxbtuyVExkxYgTOnj2LzZs3F1r3/GXNgmc+vcjzMUXFl0VMwSSJ4vKZMGECVCqV9Lp58+YL8yaiyuXs2bNqr7/++guRkZEYOnQomjRpUur+/v33X3zyySewsrKCsbExmjZtKl0tAN7s5DEiqlxKXdSdPn0ab7/9NgDgl19+ga2tLa5fv45169Zh0aJFr5TEyJEjsWvXLhw6dAg1a9aU2u3s7AAUPguWlpYmnSGzs7NDdnZ2oftano8p6md87ty5oxbz/Oekp6cjJyfnhTFpaWkACp9NLCCXy2Fubq72IiLt0bRpUzRr1gxNmzaV/vvdd99FdnY2Vq5cWaq+0tPT0bZtW+jr6+P333/HhQsXMH/+fFSrVk2KKZg8tmTJEpw8eRJ2dnbw9fXFgwcPpJjRo0djx44d2LJlC2JiYvDw4UMEBASoTdoIDAxEQkICIiMjERkZiYSEBAQFBb32/iAizSl1Uffo0SNp1mlUVBR69eoFHR0dtG7dGtevXy9VX0IIjBgxAtu3b8fBgwfh7Oystt7Z2Rl2dnaIjo6W2rKzs3HkyBG0adMGAODp6Ql9fX21mNTUVCQmJkoxXl5eUKlUOHHihBRz/PhxqFQqtZjExESkpqZKMVFRUZDL5fD09JRijh49qvaYk6ioKDg4OKB27dql2nYi0g7Jycm4evUqkpOTkZycjOvXr+PRo0c4duwYGjZsWKq+Zs+eDUdHR6xevRpvvfUWateujY4dO6Ju3boAUGjymLu7O9auXYtHjx5h06ZNACBNHps/fz46deqEZs2aYcOGDTh37hz2798PANLksZ9++gleXl7w8vLCihUrsGfPHl4yJqrESl3U1atXDzt37sTNmzexb98+aYJCWlpaqc9CDR8+HBs2bMCmTZtgZmYGpVIJpVKJx48fA3h6SXP06NGYOXMmduzYgcTERAQHB8PY2BiBgYEAnt6kPHDgQIwdOxYHDhzAmTNn8Mknn8DDw0OaDevq6gp/f38MGjQIcXFxiIuLw6BBgxAQEAAXFxcAgJ+fH9zc3BAUFIQzZ87gwIEDGDduHAYNGiRtV2BgIORyOYKDg5GYmIgdO3Zg5syZnPlKVIU5OTmpvRwdHWFoaPhKfe3atQstWrTAhx9+CBsbGzRr1gwrVqyQ1r/JyWPP46Qvooqv1EX
d5MmTMW7cONSuXRtvvfUWvLy8ADw9Y9WsWbNS9bVs2TKoVCr4+PjA3t5eem3dulWKGT9+PEaPHo1hw4ahRYsW+PfffxEVFSWdLQSABQsWoGfPnujduzfatm0LY2Nj7N69W3pGHQBs3LgRHh4e8PPzg5+fHxo3boz169dL63V1dbF3714YGhqibdu26N27N3r27Il58+ZJMQqFAtHR0UhJSUGLFi0wbNgwhIaGIjQ0tLS7kYi0yJEjR9CtWzfUq1cP9evXR/fu3fHHH3+Uup+rV69i2bJlqF+/Pvbt24fPP/8cISEh0n3Mb3Ly2PM46Yuo4iv1c+o++OADtGvXDqmpqWo3AXfs2BHvvfdeqfoqyS8xyGQyhIWFISwsrNgYQ0NDLF68WHpIcFEsLS2xYcOGF35WrVq1sGfPnhfGeHh48CnxRCTZsGEDBgwYgF69eiEkJARCCBw7dgwdO3bEmjVrpKsKJZGfn48WLVpg5syZAIBmzZrh/PnzWLZsGfr16yfFvanJY8+aMGGC2j9gMzIyWNgRVTCv9Jw6Ozs7mJmZITo6WrpU2rJly1LfP0JEVNnNmDEDc+bMwdatWxESEoJRo0Zh69atmDVrFqZPn16qvuzt7eHm5qbW5urqihs3bgB4s5PHnsdJX0QVX6mLurt376Jjx45o0KAB3n33XWliwWeffcafyiKiKufq1avo1q1bofbu3bsjOTm5VH21bdu20ESFS5cuwcnJCcCbnTxGRJVPqYu6MWPGQF9fHzdu3ICxsbHU3qdPn0I/+0VEpO0cHR1x4MCBQu0HDhwo9eXJMWPGIC4uDjNnzsQ///yDTZs24ccff8Tw4cMBvNnJY0RU+ZT6nrqoqCjs27dP7XlyAFC/fv1SP9KEiKiyGzt2LEJCQpCQkIA2bdpAJpMhJiYGa9aswXfffVeqvlq2bIkdO3ZgwoQJmDZtGpydnbFw4UL07dtXihk/fjweP36MYcOGIT09Ha1atSpy8pienh569+6Nx48fS/f3PT95LCQkRJol2717dyxZsuQ19wYRaVKpi7rMzEy1M3QF/vvvP7XfRCUiqgqGDh0KOzs7zJ8/H//73/8APD0TtnXrVvTo0aPU/QUEBCAgIKDY9W9y8hgRVS6lLureeecdrFu3TroBWCaTIT8/H3PnzkX79u3LPEEioooqNzcXM2bMwKeffoqYmBhNp0NEVVypi7q5c+fCx8cHp06dQnZ2NsaPH4/z58/j3r17+PPPP8sjRyKiCklPTw9z585F//79NZ0KEVHpJ0q4ubnh7NmzeOutt+Dr64vMzEz06tULZ86ckX7KhoioqujUqRMOHz6s6TSIiEp/pg54+oyjqVOnlnUuRESVTpcuXTBhwgQkJibC09MTJiYmauu7d++uocyIqKopUVF39uzZEnfYuHHjV06GiKiyGTp0KAAgIiKi0DqZTIa8vLw3nRIRVVElKuqaNm0KmUxW6CdkCn7m69k2DmBEVJXk5+drOgUiIgAlvKcuOTkZV69eRXJyMrZt2wZnZ2csXboUCQkJSEhIwNKlS1G3bl1s27atvPMlIiIioiKU6ExdwU/UAMCHH36IRYsW4d1335XaGjduDEdHR0yaNAk9e/Ys8ySJiCqqRYsWFdkuk8lgaGiIevXq4Z133lF78C8RUXko9USJc+fOwdnZuVC7s7MzLly4UCZJERFVFgsWLMCdO3fw6NEjWFhYQAiB+/fvw9jYGKampkhLS0OdOnVw6NChUv9sGBFRaZT6kSaurq749ttv8eTJE6ktKysL3377LVxdXcs0OSKiim7mzJlo2bIlLl++jLt37+LevXu4dOkSWrVqhe+++w43btyAnZ0dxowZo+lUiUjLlfpM3fLly9GtWzc4OjqiSZMmAIC//voLMpkMe/bsKfMEiYgqsm+++Qbbtm1Te05nvXr1MG/ePLz//vu4evUq5syZg/fff1+DWRJRVVDqou6tt95CcnIyNmzYgL///htCCPTp0weBgYG
Fns9ERKTtUlNTkZubW6g9NzcXSqUSAODg4IAHDx686dSIqIp5pYcPGxsbY/DgwWWdCxFRpdO+fXsMGTIEP/30E5o1awYAOHPmDIYOHYoOHToAKP5eZCKislSiom7Xrl3o0qUL9PX1sWvXrhfG8unpRFSVrFy5EkFBQfD09IS+vj6Ap2fpOnbsiJUrVwIATE1NMX/+fE2mSURVQImKup49e0KpVMLGxuaFjyzh09OJqKqxs7NDdHQ0Ll68iIsXL0IIgYYNG8LFxUWKad++vQYzJKKqokRF3bNPTOfT04mICnNxcVEr5IiI3rRSP9Jk3bp1yMrKKtSenZ2NdevWlUlSRERERFQ6pS7qBgwYAJVKVaj9wYMHGDBgQJkkRURERESlU+qiTggBmUxWqD0lJQUKhaJMkiIiIiKi0inxI02aNWsGmUwGmUyGjh07Qk/v/96al5eH5ORk+Pv7l0uSRERERPRiJS7qCma9JiQkoHPnzjA1NZXWGRgYoHbt2nxiOhFVSffv38eJEyeQlpZWaDJZv379NJQVEVU1JS7qpkyZAgCoXbs2+vTpA0NDw3JLioiosti9ezf69u2LzMxMmJmZqd2eIpPJWNQR0RtT6l+U6N+/PwDg1KlTSEpKgkwmg6urKzw9Pcs8OSKiim7s2LH49NNPMXPmTBgbG2s6HSKqwkpd1P3777/46KOP8Oeff6JatWoAnl56aNOmDTZv3gxHR8eyzpGIqML6999/ERISwoKOiDTulR5pkpOTg6SkJNy7dw/37t1DUlIShBAYOHBgeeRIRFRhde7cGadOndJ0GkREpT9T98cff+DYsWNqT053cXHB4sWL0bZt2zJNjoioouvatSu++OILXLhwAR4eHtLvvxbg72ET0ZtS6qKuVq1ayMnJKdSem5uLGjVqlElSRESVxaBBgwAA06ZNK7SOv4dNRG9SqS+/zpkzByNHjsSpU6cghADwdNLEqFGjMG/evDJPkIioIsvPzy/2xYKOiN6kUp+pCw4OxqNHj9CqVSvpAcS5ubnQ09PDp59+ik8//VSKvXfvXtllSkRERETFKnVRt3DhwnJIg4io8li0aBEGDx4MQ0NDLFq06IWxISEhbygrIqrqXvk5dUREVdWCBQvQt29fGBoaYsGCBcXGyWQyFnVE9MaUuqgDgCtXrmD16tW4cuUKvvvuO9jY2CAyMhKOjo5o1KhRWedIRFShJCcnF/nfRESaVOqJEkeOHIGHhweOHz+O7du34+HDhwCAs2fPSj8lRkRU1WRnZ+PixYvIzc3VdCpEVEWVuqj76quv8O233yI6OhoGBgZSe/v27REbG1umyRERVXSPHj3CwIEDYWxsjEaNGuHGjRsAnt5LN2vWLA1nR0RVSamLunPnzuG9994r1F69enXcvXu3TJIiIqosJkyYgL/++guHDx+GoaGh1N6pUyds3bpVg5kRUVVT6qKuWrVqSE1NLdR+5swZPnyYiKqcnTt3YsmSJWjXrh1kMpnU7ubmhitXrmgwMyKqakpd1AUGBuLLL7+EUqmETCZDfn4+/vzzT4wbNw79+vUrjxyJiCqsO3fuwMbGplB7ZmamWpFHRFTeSl3UzZgxA7Vq1UKNGjXw8OFDuLm54Z133kGbNm3wzTfflEeOREQVVsuWLbF3715puaCQW7FiBby8vDSVFhFVQaV+pIm+vj42btyIadOm4cyZM8jPz0ezZs1Qv3798siPiKhCCw8Ph7+/Py5cuIDc3Fx89913OH/+PGJjY3HkyBFNp0dEVcgrPacOAOrWrYs6deoAAC8xEFGV1aZNG/z555+YN28e6tati6ioKDRv3hyxsbHw8PDQdHpEVIW8UlG3cuVKLFiwAJcvXwYA1K9fH6NHj8Znn31WpskREVUGHh4eWLt2rabTIKIqrtRF3aRJk7BgwQKMHDlSul8kNjYWY8aMwbVr1/Dtt9+WeZJERBWVrq4uUlNTC02WuHv3LmxsbJCXl6ehzIioqil1Ubds2TK
sWLECH3/8sdTWvXt3NG7cGCNHjmRRR0RVihCiyPasrCy1B7QTEZW3Uhd1eXl5aNGiRaF2T09P/jwOEVUZixYtAvD0nuKffvoJpqam0rq8vDwcPXoUDRs21FR6RFQFlbqo++STT7Bs2TJERESotf/444/o27dvmSVGRFSRLViwAMDTM3XLly+Hrq6utM7AwAC1a9fG8uXLNZUeEVVBrzxRIioqCq1btwYAxMXF4ebNm+jXrx9CQ0OluOcLPyIibZGcnAzg6e9eb9++HRYWFhrOiIiqulIXdYmJiWjevDkASD+BU716dVSvXh2JiYlSHB9zQkRVwaFDh6T/Lri/juMfEWlCqYu6ZwcwIiIC1q1bh7lz50qPeWrQoAG++OILBAUFaTgzIqpKXvnhw0RE9PQ2k0mTJmHEiBFo27YthBD4888/8fnnn+O///7DmDFjNJ0iEVURLOqIiF7D4sWLsWzZMvTr109q69GjBxo1aoSwsDAWdUT0xuhoOgEiososNTUVbdq0KdTepk0bpKamaiAjIqqqWNQREb2GevXq4X//+1+h9q1bt6J+/foayIiIqqoSXX5t3rw5Dhw4AAsLC0ybNg3jxo2DsbFxeedGRFThTZ06FX369MHRo0fRtm1byGQyxMTE4MCBA0UWe0RE5aVEZ+qSkpKQmZkJ4OkA9vDhw3JNioiosnj//fdx4sQJWFtbY+fOndi+fTusra1x4sQJvPfee5pOj4iqkBKdqWvatCkGDBiAdu3aQQiBefPmqf0kzrMmT55cpgkSEVVUOTk5GDx4MCZNmoQNGzZoOh0iquJKVNStWbMGU6ZMwZ49eyCTyfD7779DT6/wW2UyGYs6Iqoy9PX1sWPHDkyaNEnTqRARlezyq4uLC7Zs2YKTJ09CCIEDBw7gzJkzhV6nT58u1YcfPXoU3bp1g4ODA2QyGXbu3Km2Pjg4GDKZTO1V8NNkBbKysjBy5EhYW1vDxMQE3bt3R0pKilpMeno6goKCoFAooFAoEBQUhPv376vF3LhxA926dYOJiQmsra0REhKC7OxstZhz587B29sbRkZGqFGjBqZNmyY9QZ6Iqqb33nuv0NhFRKQJpX5OXX5+fpl9eGZmJpo0aYIBAwbg/fffLzLG398fq1evlpYNDAzU1o8ePRq7d+/Gli1bYGVlhbFjxyIgIADx8fHSD2wHBgYiJSUFkZGRAIDBgwcjKCgIu3fvBgDk5eWha9euqF69OmJiYnD37l30798fQggsXrwYAJCRkQFfX1+0b98eJ0+exKVLlxAcHAwTExOMHTu2zPYJEVUu9erVw/Tp03Hs2DF4enrCxMREbX1ISIiGMiOiquaVHj585coVLFy4EElJSZDJZHB1dcWoUaNQt27dUvXTpUsXdOnS5YUxcrkcdnZ2Ra5TqVRYuXIl1q9fj06dOgEANmzYAEdHR+zfvx+dO3dGUlISIiMjERcXh1atWgEAVqxYAS8vL1y8eBEuLi6IiorChQsXcPPmTTg4OAAA5s+fj+DgYMyYMQPm5ubYuHEjnjx5gjVr1kAul8Pd3R2XLl1CREQEQkND+VuPRFXUTz/9hGrVqiE+Ph7x8fFq62QyGYs6InpjSv2cun379sHNzQ0nTpxA48aN4e7ujuPHj6NRo0aIjo4u8wQPHz4MGxsbNGjQAIMGDUJaWpq0Lj4+Hjk5OfDz85PaHBwc4O7ujmPHjgEAYmNjoVAopIIOAFq3bg2FQqEW4+7uLhV0ANC5c2dkZWVJg3RsbCy8vb0hl8vVYm7duoVr164Vm39WVhYyMjLUXkSkPZKTk4t9Xb16VdPpEVEVUuozdV999RXGjBmDWbNmFWr/8ssv4evrW2bJdenSBR9++CGcnJyQnJyMSZMmoUOHDoiPj4dcLodSqYSBgQEsLCzU3mdrawulUgkAUCqVsLGxKdS3jY2NWoytra3aegsLCxgYGKjF1K5du9DnFKxzdnYuchvCw8MxderU0m88EVU6BffY8sw9EWl
Cqc/UJSUlYeDAgYXaP/30U1y4cKFMkirQp08fdO3aFe7u7ujWrRt+//13XLp0CXv37n3h+4QQaoNqUQNsWcSUZACfMGECVCqV9Lp58+YLcyeiymflypVwd3eHoaEhDA0N4e7ujp9++knTaRFRFVPqoq569epISEgo1J6QkFDkGbGyZG9vDycnJ1y+fBkAYGdnh+zsbKSnp6vFpaWlSWfR7OzscPv27UJ93blzRy2m4IxcgfT0dOTk5LwwpuBS8PNn+Z4ll8thbm6u9iIi7TFp0iSMGjUK3bp1w88//4yff/4Z3bp1w5gxY/DNN99oOj0iqkJKXdQNGjQIgwcPxuzZs/HHH38gJiYGs2bNwpAhQzB48ODyyFFy9+5d3Lx5E/b29gAAT09P6Ovrq93Ll5qaisTEROkHtr28vKBSqXDixAkp5vjx41CpVGoxiYmJaj++HRUVBblcDk9PTynm6NGjao85iYqKgoODQ6HLskRUdSxbtgwrVqxAeHg4unfvju7duyM8PBw//vgjli9frun0iKgKKfU9dZMmTYKZmRnmz5+PCRMmAHg6OSEsLKzUs7wePnyIf/75R1pOTk5GQkICLC0tYWlpibCwMLz//vuwt7fHtWvX8PXXX8Pa2lr66R2FQoGBAwdi7NixsLKygqWlJcaNGwcPDw9pNqyrqyv8/f0xaNAg/PDDDwCePtIkICAALi4uAAA/Pz+4ubkhKCgIc+fOxb179zBu3DgMGjRIOrMWGBiIqVOnIjg4GF9//TUuX76MmTNnYvLkybx/hqgKy8vLQ4sWLQq1e3p6Ijc3VwMZEVFVVeozdTKZDGPGjEFKSop0n1hKSgpGjRpV6uLm1KlTaNasGZo1awYACA0NRbNmzTB58mTo6uri3Llz6NGjBxo0aID+/fujQYMGiI2NhZmZmdTHggUL0LNnT/Tu3Rtt27aFsbExdu/eLT2jDgA2btwIDw8P+Pn5wc/PD40bN8b69eul9bq6uti7dy8MDQ3Rtm1b9O7dGz179sS8efOkGIVCgejoaKSkpKBFixYYNmwYQkNDERoaWtpdSERa5JNPPsGyZcsKtf/444/o27evBjIioqrqlZ5TV+DZ4upV+Pj4vPAXGfbt2/fSPgwNDbF48WLpIcFFsbS0fOnvMtaqVQt79ux5YYyHhweOHj360pyIqGpZuXIloqKipF+8iYuLw82bN9GvXz+1f/hFRERoKkUiqgJeq6gjIqrqEhMT0bx5cwBPH8wOPJ1QVr16dSQmJkpxvE2DiMobizoiotdw6NAhTadARATgFe6pIyIiIqKKp1RFXU5ODtq3b49Lly6VVz5ERERE9ApKVdTp6+sjMTGR94YQERERVTClvvzar18/rFy5sjxyISIiIqJXVOqJEtnZ2fjpp58QHR2NFi1awMTERG09p+wTkbZr3rw5Dhw4AAsLC0ybNg3jxo2DsbGxptMioiqu1EXds9P3n7+3jpdliagqSEpKQmZmJiwsLDB16lR8/vnnLOqISONKXdRx+j4RVXVNmzbFgAED0K5dOwghMG/ePJiamhYZO3ny5DecHRFVVa/8nLp//vkHV65cwTvvvAMjIyMIIXimjoiqhDVr1mDKlCnYs2cPZDIZfv/9d+jpFR5OZTIZizoiemNKXdTdvXsXvXv3xqFDhyCTyXD58mXUqVMHn332GapVq4b58+eXR55ERBWGi4sLtmzZAgDQ0dHBgQMHYGNjo+GsiKiqK/Xs1zFjxkBfXx83btxQu4ekT58+iIyMLNPkiIgquvz8fBZ0RFQhlPpMXVRUFPbt24eaNWuqtdevXx/Xr18vs8SIiCqLK1euYOHChUhKSoJMJoOrqytGjRqFunXrajo1IqpCSn2mLjMzs8hZXv/99x/kcnmZJEVEVFns27cPbm5uOHHiBBo3bgx3d3ccP34cjRo1QnR0tKbTI6IqpNRn6t555x2sW7cO06dPB/D0RuD8/HzMnTsX7du3L/MEiYgqsq+++gpjxoz
BrFmzCrV/+eWX8PX11VBmRFTVlLqomzt3Lnx8fHDq1ClkZ2dj/PjxOH/+PO7du4c///yzPHIkIqqwkpKS8L///a9Q+6effoqFCxe++YSIqMoq9eVXNzc3nD17Fm+99RZ8fX2RmZmJXr164cyZM7x/hIiqnOrVqyMhIaFQe0JCAidQENEbVeqiDgDs7OwwdepU7NmzB7/99hu+/fZb2Nvbl3VuREQV3qBBgzB48GDMnj0bf/zxB2JiYjBr1iwMGTIEgwcPfuV+w8PDIZPJMHr0aKlNCIGwsDA4ODjAyMgIPj4+OH/+vNr7srKyMHLkSFhbW8PExATdu3dHSkqKWkx6ejqCgoKgUCigUCgQFBSE+/fvv3KuRFQxvNLDh9PT07Fy5Uq1mV4DBgyApaVlWedHRFShTZo0CWZmZpg/fz4mTJgAAHBwcEBYWBhCQkJeqc+TJ0/ixx9/ROPGjdXa58yZg4iICKxZswYNGjTAt99+C19fX1y8eBFmZmYAgNGjR2P37t3YsmULrKysMHbsWAQEBCA+Ph66uroAgMDAQKSkpEiPoRo8eDCCgoKwe/fuV90NRFQBlPpM3ZEjR+Ds7IxFixYhPT0d9+7dw6JFi+Ds7IwjR46UR45ERBWWTCbDmDFjkJKSApVKBZVKhZSUFIwaNeqVfmXn4cOH6Nu3L1asWAELCwupXQiBhQsXYuLEiejVqxfc3d2xdu1aPHr0CJs2bQIAqFQqrFy5EvPnz0enTp3QrFkzbNiwAefOncP+/fsBPL0HMDIyEj/99BO8vLzg5eWFFStWYM+ePbh48WKxeWVlZSEjI0PtRUQVS6mLuuHDh6N3795ITk7G9u3bsX37dly9ehUfffQRhg8fXh45EhFVCmZmZtIZs1c1fPhwdO3aFZ06dVJrT05OhlKphJ+fn9Qml8vh7e2NY8eOAQDi4+ORk5OjFuPg4AB3d3cpJjY2FgqFAq1atZJiWrduDYVCIcUUJTw8XLpcq1Ao4Ojo+FrbSURlr9RF3ZUrVzB27FjpND4A6OrqIjQ0FFeuXCnT5IiIqpItW7bg9OnTCA8PL7ROqVQCAGxtbdXabW1tpXVKpRIGBgZqZ/iKiilqAoeNjY0UU5QJEyZIZyJVKhVu3rxZuo0jonJX6nvqmjdvjqSkJLi4uKi1JyUloWnTpmWVFxFRlXLz5k2MGjUKUVFRMDQ0LDbu+Uu6QoiXXuZ9Pqao+Jf1I5fL+YB5ogquREXd2bNnpf8OCQnBqFGj8M8//6B169YAgLi4OHz//feFHr5JREQlEx8fj7S0NHh6ekpteXl5OHr0KJYsWSLd76ZUKtWeNpCWliadvbOzs0N2djbS09PVztalpaWhTZs2Uszt27cLff6dO3cKnQUkosqlREVd06ZNIZPJIISQ2saPH18oLjAwEH369Cm77IiIKrCC+9d++OEHNGjQ4LX66tixI86dO6fWNmDAADRs2BBffvkl6tSpAzs7O0RHR6NZs2YAgOzsbBw5cgSzZ88GAHh6ekJfXx/R0dHo3bs3ACA1NRWJiYmYM2cOAMDLywsqlQonTpzAW2+9BQA4fvw4VCqVVPgRUeVUoqIuOTm5vPMgIqp09PX1kZiY+EqzXJ9nZmYGd3d3tTYTExNYWVlJ7aNHj8bMmTNRv3591K9fHzNnzoSxsTECAwMBAAqFAgMHDsTYsWNhZWUFS0tLjBs3Dh4eHtLEC1dXV/j7+2PQoEH44YcfADx9pElAQECh22qIqHIpUVHn5ORU3nkQEVVK/fr1w8qVK9/I7Sfjx4/H48ePMWzYMKSnp6NVq1aIiopSm3G7YMEC6OnpoXfv3nj8+DE6duyINWvWqE1u27hxI0JCQqRZst27d8eSJUvKPX8iKl+v9PDhf//9F3/++SfS0tKQn5+vtu5VH7ZJRFQZZWdn46effkJ0dDRatGgBExMTtfURERGv3Pfhw4fVlmUyGcLCwhAWFlbsewwNDbF48WIsXry42BhLS0ts2LDhlfM
iooqp1EXd6tWr8fnnn8PAwABWVlaFZlSxqCOiqiQxMRHNmzcHAFy6dEltXVlcliUiKqlSF3WTJ0/G5MmTMWHCBOjovNJPxxIRaY1Dhw5pOgUiIgCv8PDhR48e4aOPPmJBR0T0jH/++Qf79u3D48ePAUDtaQFERG9CqSuzgQMH4ueffy6PXIiIKp27d++iY8eOaNCgAd59912kpqYCAD777DOMHTtWw9kRUVVS6suv4eHhCAgIQGRkJDw8PKCvr6+2/nVuCiYiqmzGjBkDfX193LhxA66urlJ7nz59MGbMGMyfP1+D2RFRVVLqom7mzJnYt2+f9Dyjl/30DBGRNouKisK+fftQs2ZNtfb69evj+vXrGsqKiKqiUhd1ERERWLVqFYKDg8shHSKiyiUzMxPGxsaF2v/77z/+VioRvVGlvqdOLpejbdu25ZELEVGl884772DdunXSskwmQ35+PubOnYv27dtrMDMiqmpKfaZu1KhRWLx4MRYtWlQe+RARVSpz586Fj48PTp06hezsbIwfPx7nz5/HvXv38Oeff2o6PSKqQkpd1J04cQIHDx7Enj170KhRo0ITJbZv315myRERVXRubm44e/Ysli1bBl1dXWRmZqJXr14YPnw47O3tNZ0eEVUhpS7qqlWrhl69epVHLkRElZKdnR2mTp2q6TSIqIp7pZ8JIyKi/5Oeno6VK1ciKSkJMpkMrq6uGDBgACwtLTWdGhFVIfxZCCKi13DkyBE4Oztj0aJFSE9Px71797Bo0SI4OzvjyJEjmk6PiKqQUp+pc3Z2fuHz6K5evfpaCRERVSbDhw9H7969pXvqACAvLw/Dhg3D8OHDkZiYqOEMiaiqKHVRN3r0aLXlnJwcnDlzBpGRkfjiiy/KKi8iokrhypUr2LZtm1TQAYCuri5CQ0PVHnVCRFTeXumRJkX5/vvvcerUqddOiIioMmnevDmSkpKkX9kpkJSUhKZNm2omKSKqkkpd1BWnS5cumDBhAidSEJHWO3v2rPTfISEhGDVqFP755x+0bt0aABAXF4fvv/8es2bN0lSKRFQFlVlR98svv3CmFxFVCU2bNoVMJoMQQmobP358objAwED06dPnTaZGRFVYqYu6Zs2aqU2UEEJAqVTizp07WLp0aZkmR0RUESUnJ2s6BSKiQkpd1PXs2VNtWUdHB9WrV4ePjw8aNmxYVnkREVVYTk5Omk6BiKiQUhd1U6ZMKY88iIgqrX///Rd//vkn0tLSkJ+fr7YuJCREQ1kRUVVTZvfUERFVRatXr8bnn38OAwMDWFlZqd2eIpPJWNQR0RtT4qJOR0fnhQ8dBp4OYLm5ua+dFBFRZTF58mRMnjwZEyZMgI4Of6SHiDSnxEXdjh07il137NgxLF68WG0mGBFRVfDo0SN89NFHLOiISONKXNT16NGjUNvff/+NCRMmYPfu3ejbty+mT59epskREVV0AwcOxM8//4yvvvpK06kQURX3SvfU3bp1C1OmTMHatWvRuXNnJCQkwN3dvaxzIyKq8MLDwxEQEIDIyEh4eHhAX19fbX1ERISGMiOiqqZURZ1KpcLMmTOxePFiNG3aFAcOHMDbb79dXrkREVV4M2fOxL59+6SfCXt+ogQR0ZtS4qJuzpw5mD17Nuzs7LB58+YiL8cSEVU1ERERWLVqFYKDgzWdChFVcSUu6r766isYGRmhXr16WLt2LdauXVtk3Pbt28ssOSKiik4ul6Nt27aaToOIqORFXb9+/XgpgYjoOaNGjcLixYuxaNEiTadCRFVciYu6NWvWlGMaRESV04kTJ3Dw4EHs2bMHjRo1KjRRglcviOhN4S9KEBG9hmrVqqFXr16aToOIiEUdEdHrWL16taZTICICAGj0EehHjx5Ft27d4ODgAJlMhp07d6qtF0IgLCwMDg4OMDIygo+PD86fP68Wk5WVhZEjR8La2homJibo3r07UlJS1GLS09MRFBQEhUIBhUKBoKAg3L9/Xy3mxo0b6Na
tG0xMTGBtbY2QkBBkZ2erxZw7dw7e3t4wMjJCjRo1MG3aNP6KBhEREVUIGi3qMjMz0aRJEyxZsqTI9XPmzEFERASWLFmCkydPws7ODr6+vnjw4IEUM3r0aOzYsQNbtmxBTEwMHj58iICAAOTl5UkxgYGBSEhIQGRkJCIjI5GQkICgoCBpfV5eHrp27YrMzEzExMRgy5Yt2LZtG8aOHSvFZGRkwNfXFw4ODjh58iQWL16MefPm8cGiRFWcs7Mz6tSpU+yLiOhN0ejl1y5duqBLly5FrhNCYOHChZg4caJ0v8ratWtha2uLTZs2YciQIVCpVFi5ciXWr1+PTp06AQA2bNgAR0dH7N+/H507d0ZSUhIiIyMRFxeHVq1aAQBWrFgBLy8vXLx4ES4uLoiKisKFCxdw8+ZNODg4AADmz5+P4OBgzJgxA+bm5ti4cSOePHmCNWvWQC6Xw93dHZcuXUJERARCQ0M5M5ioiho9erTack5ODs6cOYPIyEh88cUXmkmKiKqkCntPXXJyMpRKJfz8/KQ2uVwOb29vHDt2DEOGDEF8fDxycnLUYhwcHODu7o5jx46hc+fOiI2NhUKhkAo6AGjdujUUCgWOHTsGFxcXxMbGwt3dXSroAKBz587IyspCfHw82rdvj9jYWHh7e0Mul6vFTJgwAdeuXYOzs3OR25GVlYWsrCxpOSMjo0z2DxFVDKNGjSqy/fvvv8epU6fecDZEVJVp9PLriyiVSgCAra2tWrutra20TqlUwsDAABYWFi+MsbGxKdS/jY2NWszzn2NhYQEDA4MXxhQsF8QUJTw8XLqXT6FQwNHR8cUbTkRaoUuXLti2bZum0yCiKqTCFnUFnr+sKYR46aXO52OKii+LmIJJEi/KZ8KECVCpVNLr5s2bL8ydiLTDL7/8AktLS02nQURVSIW9/GpnZwfg6Vkwe3t7qT0tLU06Q2ZnZ4fs7Gykp6erna1LS0tDmzZtpJjbt28X6v/OnTtq/Rw/flxtfXp6OnJyctRinj8jl5aWBqDw2cRnyeVytUu2RKRdmjVrpvYPOyEElEol7ty5g6VLl2owMyKqairsmTpnZ2fY2dkhOjpaasvOzsaRI0ekgs3T0xP6+vpqMampqUhMTJRivLy8oFKpcOLECSnm+PHjUKlUajGJiYlITU2VYqKioiCXy+Hp6SnFHD16VO0xJ1FRUXBwcEDt2rXLfgcQUaXQs2dP9OjRQ3r16tULU6ZMQWJiIgYPHqzp9IioCtHombqHDx/in3/+kZaTk5ORkJAAS0tL1KpVC6NHj8bMmTNRv3591K9fHzNnzoSxsTECAwMBAAqFAgMHDsTYsWNhZWUFS0tLjBs3Dh4eHtJsWFdXV/j7+2PQoEH44YcfAACDBw9GQEAAXFxcAAB+fn5wc3NDUFAQ5s6di3v37mHcuHEYNGgQzM3NATx9LMrUqVMRHByMr7/+GpcvX8bMmTMxefJkznwlqsKmTJmi6RSIiABouKg7deoU2rdvLy2HhoYCAPr37481a9Zg/PjxePz4MYYNG4b09HS0atUKUVFRMDMzk96zYMEC6OnpoXfv3nj8+DE6duyINWvWQFdXV4rZuHEjQkJCpFmy3bt3V3s2nq6uLvbu3Ythw4ahbdu2MDIyQmBgIObNmyfFKBQKREdHY/jw4WjRogUsLCwQGhoq5UxERESkSTLBn0R4ozIyMqBQKKBSqaSzgEVRZQN/3AGO3gYyct5gglWEuT7wji3wdnVAYVB2/fK4lb+SHruSftdelY6OzkvP0stkMuTm5pb5Z1cEHMsqDo5nlVdZj2cVdqIEEVFFtmPHjmLXHTt2DIsXL+bPCBLRG8WijojoFfTo0aNQ299//40JEyZg9+7d6Nu3L6ZPn66BzIioqqqws1+JiCqLW7duYdCgQWjcuDFyc3ORkJCAtWvXolatWppOjYiqEBZ1RESvSKVS4csvv0S9evVw/vx5HDhwALt374a7u7umUyOiKoiXX4mIXsG
cOXMwe/Zs2NnZYfPmzUVejiUiepNY1BERvYKvvvoKRkZGqFevHtauXYu1a9cWGbd9+/Y3nBkRVVUs6oiIXkG/fv344HEiqlBY1BERvYI1a9ZoOgUiIjWcKEFERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RUQURHh6Oli1bwszMDDY2NujZsycuXryoFiOEQFhYGBwcHGBkZAQfHx+cP39eLSYrKwsjR46EtbU1TExM0L17d6SkpKjFpKenIygoCAqFAgqFAkFBQbh//355byIRlSMWdUREFcSRI0cwfPhwxMXFITo6Grm5ufDz80NmZqYUM2fOHERERGDJkiU4efIk7Ozs4OvriwcPHkgxo0ePxo4dO7BlyxbExMTg4cOHCAgIQF5enhQTGBiIhIQEREZGIjIyEgkJCQgKCnqj20tEZUtP0wkQEdFTkZGRasurV6+GjY0N4uPj8c4770AIgYULF2LixIno1asXAGDt2rWwtbXFpk2bMGTIEKhUKqxcuRLr169Hp06dAAAbNmyAo6Mj9u/fj86dOyMpKQmRkZGIi4tDq1atAAArVqyAl5cXLl68CBcXl0K5ZWVlISsrS1rOyMgor91ARK+IZ+qIiCoolUoFALC0tAQAJCcnQ6lUws/PT4qRy+Xw9vbGsWPHAADx8fHIyclRi3FwcIC7u7sUExsbC4VCIRV0ANC6dWsoFAop5nnh4eHSpVqFQgFHR8ey3Vgiem0s6oiIKiAhBEJDQ9GuXTu4u7sDAJRKJQDA1tZWLdbW1lZap1QqYWBgAAsLixfG2NjYFPpMGxsbKeZ5EyZMgEqlkl43b958vQ0kojLHy69ERBXQiBEjcPbsWcTExBRaJ5PJ1JaFEIXanvd8TFHxL+pHLpdDLpeXJHUi0hCeqSMiqmBGjhyJXbt24dChQ6hZs6bUbmdnBwCFzqalpaVJZ+/s7OyQnZ2N9PT0F8bcvn270OfeuXOn0FlAIqo8WNQREVUQQgiMGDEC27dvx8GDB+Hs7Ky23tnZGXZ2doiOjpbasrOzceTIEbRp0wYA4OnpCX19fbWY1NRUJCYmSjFeXl5QqVQ4ceKEFHP8+HGoVCophogqH15+JSKqIIYPH45Nmzbh119/hZmZmXRGTqFQwMjICDKZDKNHj8bMmTNRv3591K9fHzNnzoSxsTECAwOl2IEDB2Ls2LGwsrKCpaUlxo0bBw8PD2k2rKurK/z9/TFo0CD88MMPAIDBgwcjICCgyJmvRFQ5sKgjIqogli1bBgDw8fFRa1+9ejWCg4MBAOPHj8fjx48xbNgwpKeno1WrVoiKioKZmZkUv2DBAujp6aF37954/PgxOnbsiDVr1kBXV1eK2bhxI0JCQqRZst27d8eSJUvKdwOJqFyxqCMiqiCEEC+NkclkCAsLQ1hYWLExhoaGWLx4MRYvXlxsjKWlJTZs2PAqaRJRBcV76oiIiIi0AIs6IiIiIi3Aoo6IiIhIC7CoIyIiItICLOqIiIiItACLOiIiIiItwKKOiIiISAuwqCMiIiLSAizqiIiIiLQAizoiIiIiLcCijoiIiEgLVOiiLiwsDDKZTO1lZ2cnrRdCICwsDA4ODjAyMoKPjw/Onz+v1kdWVhZGjhwJa2trmJiYoHv37khJSVGLSU9PR1BQEBQKBRQKBYKCgnD//n21mBs3bqBbt24wMTGBtbU1QkJCkJ2dXW7bTkRERFQaFbq
oA4BGjRohNTVVep07d05aN2fOHERERGDJkiU4efIk7Ozs4OvriwcPHkgxo0ePxo4dO7BlyxbExMTg4cOHCAgIQF5enhQTGBiIhIQEREZGIjIyEgkJCQgKCpLW5+XloWvXrsjMzERMTAy2bNmCbdu2YezYsW9mJxARERG9hJ6mE3gZPT09tbNzBYQQWLhwISZOnIhevXoBANauXQtbW1ts2rQJQ4YMgUqlwsqVK7F+/Xp06tQJALBhwwY4Ojpi//796Ny5M5KSkhAZGYm4uDi0atUKALBixQp4eXnh4sWLcHFxQVRUFC5cuICbN2/CwcEBADB//nwEBwdjxowZMDc3f0N7g4iIiKhoFf5M3eXLl+Hg4ABnZ2d89NFHuHr1KgAgOTkZSqUSfn5+UqxcLoe3tzeOHTsGAIiPj0dOTo5ajIODA9zd3aWY2NhYKBQKqaADgNatW0OhUKjFuLu7SwUdAHTu3BlZWVmIj49/Yf5ZWVnIyMhQexERERGVtQpd1LVq1Qrr1q3Dvn37sGLFCiiVSrRp0wZ3796FUqkEANja2qq9x9bWVlqnVCphYGAACwuLF8bY2NgU+mwbGxu1mOc/x8LCAgYGBlJMccLDw6V79RQKBRwdHUuxB4iIiIhKpkIXdV26dMH7778PDw8PdOrUCXv37gXw9DJrAZlMpvYeIUShtuc9H1NU/KvEFGXChAlQqVTS6+bNmy+MJyIiInoVFbqoe56JiQk8PDxw+fJl6T6758+UpaWlSWfV7OzskJ2djfT09BfG3L59u9Bn3blzRy3m+c9JT09HTk5OoTN4z5PL5TA3N1d7EREREZW1SlXUZWVlISkpCfb29nB2doadnR2io6Ol9dnZ2Thy5AjatGkDAPD09IS+vr5aTGpqKhITE6UYLy8vqFQqnDhxQoo5fvw4VCqVWkxiYiJSU1OlmKioKMjlcnh6epbrNhMRERGVRIWe/Tpu3Dh069YNtWrVQlpaGr799ltkZGSgf//+kMlkGD16NGbOnIn69eujfv36mDlzJoyNjREYGAgAUCgUGDhwIMaOHQsrKytYWlpi3Lhx0uVcAHB1dYW/vz8GDRqEH374AQAwePBgBAQEwMXFBQDg5+cHNzc3BAUFYe7cubh37x7GjRuHQYMG8cwbERERVQgVuqhLSUnBxx9/jP/++w/Vq1dH69atERcXBycnJwDA+PHj8fjxYwwbNgzp6elo1aoVoqKiYGZmJvWxYMEC6OnpoXfv3nj8+DE6duyINWvWQFdXV4rZuHEjQkJCpFmy3bt3x5IlS6T1urq62Lt3L4YNG4a2bdvCyMgIgYGBmDdv3hvaE0REREQvJhNCCE0nUZVkZGRAoVBApVK98CyfKhv44w5w9DaQkfMGE6wizPWBd2yBt6sDCoOy65fHrfyV9NiV9LtGr4ZjWcXB8azyKuvxrFLdU0dERERERWNRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUEREREWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBRR0RERKQFWNQRERERaQEWdURERERagEUdERERkRZgUUdERESkBVjUERE
REWkBFnVEREREWoBFHREREZEWYFFHREREpAVY1BERERFpARZ1RERERFqARR0RERGRFmBR9wqWLl0KZ2dnGBoawtPTE3/88YemUyIiKjWOZUTahUVdKW3duhWjR4/GxIkTcebMGbz99tvo0qULbty4oenUiIhKjGMZkfZhUVdKERERGDhwID777DO4urpi4cKFcHR0xLJlyzSdGhFRiXEsI9I+eppOoDLJzs5GfHw8vvrqK7V2Pz8/HDt2rMj3ZGVlISsrS1pWqVQAgIyMjBd+VkY28OgBoP8EkOe+ZuJUiH7e0/2bIQdkBmXXL49b+SvpsSv4jgkh3lBmlQfHMu3C8azyKuvxjEVdKfz333/Iy8uDra2tWrutrS2USmWR7wkPD8fUqVMLtTs6OpZLjkSk7sGDB1AoFJpOo0LhWEZUOb1sPGNR9wpkMpnashCiUFuBCRMmIDQ0VFrOz8/HvXv3YGVlVex7KqOMjAw4Ojri5s2bMDc313Q6VAraeuyEEHjw4AEcHBw0nUqFxbGsMG39PlQF2nzsSjqesagrBWtra+jq6hb6l2xaWlqhf/EWkMvlkMvlam3VqlUrrxQ1ztzcXOu+TFWFNh47nqErGseyl9PG70NVoa3HriTjGSdKlIKBgQE8PT0RHR2t1h4dHY02bdpoKCsiotLhWEaknXimrpRCQ0MRFBSEFi1awMvLCz/++CNu3LiBzz//XNOpERGVGMcyIu3Doq6U+vTpg7t372LatGlITU2Fu7s7fvvtNzg5OWk6NY2Sy+WYMmVKocszVPHx2FVNHMuKxu9D5cVjB8gE5/sTERERVXq8p46IiIhIC7CoIyIiItICLOqIiIiItACLOi3l4+OD0aNHa+SzZTIZdu7c+crvDwsLQ9OmTcssH3pqzZo1Wv1cMdJeHM/oeRzPisaijspcamoqunTpUqLYogbMcePG4cCBA+WQWdXWp08fXLp0SdNpEFUqHM8qJo5nReMjTajMZGdnw8DAAHZ2dq/Vj6mpKUxNTcsoq6pBCIG8vDzo6RX/lTYyMoKRkdEbzIqo8uJ4pjkcz14dz9Rpsfz8fIwfPx6Wlpaws7NDWFiYtE6lUmHw4MGwsbGBubk5OnTogL/++ktaHxwcjJ49e6r1N3r0aPj4+EjLPj4+GDFiBEJDQ2FtbQ1fX18A6v9azc7OxogRI2Bvbw9DQ0PUrl0b4eHhAIDatWsDAN577z3IZDJpuajLFatWrUKjRo0gl8thb2+PESNGvPb+0aSCfTdixAhUq1YNVlZW+Oabb1DwhKENGzagRYsWMDMzg52dHQIDA5GWlia9//Dhw5DJZNi3bx9atGgBuVyOP/74A3/99Rfat28PMzMzmJubw9PTE6dOnQJQ+HJFwX5etWoVatWqBVNTUwwdOhR5eXmYM2cO7OzsYGNjgxkzZqjlfuPGDfTo0QOmpqYwNzdH7969cfv27UL9rl+/HrVr14ZCocBHH32EBw8eAADWrVsHKysrZGVlqfX7/vvvo1+/fmW6n0l7cDyruDieVZzxjEWdFlu7di1MTExw/PhxzJkzB9OmTUN0dDSEEOjatSuUSiV+++03xMfHo3nz5ujYsSPu3btX6s/Q09PDn3/+iR9++KHQ+kWLFmHXrl343//+h4sXL2LDhg3SYHfy5EkAwOrVq5GamiotP2/ZsmUYPnw4Bg8ejHPnzmHXrl2oV69e6XZGBVSw744fP45FixZhwYIF+OmnnwA8/Z/H9OnT8ddff2Hnzp1ITk5GcHBwoT7Gjx+P8PBwJCUloXHjxujbty9q1qyJkydPIj4+Hl999RX09fWLzeHKlSv4/fffERkZic2bN2PVqlXo2rUrUlJScOTIEcyePRvffPMN4uLiADz9F3TPnj1x7949HDlyBNHR0bhy5Qr69OlTqN+dO3diz5492LNnD44cOYJZs2YBAD788EPk5eVh165dUvx///2HPXv2YMCAAa+7W0lLcTyr2DieVZDxTJBW8vb2Fu3
atVNra9mypfjyyy/FgQMHhLm5uXjy5Ina+rp164offvhBCCFE//79RY8ePdTWjxo1Snh7e6t9RtOmTQt9NgCxY8cOIYQQI0eOFB06dBD5+flF5vlsbIEpU6aIJk2aSMsODg5i4sSJL9jaysfb21u4urqq7Zcvv/xSuLq6Fhl/4sQJAUA8ePBACCHEoUOHBACxc+dOtTgzMzOxZs2aIvtYvXq1UCgU0vKUKVOEsbGxyMjIkNo6d+4sateuLfLy8qQ2FxcXER4eLoQQIioqSujq6oobN25I68+fPy8AiBMnThTb7xdffCFatWolLQ8dOlR06dJFWl64cKGoU6dOsX8nVLVxPKvYOJ5VnPGMZ+q0WOPGjdWW7e3tkZaWhvj4eDx8+BBWVlbS/R6mpqZITk7GlStXSvUZLVq0eOH64OBgJCQkwMXFBSEhIYiKiipV/2lpabh16xY6duxYqvdVBq1bt4ZMJpOWvby8cPnyZeTl5eHMmTPo0aMHnJycYGZmJl0munHjhlofz+//0NBQfPbZZ+jUqRNmzZr10uNZu3ZtmJmZScu2trZwc3ODjo6OWlvBpZKkpCQ4OjrC0dFRWu/m5oZq1aohKSmp2H4L/vYKDBo0CFFRUfj3338BPD27ERwcrLY/iJ7F8axi43hWMcYzFnVa7PnT1DKZDPn5+cjPz4e9vT0SEhLUXhcvXsQXX3wBANDR0ZHuhyiQk5NT6DNMTExemEPz5s2RnJyM6dOn4/Hjx+jduzc++OCDEm9DVbwR9smTJ/Dz84OpqSk2bNiAkydPYseOHQCeXsZ41vP7PywsDOfPn0fXrl1x8OBBuLm5Se8tSlF/I8X93QBPL1cUNVA93/6iPgCgWbNmaNKkCdatW4fTp0/j3LlzRV6OISrA8axy4nj2ZnH2axXUvHlzKJVK6OnpSfeDPK969epITExUa0tISHjh/QzFMTc3R58+fdCnTx988MEH8Pf3x71792BpaQl9fX3k5eUV+14zMzPUrl0bBw4cQPv27Uv92RVZwX0dzy7Xr18ff//9N/777z/MmjVL+hdkwc3BJdGgQQM0aNAAY8aMwccff4zVq1fjvffeK5Oc3dzccOPGDdy8eVPK7cKFC1CpVHB1dS1VX5999hkWLFiAf//9F506dVL71zJRSXE8qxg4nlWM8Yxn6qqg/9fe/cdEWcdxAH8fdKdwd1gCHgYmAwYhCwwOis4h1DmqEV1Z6nLhBunMJJyGLaMgdQUTDEUp5wTHH6ljo0XtWmbK+kEOSpiu8eOm0rJgTTpDhPKET384n3V4CVwqeLxf2233fJ/n+Tzf52G873v3PPec2WxGcnIyLBYLvvjiC3R1daGxsREFBQXKP9ujjz6KH374ATU1NbDZbCgsLLwuFMfi/fffx8GDB9He3o7Ozk7U1tYiKChI+dbStYDr6emB3W53WaOoqAhlZWXYuXMnbDYbTpw4gYqKCrf3f7L45ZdfsH79enR0dODAgQOoqKhAXl4e7rvvPmg0GlRUVODMmTOor6/Hli1bRq03ODiItWvXoqGhAT///DO+++47NDc3jzucbsRsNisXMJ84cQJNTU3IysrCwoULRz11NdLy5cvx66+/Yu/evcjOzr5pfaSphXk2OTDPJkeecVA3BalUKlitVqSkpCA7OxuRkZFYtmwZurq6YDAYAADp6el46623sHHjRiQmJuLixYtufT1bp9OhpKQERqMRiYmJ6OrqgtVqVa5xKCsrw5dffok5c+bgwQcfdFljxYoVKC8vR2VlJWJiYpCRkQGbzeb+AZgksrKyMDg4iKSkJLzyyivIzc3FqlWrEBgYiP3796O2thbz5s1DcXExSktLR63n7e2N3t5eZGVlITIyEkuWLMETTzyBd95556b1+drtHe655x6kpKTAbDYjLCwMhw4dGnctPz8/LF68GDqd7rrbTRCNFfNscmCeTY48U8nICw2I6JZLTU3F/PnzUV5ePtFdmVCLFi1CdHQ0du7cOdF
dISI3Mc+umgx5xmvqiOi2++OPP3D48GEcPXoUu3btmujuEBG5bTLlGQd1RHTbxcfHw263o6SkBFFRURPdHSIit02mPOPpVyIiIiIPwC9KEBEREXkADuqIiIiIPAAHdUREREQegIM6IiIiIg/AQR0RERGRB+Cgjjza/v37lZ/w8STX7oR+s6WmpmLdunU3vS4R/X/Ms/GZinnGQR25raenB3l5eYiIiMD06dNhMBiwYMECfPjhhxgYGJjo7gEAli5dis7OzonuhtuKioowf/78ie4Gkcdjnt16zLNbjzcfJrecOXMGJpMJd999N95991088MADuHLlCjo7O1FVVYV7770XmZmZLtd1OBxQq9W3pZ8+Pj7w8fG5LdsiojsT84w8hhC5IT09XUJCQqS/v9/l/OHhYeU5APnggw8kMzNTfH195e233xYRkcrKSgkLCxO1Wi2RkZFSU1OjrHP27FkBIC0tLUqb3W4XAHLs2DERETl27JgAkM8++0xiY2Nl2rRpkpSUJCdPnlTWqa6ulhkzZijThYWFEhcXJzU1NTJ37lzx8/OTpUuXSl9fn7JMX1+fvPDCC+Lr6ytBQUGyfft2WbhwoeTl5f3n8bhWd9++fTJnzhzRarWyevVquXLlipSUlIjBYJDAwEDZunWr03oXLlyQlStXSmBgoOj1eklLS5PW1lal7wCcHtXV1cox3bt3r1gsFvHx8ZGIiAj55JNPnGo3NDRIYmKiaDQaCQoKktdff10cDocyv7+/X1588UXRarUSFBQkpaWlo+4nkSdinjljnt25OKijcTt//ryoVCp57733xrQ8AJk1a5bs27dPTp8+LV1dXVJXVydqtVp2794tHR0dUlZWJt7e3nL06FERGV8IRkdHy+HDh+XkyZOSkZEhoaGhcvnyZRFxHYI6nU6effZZOXXqlHz99dcSFBQkmzZtUpZ56aWXZO7cuXLkyBE5deqUPPPMM6LX60cNQZ1OJ88995z89NNPUl9fLxqNRtLT0yU3N1fa29ulqqpKAMj3338vIldfKEwmkzz11FPS3NwsnZ2dsmHDBvH395fe3l4ZGBiQDRs2SExMjHR3d0t3d7cMDAwoxzQkJEQ++ugjsdls8uqrr4pOp5Pe3l4RETl37pz4+vrKmjVrpK2tTT7++GMJCAiQwsJCpc8vv/yyhISEOB07nU435UKQpjbm2fWYZ3cuDupo3I4fPy4ApK6uzqnd399ftFqtaLVa2bhxo9IOQNatW+e07COPPCIrV650anv++eflySefFJHxheDBgweVZXp7e8XHx0cOHTokIq5D0NfX1+mdbH5+vjz00EMicvVdrVqtltraWmX+hQsXxNfXd9QQHFk3PT1dQkNDZWhoSGmLiopSXjy++uor8fPzk7/++supVnh4uOzZs0epGxcXd932AEhBQYEy3d/fLyqVSj7//HMREdm0aZNERUU5fcKwe/du0el0MjQ0JBcvXhSNRuPy2E21EKSpjXl2PebZnYvX1JHbVCqV03RTUxOGh4exfPly/P33307zjEaj03RbWxtWrVrl1GYymbBjx45x9yM5OVl5PnPmTERFRaGtre0/lw8NDYVer1emZ8+ejd9//x3A1WtrHA4HkpKSlPkzZswY0480j6xrMBjg7e0NLy8vp7Zr2/rxxx/R398Pf39/pzqDg4M4ffr0qNuLjY1Vnmu1Wuj1eqV2W1sbkpOTnf5GJpMJ/f39OHfuHOx2Oy5fvuzy2BFNRcyzG9dlnt0ZOKijcYuIiIBKpUJ7e7tTe1hYGAC4vJBXq9Ve1zYyREVEabsWHCKizHc4HGPu48ja/zbyomaVSoXh4WGn7bnq22hc1b3RtoaHhzF79mw0NDRcV2ssty0YbT/+ax9UKtWY9odoKmCejb0u82zy4y1NaNz8/f2xaNEi7Nq1C5cuXXKrRnR0NL799luntsbGRkRHRwMAAgMDAQDd3d3K/NbWVpe1jh8/rjy32+3o7OzE/fff71a/wsP
DoVar0dTUpLT19fXBZrO5Ve9G4uPj0dPTg7vuugsRERFOj4CAAACARqPB0NDQuGvPmzcPjY2NTmHX2NgIvV6P4OBgREREQK1Wuzx2RFMJ8+zmYJ5NDvykjtxSWVkJk8kEo9GIoqIixMbGwsvLC83NzWhvb0dCQsIN18/Pz8eSJUsQHx+Pxx57DJ9++inq6upw5MgRAFffHT/88MMoLi5GaGgozp8/j4KCApe1Nm/eDH9/fxgMBrz55psICAiAxWJxa7/0ej1WrFiB/Px8zJw5E7NmzUJhYSG8vLxu+G7ZHWazGcnJybBYLCgpKUFUVBR+++03WK1WWCwWGI1GhIaG4uzZs2htbUVISAj0ej2mTZs2au01a9agvLwcubm5WLt2LTo6OlBYWIj169fDy8sLOp0OOTk5yM/Pdzp2/z61QjRVMM/+P+bZ5DD19phuivDwcLS0tMBsNuONN95AXFwcjEYjKioq8Nprr2HLli03XN9isWDHjh3Ytm0bYmJisGfPHlRXVyM1NVVZpqqqCg6HA0ajEXl5edi6davLWsXFxcjLy0NCQgK6u7tRX18PjUbj9r5t374dycnJyMjIgNlshslkQnR0NKZPn+52TVdUKhWsVitSUlKQnZ2NyMhILFu2DF1dXTAYDACAxYsX4/HHH0daWhoCAwNx4MCBMdUODg6G1WpFU1MT4uLisHr1auTk5Di9kGzbtg0pKSnIzMyE2WzGggULRn3xIvJEzLP/j3k2OaiEJ6PpDtXQ0IC0tDTY7fZb+tM5ly5dQnBwMMrKypCTk3PLtkNEUxfzjG4Gnn4lGqGlpQXt7e1ISkrCn3/+ic2bNwMAnn766QnuGRHR+DDPphYO6ohcKC0tRUdHBzQaDRISEvDNN98oF/sSEd1JmGdTB0+/EhEREXkAflGCiIiIyANwUEdERETkATioIyIiIvIAHNQREREReQAO6oiIiIg8AAd1RERERB6AgzoiIiIiD8BBHREREZEH+AfQyCmXwKOMJQAAAABJRU5ErkJggg==",
+      "text/plain": [
+       "<Figure size 600x500 with 2 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# visualize comparison between heuristic and parsimony grouping\n",
+    "import matplotlib.pyplot as plt\n",
+    "from alphadia.plotting.utils import lighten_color \n",
+    "\n",
+    "def basic_barplot(\n",
+    "        values : list,\n",
+    "        names : list,\n",
+    "        title : str,\n",
+    "        xlabel : str,\n",
+    "        ylabel : str,\n",
+    "        color_hex : str,\n",
+    "        hwspace : float = 0.4,\n",
+    "):\n",
+    "    \"\"\"rudimentary visualization function to plot barplot\"\"\"\n",
+    "\n",
+    "    panels = len(values)\n",
+    "    fig, axs = plt.subplots(1, panels, figsize = (panels * 3, 5))\n",
+    "    plt.tight_layout()\n",
+    "    axs = axs.flatten()\n",
+    "    for i, ax in enumerate(axs):\n",
+    "        heights = [len(v) for v in values[i]]\n",
+    "        bars = ax.bar(names[i], heights, color = color_hex[i])\n",
+    "        for b in bars:\n",
+    "            b.set_edgecolor(lighten_color(b.get_facecolor(), 0.5))\n",
+    "            b.set_facecolor(color_hex[i])\n",
+    "            ax.text(b.get_x() + (b.get_width() / 2), b.get_height(), str(int(b.get_height())), ha = 'center', va = 'bottom')\n",
+    "\n",
+    "        ax.set_title(title[i])\n",
+    "        ax.set_xlabel(xlabel[i])\n",
+    "        ax.set_ylabel(ylabel[i])\n",
+    "\n",
+    "    plt.subplots_adjust(wspace = hwspace)\n",
+    "\n",
+    "    return fig, axs\n",
+    "\n",
+    "# Confirm that both methods have (nearly) the same number of precursors\n",
+    "pe_plot, _ = basic_barplot(\n",
+    "    [[data_tables[\"pe_heuristic\"], data_tables[\"pe_parsimony\"]], [data_tables[\"pg_heuristic\"], data_tables[\"pg_parsimony\"]]],\n",
+    "    [[\"heuristic\", \"parsimony\"], [\"heuristic\", \"parsimony\"]],\n",
+    "    [\"Number of peptides \\nper grouping method\", \"Number of protein groups \\nper grouping method\"],\n",
+    "    [\"Grouping method\", \"Grouping method\"],\n",
+    "    [\"Number of peptides\", \"Number of protein groups\"],\n",
+    "    [\"#67c8ff\", \"#67c8ff\"],\n",
+    ")\n",
+    "\n",
+    "# show plots next to each other in panel\n",
+    "layout = []\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Comparison of maximum_parsimony and heuristic grouping:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(7929, 4)"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_tables[\"pg_parsimony\"].shape"
+   ]
   }
  ],
  "metadata": {

From c5876c58a42fd4805afbd74976a2c414f7947602 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Sat, 25 May 2024 01:21:47 +0200
Subject: [PATCH 13/48] FIX transferlearning bug

---
 alphadia/outputaccumulator.py       |  7 +++++++
 alphadia/workflow/peptidecentric.py | 12 ++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/alphadia/outputaccumulator.py b/alphadia/outputaccumulator.py
index 26c0a6fb..5328acab 100644
--- a/alphadia/outputaccumulator.py
+++ b/alphadia/outputaccumulator.py
@@ -100,6 +100,7 @@ def parse_output_folder(
             "mods",
             "mod_sites",
             "proba",
+            "decoy",
         ],
     ) -> Tuple[pd.DataFrame, pd.DataFrame]:
         """
@@ -132,10 +133,16 @@ def parse_output_folder(
         psm_df = psm_df[selected_precursor_columns]
         # validate.precursors_flat_from_output(psm_df)
 
+        # remove decoy precursors
+        psm_df = psm_df[psm_df["decoy"] == 0]
+
         self._precursor_df = pd.DataFrame()
         for col in psm_df.columns:
             self._precursor_df[col] = psm_df[col]
 
+        self._precursor_df["decoy"] = self._precursor_df["decoy"].astype(int)
+        self._precursor_df = psm_df[psm_df["decoy"] == 0].reset_index(drop=True)
+
         # self._precursor_df.set_index('precursor_idx', inplace=True)
         # Change the data type of the mods column to string
         self._precursor_df["mods"] = self._precursor_df["mods"].astype(str)
diff --git a/alphadia/workflow/peptidecentric.py b/alphadia/workflow/peptidecentric.py
index 2efe864f..ad438625 100644
--- a/alphadia/workflow/peptidecentric.py
+++ b/alphadia/workflow/peptidecentric.py
@@ -1109,6 +1109,9 @@ def _build_candidate_speclib_flat(
         "mod_sites",
         "sequence",
         "charge",
+        "rt_observed",
+        "mobility_observed",
+        "mz_observed",
     ],
 ) -> typing.Tuple[SpecLibFlat, pd.DataFrame]:
     """Build a candidate spectral library for transfer learning.
@@ -1141,6 +1144,7 @@ def _build_candidate_speclib_flat(
             "mod_sites",
             "sequence",
             "charge",
+            "rt_observed", "mobility_observed", "mz_observed"
         ]
 
     Returns
@@ -1152,13 +1156,13 @@ def _build_candidate_speclib_flat(
         Dataframe with scored candidates
     """
     # remove decoys
-    psm_df = psm_df[psm_df["decoy"] == 0]
+    # psm_df = psm_df[psm_df["decoy"] == 0]
 
-    for col in ["rt_observed", "mobility_observed", "mz_observed"]:
-        optional_columns += [col] if col in psm_df.columns else []
+    # build a filtered copy so that the optional_columns argument is not modified in place
+    _optional_columns = [col for col in optional_columns if col in psm_df.columns]
 
     scored_candidates = plexscoring.candidate_features_to_candidates(
-        psm_df, optional_columns=optional_columns
+        psm_df, optional_columns=_optional_columns
     )
 
     # create speclib with fragment_types of interest

From 7b06333779c5fd5acd7688652d503962fb514391 Mon Sep 17 00:00:00 2001
From: Vincenth Brennsteiner <brennsteiner@biochem.mpg.de>
Date: Sat, 25 May 2024 15:26:01 +0200
Subject: [PATCH 14/48] changed grouping.py docstring to numpy format, moved
 data to testdata, stripped notebook outputs, adapted grouping.py parsimony
 grouping argument.

---
 .gitignore                                    |    1 -
 alphadia/grouping.py                          |   43 +-
 .../protein_grouping_tutorial.ipynb           | 1346 +----------------
 3 files changed, 30 insertions(+), 1360 deletions(-)

diff --git a/.gitignore b/.gitignore
index 172f9b21..83dfd498 100644
--- a/.gitignore
+++ b/.gitignore
@@ -139,7 +139,6 @@ dmypy.json
 
 # Data
 testdata/
-nbs/debug/dev_grouping_comparison_data/
 
 ######################
 # OS generated files #
diff --git a/alphadia/grouping.py b/alphadia/grouping.py
index 5d64d499..1810c4af 100644
--- a/alphadia/grouping.py
+++ b/alphadia/grouping.py
@@ -14,21 +14,21 @@
 def group_and_parsimony(
     precursor_idx: NDArray[np.int64],
     precursor_ids: NDArray[Any],
-    return_groups: bool = False,
+    return_parsimony_groups: bool = False,
 ):
     """Function to group ids based on precursor indices and return groups & master ids as lists
 
-    Args:
-        precursor_idx (np.array[int]): array containing unique integer indices corresponding
-            to each peptide precursor
-        precursor_ids (np.array[str]): array of variable length semicolon separated str belonging
-            to a given peptide precursor id
+    Parameters
+    ----------
+    precursor_idx : np.array[int]
+        Array containing unique integer indices corresponding to each peptide precursor
+    precursor_ids : np.array[str] 
+        Array of variable length semicolon separated str belonging to a given peptide precursor id
 
     Returns
-        ids (list[str]): list of ids linked to a given peptide precursor, such that each
-            precursor only belongs to one id. This list is ordered by precursor_idx.
-        groups (list[str]): list of semicolon separated ids belonging to a given peptide precursor,
-            such that each precursor only belongs to one group. This list is ordered by precursor_idx.
+    -------
+    tuple
+        Tuple containing two lists: ids and groups. Each list is ordered by precursor_idx
 
     """
 
@@ -64,7 +64,7 @@ def group_and_parsimony(
             id_dict[subject_protein] = new_subject_set
             # With the following lines commented out, the query will only eliminate peptides from
             # respective subject proteins, but we will not add them to the query group
-            if return_groups and len(new_subject_set) == 0:
+            if return_parsimony_groups and len(new_subject_set) == 0:
                 query_group.append(subject_protein)
 
         # save query to output lists
@@ -112,14 +112,21 @@ def perform_grouping(
 ):
     """Highest level function for grouping proteins in precursor table
 
-    Args:
-        psm (pd.DataFrame) : Precursor table with columns "precursor_idx" and protein & decoy columns.
-        gene_or_protein (str, optional) : Column to group proteins by. Defaults to "proteins".
-        decoy_column (str, optional) : Column to use for decoy annotation. Defaults to "decoy".
-        group (bool, optional) : Whether to group proteins. Defaults to True.
+    Parameters
+    ----------
+    psm : pd.DataFrame
+        Precursor table with columns "precursor_idx" and protein & decoy columns.
+    gene_or_protein : str
+        Column to group proteins by. Defaults to "proteins".
+    decoy_column : str
+        Column to use for decoy annotation. Defaults to "decoy".
+    group : bool
+        Whether to group proteins. Defaults to True.
 
-    Returns:
-        pd.DataFrame: Precursor table with grouped proteins
+    Returns
+    -------
+    pd.DataFrame : 
+        Precursor table with grouped proteins
 
     """
 
diff --git a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
index 780d4fb3..923de778 100644
--- a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
+++ b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
@@ -25,7 +25,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -40,152 +40,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>genes</th>\n",
-       "      <th>decoy</th>\n",
-       "      <th>pg_master</th>\n",
-       "      <th>pg</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0</td>\n",
-       "      <td>P1;P2;P3;P4</td>\n",
-       "      <td>P1;P2;P3;P4</td>\n",
-       "      <td>0</td>\n",
-       "      <td>P1</td>\n",
-       "      <td>P1;P4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>0</td>\n",
-       "      <td>P1;P2;P3;P4</td>\n",
-       "      <td>P1;P2;P3;P4</td>\n",
-       "      <td>0</td>\n",
-       "      <td>P1</td>\n",
-       "      <td>P1;P4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>1</td>\n",
-       "      <td>P1;P2</td>\n",
-       "      <td>P1;P2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>P1</td>\n",
-       "      <td>P1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>1</td>\n",
-       "      <td>P1;P2</td>\n",
-       "      <td>P1;P2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>P1</td>\n",
-       "      <td>P1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>2</td>\n",
-       "      <td>P4</td>\n",
-       "      <td>P4</td>\n",
-       "      <td>0</td>\n",
-       "      <td>P4</td>\n",
-       "      <td>P4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>2</td>\n",
-       "      <td>P4</td>\n",
-       "      <td>P4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>P4</td>\n",
-       "      <td>P4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>3</td>\n",
-       "      <td>P4;P5</td>\n",
-       "      <td>P4;P5</td>\n",
-       "      <td>1</td>\n",
-       "      <td>P4</td>\n",
-       "      <td>P4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>3</td>\n",
-       "      <td>P4;P5</td>\n",
-       "      <td>P4;P5</td>\n",
-       "      <td>1</td>\n",
-       "      <td>P4</td>\n",
-       "      <td>P4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>4</td>\n",
-       "      <td>P6</td>\n",
-       "      <td>P6</td>\n",
-       "      <td>1</td>\n",
-       "      <td>P6</td>\n",
-       "      <td>P6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>4</td>\n",
-       "      <td>P6</td>\n",
-       "      <td>P6</td>\n",
-       "      <td>1</td>\n",
-       "      <td>P6</td>\n",
-       "      <td>P6</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx     proteins        genes  decoy pg_master     pg\n",
-       "0              0  P1;P2;P3;P4  P1;P2;P3;P4      0        P1  P1;P4\n",
-       "1              0  P1;P2;P3;P4  P1;P2;P3;P4      0        P1  P1;P4\n",
-       "2              1        P1;P2        P1;P2      0        P1     P1\n",
-       "3              1        P1;P2        P1;P2      0        P1     P1\n",
-       "4              2           P4           P4      0        P4     P4\n",
-       "5              2           P4           P4      1        P4     P4\n",
-       "6              3        P4;P5        P4;P5      1        P4     P4\n",
-       "7              3        P4;P5        P4;P5      1        P4     P4\n",
-       "8              4           P6           P6      1        P6     P6\n",
-       "9              4           P6           P6      1        P6     P6"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# example data showcases the collapse of one precursor - multiple protein inputs to one precursor - one master protein (pg_master). All proteins that can be grouped according to\n",
     "precursor_idx = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4]\n",
@@ -254,1202 +111,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "distinct proteins\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>A</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy\n",
-       "0              1        A      0\n",
-       "1              2        A      0\n",
-       "2              3        B      0\n",
-       "3              4        B      0"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "      <th>pg_master</th>\n",
-       "      <th>pg</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>A</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>B</td>\n",
-       "      <td>B</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>B</td>\n",
-       "      <td>B</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy pg_master pg\n",
-       "0              1        A      0         A  A\n",
-       "1              2        A      0         A  A\n",
-       "2              3        B      0         B  B\n",
-       "3              4        B      0         B  B"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "differentiable proteins\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy\n",
-       "0              1        A      0\n",
-       "1              2      A;B      0\n",
-       "2              3      A;B      0\n",
-       "3              4        B      0"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "      <th>pg_master</th>\n",
-       "      <th>pg</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A;B</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A;B</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>B</td>\n",
-       "      <td>B</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy pg_master   pg\n",
-       "0              1        A      0         A    A\n",
-       "1              2      A;B      0         A  A;B\n",
-       "2              3      A;B      0         A  A;B\n",
-       "3              4        B      0         B    B"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "indistinguishable proteins\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy\n",
-       "0              1      A;B      0\n",
-       "1              2      A;B      0\n",
-       "2              3      A;B      0\n",
-       "3              4      A;B      0"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "      <th>pg_master</th>\n",
-       "      <th>pg</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy pg_master pg\n",
-       "0              1      A;B      0         A  A\n",
-       "1              2      A;B      0         A  A\n",
-       "2              3      A;B      0         A  A\n",
-       "3              4      A;B      0         A  A"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "subset proteins\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy\n",
-       "0              1        A      0\n",
-       "1              2      A;B      0\n",
-       "2              3      A;B      0\n",
-       "3              4      A;B      0"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "      <th>pg_master</th>\n",
-       "      <th>pg</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy pg_master pg\n",
-       "0              1        A      0         A  A\n",
-       "1              2      A;B      0         A  A\n",
-       "2              3      A;B      0         A  A\n",
-       "3              4      A;B      0         A  A"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "subsumable proteins\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>B;C</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>C</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy\n",
-       "0              1        A      0\n",
-       "1              2      A;B      0\n",
-       "2              3      B;C      0\n",
-       "3              4        C      0"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "      <th>pg_master</th>\n",
-       "      <th>pg</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>B;C</td>\n",
-       "      <td>0</td>\n",
-       "      <td>C</td>\n",
-       "      <td>C</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>C</td>\n",
-       "      <td>0</td>\n",
-       "      <td>C</td>\n",
-       "      <td>C</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy pg_master pg\n",
-       "0              1        A      0         A  A\n",
-       "1              2      A;B      0         A  A\n",
-       "2              3      B;C      0         C  C\n",
-       "3              4        C      0         C  C"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "shared only\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>A;B;C</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>A;B;C</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>A;C</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy\n",
-       "0              1      A;B      0\n",
-       "1              2    A;B;C      0\n",
-       "2              3    A;B;C      0\n",
-       "3              4      A;C      0"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "      <th>pg_master</th>\n",
-       "      <th>pg</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A;B</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>A;B;C</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>A;B;C</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>A;C</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy pg_master pg\n",
-       "0              1      A;B      0         A  A\n",
-       "1              2    A;B;C      0         A  A\n",
-       "2              3    A;B;C      0         A  A\n",
-       "3              4      A;C      0         A  A"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "circular\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A;B;C</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>B;C;D</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>C;D;E</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>D;E;A</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy\n",
-       "0              1    A;B;C      0\n",
-       "1              2    B;C;D      0\n",
-       "2              3    C;D;E      0\n",
-       "3              4    D;E;A      0"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "      <th>pg_master</th>\n",
-       "      <th>pg</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A;B;C</td>\n",
-       "      <td>0</td>\n",
-       "      <td>C</td>\n",
-       "      <td>A;C</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>B;C;D</td>\n",
-       "      <td>0</td>\n",
-       "      <td>C</td>\n",
-       "      <td>C</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>C;D;E</td>\n",
-       "      <td>0</td>\n",
-       "      <td>C</td>\n",
-       "      <td>C</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>D;E;A</td>\n",
-       "      <td>0</td>\n",
-       "      <td>A</td>\n",
-       "      <td>A</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx proteins  decoy pg_master   pg\n",
-       "0              1    A;B;C      0         C  A;C\n",
-       "1              2    B;C;D      0         C    C\n",
-       "2              3    C;D;E      0         C    C\n",
-       "3              4    D;E;A      0         A    A"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "complex example\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0</td>\n",
-       "      <td>P1;P2;P3;P4</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>P1;P4</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>2</td>\n",
-       "      <td>P2</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>3</td>\n",
-       "      <td>P2;P5</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx     proteins  decoy\n",
-       "0              0  P1;P2;P3;P4      0\n",
-       "1              1        P1;P4      0\n",
-       "2              2           P2      0\n",
-       "3              3        P2;P5      0"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>precursor_idx</th>\n",
-       "      <th>proteins</th>\n",
-       "      <th>decoy</th>\n",
-       "      <th>pg_master</th>\n",
-       "      <th>pg</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0</td>\n",
-       "      <td>P1;P2;P3;P4</td>\n",
-       "      <td>0</td>\n",
-       "      <td>P2</td>\n",
-       "      <td>P1;P2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>P1;P4</td>\n",
-       "      <td>0</td>\n",
-       "      <td>P1</td>\n",
-       "      <td>P1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>2</td>\n",
-       "      <td>P2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>P2</td>\n",
-       "      <td>P2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>3</td>\n",
-       "      <td>P2;P5</td>\n",
-       "      <td>0</td>\n",
-       "      <td>P2</td>\n",
-       "      <td>P2</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   precursor_idx     proteins  decoy pg_master     pg\n",
-       "0              0  P1;P2;P3;P4      0        P2  P1;P2\n",
-       "1              1        P1;P4      0        P1     P1\n",
-       "2              2           P2      0        P2     P2\n",
-       "3              3        P2;P5      0        P2     P2"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# test cases per Nesvizhskii, Alexey I., and Ruedi Aebersold. \"Interpretation of shotgun proteomic data.\" Molecular & cellular proteomics 4.10 (2005): 1419-1440. Figure 5 (see Appendix)\n",
     "def test_grouping():\n",

From 4312a35c125a3b060057fbccf9c39163e7aa6156 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Wed, 29 May 2024 16:40:58 +0200
Subject: [PATCH 15/48] fix alphabase requirement

---
 requirements/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 80cfff2f..c543661d 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -5,7 +5,7 @@ numba==0.59.1
 argparse==1.4.0
 alpharaw==0.4.5
 alphatims==1.0.8
-alphabase==1.2.3
+alphabase==1.2.4
 peptdeep==1.1.9
 progressbar==2.5
 neptune==1.10.4

From b08d43ae0ae2d134f78f015c4bd2fdecb00278f1 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Fri, 7 Jun 2024 09:34:44 -0700
Subject: [PATCH 16/48] FIX fasta for Bruker

---
 alphadia/libtransform.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/alphadia/libtransform.py b/alphadia/libtransform.py
index 46125a60..ba1e5045 100644
--- a/alphadia/libtransform.py
+++ b/alphadia/libtransform.py
@@ -660,7 +660,7 @@ def forward(self, input: SpecLibFlat) -> SpecLibFlat:
                 "rt_norm_pred",
                 "irt",
             ],
-            "mobility_library": ["mobility_library", "mobility"],
+            "mobility_library": ["mobility_library", "mobility", "mobility_pred"],
         }
 
         fragment_columns = {
@@ -680,6 +680,7 @@ def forward(self, input: SpecLibFlat) -> SpecLibFlat:
 
         if "mobility_library" not in input.precursor_df.columns:
             input.precursor_df["mobility_library"] = 0
+            logger.warning("Library contains no ion mobility annotations")
 
         validate.precursors_flat_schema(input.precursor_df)
         validate.fragments_flat_schema(input.fragment_df)

From 1d3690951afb8972d059e31070b8d7fcbb1ff167 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Fri, 7 Jun 2024 09:41:22 -0700
Subject: [PATCH 17/48] update requirements

---
 requirements/requirements.txt       | 2 +-
 requirements/requirements_loose.txt | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index c543661d..c01d989c 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -6,7 +6,7 @@ argparse==1.4.0
 alpharaw==0.4.5
 alphatims==1.0.8
 alphabase==1.2.4
-peptdeep==1.1.9
+peptdeep==1.2.1
 progressbar==2.5
 neptune==1.10.4
 seaborn==0.13.2
diff --git a/requirements/requirements_loose.txt b/requirements/requirements_loose.txt
index 70dd08af..488d24af 100644
--- a/requirements/requirements_loose.txt
+++ b/requirements/requirements_loose.txt
@@ -5,8 +5,8 @@ numba
 argparse
 alpharaw>=0.3.1  # test: tolerate_version
 alphatims
-alphabase>=1.1.2 # test: tolerate_version
-peptdeep
+alphabase>=1.2.4 # test: tolerate_version
+peptdeep>=1.2.1 # test: tolerate_version
 progressbar
 neptune
 seaborn

From 2fd2fcf9bf9d867bb79c0c6a091961b10d48b0d2 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Mon, 10 Jun 2024 12:34:20 -0400
Subject: [PATCH 18/48] FEAT logging raw  file stats

---
 alphadia/data/alpharaw.py |  5 +++++
 alphadia/data/bruker.py   |  6 +++---
 alphadia/data/stats.py    | 44 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 3 deletions(-)
 create mode 100644 alphadia/data/stats.py

diff --git a/alphadia/data/alpharaw.py b/alphadia/data/alpharaw.py
index 31079c24..8eedc094 100644
--- a/alphadia/data/alpharaw.py
+++ b/alphadia/data/alpharaw.py
@@ -8,6 +8,8 @@
 # alphadia imports
 from alphadia import utils
 
+from alphadia.data.stats import log_stats
+
 # alpha family imports
 from alpharaw import thermo as alpharawthermo
 from alpharaw import sciex as alpharawsciex
@@ -339,6 +341,7 @@ def __init__(self, raw_file_path: str, process_count: int = 10, **kwargs):
         super().__init__(process_count=process_count)
         self.load_raw(raw_file_path)
         self.process_alpharaw(**kwargs)
+        log_stats(self.rt_values, self.cycle)
 
 
 class Sciex(AlphaRaw, alpharawsciex.SciexWiffData):
@@ -346,6 +349,7 @@ def __init__(self, raw_file_path: str, process_count: int = 10, **kwargs):
         super().__init__(process_count=process_count)
         self.load_raw(raw_file_path)
         self.process_alpharaw(**kwargs)
+        log_stats(self.rt_values, self.cycle)
 
 
 class Thermo(AlphaRaw, alpharawthermo.ThermoRawData):
@@ -353,6 +357,7 @@ def __init__(self, raw_file_path: str, process_count: int = 10, **kwargs):
         super().__init__(process_count=process_count)
         self.load_raw(raw_file_path)
         self.process_alpharaw(**kwargs)
+        log_stats(self.rt_values, self.cycle)
 
     def filter_spectra(self, cv: float = None, astral_ms1: bool = False, **kwargs):
         """
diff --git a/alphadia/data/bruker.py b/alphadia/data/bruker.py
index 520dab27..2f003998 100644
--- a/alphadia/data/bruker.py
+++ b/alphadia/data/bruker.py
@@ -7,6 +7,7 @@
 
 # alphadia imports
 from alphadia import utils
+from alphadia.data.stats import log_stats
 
 # alpha family imports
 import alphatims.utils
@@ -94,6 +95,7 @@ def __init__(
 
         # Precompile
         logger.info(f"Successfully imported data from {bruker_d_folder_name}")
+        log_stats(self.rt_values, self.cycle)
 
     def transpose(self):
         # abort if transposed data is already present
@@ -655,9 +657,7 @@ def assemble_push(
                                 relative_precursor_index[i],
                                 relative_scan,
                                 relative_precursor,
-                            ] = (
-                                accumulated_intensity + new_intensity
-                            )
+                            ] = accumulated_intensity + new_intensity
                             dense_output[
                                 1,
                                 j,
diff --git a/alphadia/data/stats.py b/alphadia/data/stats.py
new file mode 100644
index 00000000..6cabe37d
--- /dev/null
+++ b/alphadia/data/stats.py
@@ -0,0 +1,44 @@
+import numpy as np
+import logging
+
+logger = logging.getLogger()
+
+
+def log_stats(rt_values: np.array, cycle: np.array):
+    """Log raw file statistics
+
+    Parameters
+    ----------
+
+    rt_values: np.ndarray
+            retention time values in seconds for all frames
+
+    cycle: np.ndarray
+            DIA cycle object describing the msms pattern
+    """
+
+    logger.info(f"============ Raw file stats ============")
+
+    rt_limits = rt_values.min() / 60, rt_values.max() / 60
+    rt_duration_sec = rt_values.max() - rt_values.min()
+    rt_duration_min = rt_duration_sec / 60
+
+    logger.info(f"{'RT (min)':<20}: {rt_limits[0]:.1f} - {rt_limits[1]:.1f}")
+    logger.info(f"{'RT duration (sec)':<20}: {rt_duration_sec:.1f}")
+    logger.info(f"{'RT duration (min)':<20}: {rt_duration_min:.1f}")
+
+    cycle_length = cycle.shape[1]
+    cycle_duration = np.diff(rt_values[::cycle_length]).mean()
+    cycle_number = len(rt_values) // cycle_length
+
+    logger.info(f"{'Cycle len (scans)':<20}: {cycle_length:.0f}")
+    logger.info(f"{'Cycle len (sec)':<20}: {cycle_duration:.2f}")
+    logger.info(f"{'Number of cycles':<20}: {cycle_number:.0f}")
+
+    flat_cycle = cycle.flatten()
+    flat_cycle = flat_cycle[flat_cycle > 0]
+    msms_range = flat_cycle.min(), flat_cycle.max()
+
+    logger.info(f"{'MS2 range (m/z)':<20}: {msms_range[0]:.1f} - {msms_range[1]:.1f}")
+
+    logger.info(f"========================================")

From 6bc923a522447c285a4d1e942783c0099890cba6 Mon Sep 17 00:00:00 2001
From: Vincenth Brennsteiner <brennsteiner@biochem.mpg.de>
Date: Mon, 10 Jun 2024 18:49:32 +0200
Subject: [PATCH 19/48] MicroCommit: clean notebook

---
 nbs/tutorial_nbs/protein_grouping_tutorial.ipynb | 1 -
 1 file changed, 1 deletion(-)

diff --git a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
index 923de778..d9169f29 100644
--- a/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
+++ b/nbs/tutorial_nbs/protein_grouping_tutorial.ipynb
@@ -293,7 +293,6 @@
     "        \"pg\": [\"P1;P2\", \"P1\", \"P2\", \"P2\"],\n",
     "    }\n",
     "\n",
-    "\n",
     "test_grouping()"
    ]
   }

From 82301d79f52cd2ac58cbcd4497b6beb14a8b6e29 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Mon, 10 Jun 2024 12:49:32 -0400
Subject: [PATCH 20/48] FEAT set log level from config

---
 alphadia/planning.py           | 8 ++++++++
 alphadia/workflow/reporting.py | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/alphadia/planning.py b/alphadia/planning.py
index 5f8ea39e..9c13f287 100644
--- a/alphadia/planning.py
+++ b/alphadia/planning.py
@@ -123,6 +123,14 @@ def __init__(
         if "output" not in self.config:
             self.config["output"] = output_folder
 
+        # set log level
+        level_to_set = self.config["general"]["log_level"]
+        level_code = logging.getLevelNamesMapping().get(level_to_set)
+        if level_code is None:
+            logger.error(f"Setting logging to unknown level {level_to_set}")
+        else:
+            logger.setLevel(level_code)
+
         self.load_library()
 
         torch.set_num_threads(self.config["general"]["thread_count"])
diff --git a/alphadia/workflow/reporting.py b/alphadia/workflow/reporting.py
index 21c2b1e5..63acf82f 100644
--- a/alphadia/workflow/reporting.py
+++ b/alphadia/workflow/reporting.py
@@ -25,7 +25,7 @@
 
 # Add a new logging level to the default logger
 # This has to happen at load time to make the .progress() method available even if no logger is instantiated
-PROGRESS_LEVELV_NUM = 100
+PROGRESS_LEVELV_NUM = 21
 logging.PROGRESS = PROGRESS_LEVELV_NUM
 logging.addLevelName(PROGRESS_LEVELV_NUM, "PROGRESS")
 

From fccc463374ee10df83abb00cdb42b9e6ca9f4db7 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Mon, 10 Jun 2024 15:04:51 -0400
Subject: [PATCH 21/48] pip no cache

---
 misc/pip_install.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/misc/pip_install.sh b/misc/pip_install.sh
index d8ce0368..28dd4d5f 100644
--- a/misc/pip_install.sh
+++ b/misc/pip_install.sh
@@ -3,7 +3,7 @@ set -e -u
 INSTALL_TYPE=$1 # stable, loose, etc..
 ENV_NAME=${2:-alphadia}
 
-conda create -n $ENV_NAME python=3.9 -y
+conda create -n $ENV_NAME python=3.11 -y
 
 if [ "$INSTALL_TYPE" = "loose" ]; then
   INSTALL_STRING=""
@@ -12,5 +12,5 @@ else
 fi
 
 # conda 'run' vs. 'activate', cf. https://stackoverflow.com/a/72395091
-conda run -n $ENV_NAME --no-capture-output pip install -e "../.$INSTALL_STRING"
+conda run -n $ENV_NAME --no-capture-output pip install --no-cache-dir -e "../.$INSTALL_STRING"
 conda run -n $ENV_NAME --no-capture-output alphadia -v

From d59c6e03e475b97ea254e32b05d4f35423afc2e7 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Mon, 10 Jun 2024 15:05:05 -0400
Subject: [PATCH 22/48] pip no cache

---
 misc/pip_install.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/misc/pip_install.sh b/misc/pip_install.sh
index 28dd4d5f..196ed5ee 100644
--- a/misc/pip_install.sh
+++ b/misc/pip_install.sh
@@ -3,7 +3,7 @@ set -e -u
 INSTALL_TYPE=$1 # stable, loose, etc..
 ENV_NAME=${2:-alphadia}
 
-conda create -n $ENV_NAME python=3.11 -y
+conda create -n $ENV_NAME python=3.9 -y
 
 if [ "$INSTALL_TYPE" = "loose" ]; then
   INSTALL_STRING=""

From 92a240fe19401a0ed196fdb92d2c826cf8fc30d1 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Mon, 10 Jun 2024 15:56:54 -0400
Subject: [PATCH 23/48] fix testcase

---
 tests/unit_tests/conftest.py               |  7 +++++--
 tests/unit_tests/test_outputaccumulator.py | 14 +++++++-------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py
index c8df3b31..b1a90b82 100644
--- a/tests/unit_tests/conftest.py
+++ b/tests/unit_tests/conftest.py
@@ -13,6 +13,7 @@
 
 def mock_precursor_df(
     n_precursor: int = 100,
+    with_decoy=True,
 ) -> pd.DataFrame:
     """Create a mock precursor dataframe as it's found as the individual search outputs
 
@@ -30,7 +31,6 @@ def mock_precursor_df(
     """
 
     precursor_idx = np.arange(n_precursor)
-    decoy = np.zeros(n_precursor)
     precursor_mz = np.random.rand(n_precursor) * 2000 + 500
     precursor_charge = np.random.choice([2, 3], size=n_precursor)
 
@@ -40,7 +40,10 @@ def mock_precursor_df(
     proteins = np.random.choice(protein_names, size=n_precursor)
     genes = proteins
 
-    decoy = np.concatenate([np.zeros(n_precursor // 2), np.ones(n_precursor // 2)])
+    if with_decoy:
+        decoy = np.concatenate([np.zeros(n_precursor // 2), np.ones(n_precursor // 2)])
+    else:
+        decoy = np.zeros(n_precursor)
     proba = np.zeros(n_precursor) + decoy * np.random.rand(n_precursor)
     qval = np.random.rand(n_precursor) * 10e-3
 
diff --git a/tests/unit_tests/test_outputaccumulator.py b/tests/unit_tests/test_outputaccumulator.py
index 239ed3ee..097e59f5 100644
--- a/tests/unit_tests/test_outputaccumulator.py
+++ b/tests/unit_tests/test_outputaccumulator.py
@@ -1,6 +1,7 @@
 import os
 import tempfile
 import numpy as np
+import pandas as pd
 from conftest import mock_precursor_df, mock_fragment_df
 from alphadia import outputtransform
 from alphabase.spectral_library.base import SpecLibBase
@@ -67,7 +68,7 @@ def prepare_input_data():
     # setup raw folders
     raw_folders = [os.path.join(progress_folder, run) for run in run_columns]
 
-    psm_base_df = mock_precursor_df(n_precursor=100)
+    psm_base_df = mock_precursor_df(n_precursor=100, with_decoy=True)
     fragment_base_df = mock_fragment_df(n_precursor=200, n_fragments=10)
 
     psm_dfs = []
@@ -122,12 +123,11 @@ def test_complete_output_accumulation():
         os.path.join(temp_folder, f"{output.TRANSFER_OUTPUT}.hdf"), load_mod_seq=True
     )
 
-    # Then: all unique precursors should be in the built library
-    number_of_unique_precursors = len(
-        np.unique(
-            np.concatenate([psm_df["precursor_idx"].values for psm_df in psm_dfs])
-        )
-    )
+    # Then: all unique non-decoy precursors should be in the built library
+    union_psm_df = pd.concat(psm_dfs)
+    union_psm_df = union_psm_df[union_psm_df["decoy"] == 0]
+    number_of_unique_precursors = len(np.unique(union_psm_df["precursor_idx"]))
+
     assert (
         len(np.unique(built_lib.precursor_df["precursor_idx"]))
         == number_of_unique_precursors

From 7250ced981f2d19ab6001382075565775cd97466 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Mon, 10 Jun 2024 18:03:58 -0400
Subject: [PATCH 24/48] use parquet as intermediate output

---
 alphadia/outputaccumulator.py |  4 ++--
 alphadia/outputtransform.py   | 16 ++++------------
 alphadia/planning.py          |  8 ++++----
 3 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/alphadia/outputaccumulator.py b/alphadia/outputaccumulator.py
index 5328acab..f9465711 100644
--- a/alphadia/outputaccumulator.py
+++ b/alphadia/outputaccumulator.py
@@ -122,8 +122,8 @@ def parse_output_folder(
 
 
         """
-        psm_df = pd.read_csv(os.path.join(folder, "psm.tsv"), sep="\t")
-        frag_df = pd.read_csv(os.path.join(folder, "frag.tsv"), sep="\t")
+        psm_df = pd.read_parquet(os.path.join(folder, "psm.parquet"))
+        frag_df = pd.read_parquet(os.path.join(folder, "frag.parquet"))
 
         assert set(
             selected_precursor_columns
diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py
index b62dc1cf..9276cff7 100644
--- a/alphadia/outputtransform.py
+++ b/alphadia/outputtransform.py
@@ -54,22 +54,14 @@ def get_frag_df_generator(folder_list: List[str]):
 
     for folder in folder_list:
         raw_name = os.path.basename(folder)
-        frag_path = os.path.join(folder, "frag.tsv")
+        frag_path = os.path.join(folder, "frag.parquet")
 
         if not os.path.exists(frag_path):
             logger.warning(f"no frag file found for {raw_name}")
         else:
             try:
                 logger.info(f"reading frag file for {raw_name}")
-                run_df = pd.read_csv(
-                    frag_path,
-                    sep="\t",
-                    dtype={
-                        "precursor_idx": np.uint32,
-                        "number": np.uint8,
-                        "type": np.uint8,
-                    },
-                )
+                run_df = pd.read_parquet(frag_path)
             except Exception as e:
                 logger.warning(f"Error reading frag file for {raw_name}")
                 logger.warning(e)
@@ -497,7 +489,7 @@ def build_precursor_table(
 
         for folder in folder_list:
             raw_name = os.path.basename(folder)
-            psm_path = os.path.join(folder, f"{self.PSM_INPUT}.tsv")
+            psm_path = os.path.join(folder, f"{self.PSM_INPUT}.parquet")
 
             logger.info(f"Building output for {raw_name}")
 
@@ -506,7 +498,7 @@ def build_precursor_table(
                 run_df = pd.DataFrame()
             else:
                 try:
-                    run_df = pd.read_csv(psm_path, sep="\t")
+                    run_df = pd.read_parquet(psm_path)
                 except Exception as e:
                     logger.warning(f"Error reading psm file for {raw_name}")
                     logger.warning(e)
diff --git a/alphadia/planning.py b/alphadia/planning.py
index 784028ff..f9fe1292 100644
--- a/alphadia/planning.py
+++ b/alphadia/planning.py
@@ -283,8 +283,8 @@ def run(
                 workflow_folder_list.append(workflow.path)
 
                 # check if the raw file is already processed
-                psm_location = os.path.join(workflow.path, "psm.tsv")
-                frag_location = os.path.join(workflow.path, "frag.tsv")
+                psm_location = os.path.join(workflow.path, "psm.parquet")
+                frag_location = os.path.join(workflow.path, "frag.parquet")
 
                 if self.config["general"]["reuse_quant"]:
                     if os.path.exists(psm_location) and os.path.exists(frag_location):
@@ -306,8 +306,8 @@ def run(
                     psm_df, frag_df = workflow.requantify_fragments(psm_df)
 
                 psm_df["run"] = raw_name
-                psm_df.to_csv(psm_location, sep="\t", index=False)
-                frag_df.to_csv(frag_location, sep="\t", index=False)
+                psm_df.to_parquet(psm_location, index=False)
+                frag_df.to_parquet(frag_location, index=False)
 
                 workflow.reporter.log_string(f"Finished workflow for {raw_name}")
                 workflow.reporter.context.__exit__(None, None, None)

From 64c9c94dcae5e47f2add19eea6e58fb001920b22 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Mon, 10 Jun 2024 19:14:53 -0400
Subject: [PATCH 25/48] universal read write

---
 alphadia/consensus/utils.py        | 84 ++++++++++++++++++++++++++++++
 alphadia/constants/default.yaml    |  2 +
 alphadia/outputtransform.py        | 16 ++----
 alphadia/peakgroup/search.py       |  2 +-
 tests/unit_tests/conftest.py       | 12 +++++
 tests/unit_tests/test_consensus.py | 25 +++++++++
 6 files changed, 127 insertions(+), 14 deletions(-)
 create mode 100644 alphadia/consensus/utils.py
 create mode 100644 tests/unit_tests/test_consensus.py

diff --git a/alphadia/consensus/utils.py b/alphadia/consensus/utils.py
new file mode 100644
index 00000000..71054635
--- /dev/null
+++ b/alphadia/consensus/utils.py
@@ -0,0 +1,84 @@
+import logging
+import os
+import pandas as pd
+
+logger = logging.getLogger()
+supported_formats = ["parquet", "tsv"]
+
+
+def read_df(path_no_format, file_format="parquet"):
+    """Read dataframe from disk with chosen file format
+
+    Parameters
+    ----------
+
+    path_no_format: str
+        File to read from disk without file format
+
+    file_format: str, default = 'parquet'
+        File format for loading the file. Available options: ['parquet', 'tsv']
+
+    Returns
+    -------
+
+    pd.DataFrame
+        loaded dataframe from disk
+
+    """
+
+    if file_format not in supported_formats:
+        raise ValueError(
+            f"Provided unknown file format: {file_format}, supported_formats: {supported_formats}"
+        )
+
+    file_path = f"{path_no_format}.{file_format}"
+
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"Can't load file as file was not found: {file_path}")
+
+    logger.info(f"Reading {file_path} from disk")
+
+    if file_format == "parquet":
+        return pd.read_parquet(file_path)
+
+    elif file_format == "tsv":
+        return pd.read_csv(file_path, sep="\t")
+
+    else:
+        raise ValueError("I don't know how you ended up here")
+
+
+def write_df(df, path_no_format, file_format="parquet"):
+    """Write dataframe to disk with chosen file format
+
+    Parameters
+    ----------
+
+    df: pd.DataFrame
+        Dataframe to save to disk
+
+    path_no_format: str
+        Path for file without format
+
+    file_format: str, default = 'parquet'
+        File format for saving the file. Available options: ['parquet', 'tsv']
+
+    """
+
+    if file_format not in supported_formats:
+        raise ValueError(
+            f"Provided unknown file format: {file_format}, supported_formats: {supported_formats}"
+        )
+
+    file_path = f"{path_no_format}.{file_format}"
+
+    logger.info(f"Saving {file_path} to disk")
+
+    if file_format == "parquet":
+        df.to_parquet(file_path, index=False)
+
+    elif file_format == "tsv":
+        df.to_csv(file_path, sep="\t", index=False)
+
+    else:
+        raise ValueError("I don't know how you ended up here")
diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml
index 5339a3df..4833a500 100644
--- a/alphadia/constants/default.yaml
+++ b/alphadia/constants/default.yaml
@@ -129,6 +129,8 @@ search_output:
   num_samples_quadratic: 50
   min_nonnan: 3
   normalize_lfq: True
+  # can be either "parquet" or "tsv"
+  file_format: "parquet"
 
 # configuration for the optimization manager
 # initial parameters, will nbe optimized
diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py
index 9276cff7..c8ce6985 100644
--- a/alphadia/outputtransform.py
+++ b/alphadia/outputtransform.py
@@ -445,20 +445,10 @@ def load_precursor_table(self):
             Precursor table
         """
 
-        if not os.path.exists(
-            os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}.tsv")
-        ):
-            logger.error(
-                f"Can't continue as no {self.PRECURSOR_OUTPUT}.tsv file was found in the output folder: {self.output_folder}"
-            )
-            raise FileNotFoundError(
-                f"Can't continue as no {self.PRECURSOR_OUTPUT}.tsv file was found in the output folder: {self.output_folder}"
-            )
-        logger.info(f"Reading {self.PRECURSOR_OUTPUT}.tsv file")
-        psm_df = pd.read_csv(
-            os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}.tsv"), sep="\t"
+        return read_df(
+            os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}"),
+            file_type=self.config["file_format"],
         )
-        return psm_df
 
     def build_precursor_table(
         self,
diff --git a/alphadia/peakgroup/search.py b/alphadia/peakgroup/search.py
index 0b97d445..9f3f6a42 100644
--- a/alphadia/peakgroup/search.py
+++ b/alphadia/peakgroup/search.py
@@ -1041,7 +1041,7 @@ def assemble_candidates(self, elution_group_container):
                     precursor_flat_lookup
                 ]
 
-        # save features for training if desired.
+        # DEBUG: save features for training if desired.
         if self.feature_path is not None:
             feature_matrix = np.zeros(
                 (len(candidates), len(candidates[0].features)), dtype=np.float32
diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py
index b1a90b82..1976b9d7 100644
--- a/tests/unit_tests/conftest.py
+++ b/tests/unit_tests/conftest.py
@@ -4,6 +4,7 @@
 import pandas as pd
 import numpy as np
 import matplotlib
+import tempfile
 
 matplotlib.use("Agg")
 from matplotlib import pyplot as plt
@@ -184,3 +185,14 @@ def pytest_configure(config):
         pytest.test_data[raw_folder] = raw_files
 
     # important to supress matplotlib output
+
+
+def _random_tempfolder():
+    tempdir = tempfile.gettempdir()
+    # 6 alphanumeric characters
+    random_foldername = "alphadia_" + "".join(
+        np.random.choice(list("abcdefghijklmnopqrstuvwxyz0123456789"), 6)
+    )
+    path = os.path.join(tempdir, random_foldername)
+    os.mkdir(path)
+    return path
diff --git a/tests/unit_tests/test_consensus.py b/tests/unit_tests/test_consensus.py
new file mode 100644
index 00000000..f592e4ee
--- /dev/null
+++ b/tests/unit_tests/test_consensus.py
@@ -0,0 +1,25 @@
+import pytest
+import pandas as pd
+import os
+from conftest import _random_tempfolder
+from alphadia.consensus.utils import read_df, write_df
+
+
+@pytest.mark.parametrize(
+    "format, should_fail",
+    [("tsv", False), ("parquet", False), ("a321", True)],
+)
+def test_read_write(format, should_fail):
+    # given
+    df = pd.DataFrame([{"a": "a", "b": "b"}, {"a": "a", "b": "b"}])
+    path = os.path.join(_random_tempfolder())
+
+    # when
+    if should_fail:
+        with pytest.raises(ValueError):
+            write_df(df, path, file_format=format)
+
+    else:
+        write_df(df, path, file_format=format)
+        _df = read_df(path, file_format=format)
+        assert df.equals(_df)

From 7876db62e3cc68bccb87775e8d1b082f0360ce1f Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Mon, 10 Jun 2024 19:37:12 -0400
Subject: [PATCH 26/48] variable output format

---
 alphadia/consensus/utils.py |  2 +-
 alphadia/outputtransform.py | 39 +++++++++++++++++--------------------
 2 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/alphadia/consensus/utils.py b/alphadia/consensus/utils.py
index 71054635..07638fb6 100644
--- a/alphadia/consensus/utils.py
+++ b/alphadia/consensus/utils.py
@@ -78,7 +78,7 @@ def write_df(df, path_no_format, file_format="parquet"):
         df.to_parquet(file_path, index=False)
 
     elif file_format == "tsv":
-        df.to_csv(file_path, sep="\t", index=False)
+        df.to_csv(file_path, sep="\t", index=False, float_format="%.6f")
 
     else:
         raise ValueError("I don't know how you ended up here")
diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py
index c8ce6985..9f1746d6 100644
--- a/alphadia/outputtransform.py
+++ b/alphadia/outputtransform.py
@@ -10,7 +10,7 @@
     TransferLearningAccumulator,
     AccumulationBroadcaster,
 )
-
+from alphadia.consensus.utils import read_df, write_df
 
 import pandas as pd
 import numpy as np
@@ -578,11 +578,10 @@ def build_precursor_table(
             psm_df = psm_df[psm_df["decoy"] == 0]
         if save:
             logger.info("Writing precursor output to disk")
-            psm_df.to_csv(
-                os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}.tsv"),
-                sep="\t",
-                index=False,
-                float_format="%.6f",
+            write_df(
+                psm_df,
+                os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}"),
+                file_format=self.config["search_output"]["file_format"],
             )
 
         return psm_df
@@ -630,11 +629,10 @@ def build_stat_df(
 
         if save:
             logger.info("Writing stat output to disk")
-            stat_df.to_csv(
-                os.path.join(self.output_folder, f"{self.STAT_OUTPUT}.tsv"),
-                sep="\t",
-                index=False,
-                float_format="%.6f",
+            write_df(
+                stat_df,
+                os.path.join(self.output_folder, f"{self.STAT_OUTPUT}"),
+                file_format="tsv",
             )
 
         return stat_df
@@ -712,11 +710,11 @@ def build_lfq_tables(
 
             if save:
                 logger.info(f"Writing {group_nice} output to disk")
-                lfq_df.to_csv(
-                    os.path.join(self.output_folder, f"{group_nice}.matrix.tsv"),
-                    sep="\t",
-                    index=False,
-                    float_format="%.6f",
+
+                write_df(
+                    lfq_df,
+                    os.path.join(self.output_folder, f"{group_nice}.matrix"),
+                    file_format=self.config["search_output"]["file_format"],
                 )
 
         protein_df_melted = lfq_df.melt(
@@ -727,11 +725,10 @@ def build_lfq_tables(
 
         if save:
             logger.info("Writing psm output to disk")
-            psm_df.to_csv(
-                os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}.tsv"),
-                sep="\t",
-                index=False,
-                float_format="%.6f",
+            write_df(
+                psm_df,
+                os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}"),
+                file_format=self.config["search_output"]["file_format"],
             )
 
         return lfq_df

From 74bb3621a3817eba07200843dbb76b77a3669f61 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Mon, 10 Jun 2024 19:58:58 -0400
Subject: [PATCH 27/48] fix tests

---
 alphadia/outputtransform.py                |  2 +-
 tests/unit_tests/conftest.py               |  4 ++--
 tests/unit_tests/test_consensus.py         |  4 ++--
 tests/unit_tests/test_outputaccumulator.py |  6 +++---
 tests/unit_tests/test_outputtransform.py   | 11 ++++++-----
 tests/unit_tests/test_reporting.py         | 23 +++++++---------------
 6 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py
index 9f1746d6..75a0db4b 100644
--- a/alphadia/outputtransform.py
+++ b/alphadia/outputtransform.py
@@ -447,7 +447,7 @@ def load_precursor_table(self):
 
         return read_df(
             os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}"),
-            file_type=self.config["file_format"],
+            file_format=self.config["search_output"]["file_format"],
         )
 
     def build_precursor_table(
diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py
index 1976b9d7..5f977eb9 100644
--- a/tests/unit_tests/conftest.py
+++ b/tests/unit_tests/conftest.py
@@ -187,12 +187,12 @@ def pytest_configure(config):
     # important to supress matplotlib output
 
 
-def _random_tempfolder():
+def random_tempfolder():
     tempdir = tempfile.gettempdir()
     # 6 alphanumeric characters
     random_foldername = "alphadia_" + "".join(
         np.random.choice(list("abcdefghijklmnopqrstuvwxyz0123456789"), 6)
     )
     path = os.path.join(tempdir, random_foldername)
-    os.mkdir(path)
+    os.makedirs(path, exist_ok=True)
     return path
diff --git a/tests/unit_tests/test_consensus.py b/tests/unit_tests/test_consensus.py
index f592e4ee..65aa5b19 100644
--- a/tests/unit_tests/test_consensus.py
+++ b/tests/unit_tests/test_consensus.py
@@ -1,7 +1,7 @@
 import pytest
 import pandas as pd
 import os
-from conftest import _random_tempfolder
+from conftest import random_tempfolder
 from alphadia.consensus.utils import read_df, write_df
 
 
@@ -12,7 +12,7 @@
 def test_read_write(format, should_fail):
     # given
     df = pd.DataFrame([{"a": "a", "b": "b"}, {"a": "a", "b": "b"}])
-    path = os.path.join(_random_tempfolder())
+    path = os.path.join(random_tempfolder())
 
     # when
     if should_fail:
diff --git a/tests/unit_tests/test_outputaccumulator.py b/tests/unit_tests/test_outputaccumulator.py
index 097e59f5..9de649b4 100644
--- a/tests/unit_tests/test_outputaccumulator.py
+++ b/tests/unit_tests/test_outputaccumulator.py
@@ -98,9 +98,9 @@ def prepare_input_data():
 
     for i, raw_folder in enumerate(raw_folders):
         os.makedirs(raw_folder, exist_ok=True)
-        psm_dfs[i].to_csv(os.path.join(raw_folder, "psm.tsv"), sep="\t", index=False)
-        fragment_dfs[i].to_csv(
-            os.path.join(raw_folder, "frag.tsv"), sep="\t", index=False
+        psm_dfs[i].to_parquet(os.path.join(raw_folder, "psm.parquet"), index=False)
+        fragment_dfs[i].to_parquet(
+            os.path.join(raw_folder, "frag.parquet"), index=False
         )
 
     return config, temp_folder, raw_folders, psm_dfs, fragment_dfs
diff --git a/tests/unit_tests/test_outputtransform.py b/tests/unit_tests/test_outputtransform.py
index 91a18926..bdbe038a 100644
--- a/tests/unit_tests/test_outputtransform.py
+++ b/tests/unit_tests/test_outputtransform.py
@@ -28,6 +28,7 @@ def test_output_transform():
             "normalize_lfq": True,
             "peptide_level_lfq": False,
             "precursor_level_lfq": False,
+            "file_format": "parquet",
         },
     }
 
@@ -52,8 +53,8 @@ def test_output_transform():
             fragment_base_df["precursor_idx"].isin(psm_df["precursor_idx"])
         ]
 
-        frag_df.to_csv(os.path.join(raw_folder, "frag.tsv"), sep="\t", index=False)
-        psm_df.to_csv(os.path.join(raw_folder, "psm.tsv"), sep="\t", index=False)
+        frag_df.to_parquet(os.path.join(raw_folder, "frag.parquet"), index=False)
+        psm_df.to_parquet(os.path.join(raw_folder, "psm.parquet"), index=False)
 
     output = outputtransform.SearchPlanOutput(config, temp_folder)
     _ = output.build_precursor_table(raw_folders, save=True)
@@ -61,8 +62,8 @@ def test_output_transform():
     _ = output.build_lfq_tables(raw_folders, save=True)
 
     # validate psm_df output
-    psm_df = pd.read_csv(
-        os.path.join(temp_folder, f"{output.PRECURSOR_OUTPUT}.tsv"), sep="\t"
+    psm_df = pd.read_parquet(
+        os.path.join(temp_folder, f"{output.PRECURSOR_OUTPUT}.parquet"),
     )
     assert all(
         [
@@ -91,7 +92,7 @@ def test_output_transform():
     assert all([col in stat_df.columns for col in ["run", "precursors", "proteins"]])
 
     # validate protein_df output
-    protein_df = pd.read_csv(os.path.join(temp_folder, "pg.matrix.tsv"), sep="\t")
+    protein_df = pd.read_parquet(os.path.join(temp_folder, "pg.matrix.parquet"))
     assert all([col in protein_df.columns for col in ["run_0", "run_1", "run_2"]])
 
     for i in run_columns:
diff --git a/tests/unit_tests/test_reporting.py b/tests/unit_tests/test_reporting.py
index ed536de0..5d171f74 100644
--- a/tests/unit_tests/test_reporting.py
+++ b/tests/unit_tests/test_reporting.py
@@ -7,23 +7,14 @@
 import sys
 import pytest
 
-from alphadia.workflow import reporting
-
+from conftest import random_tempfolder
 
-def _random_tempfolder():
-    tempdir = tempfile.gettempdir()
-    # 6 alphanumeric characters
-    random_foldername = "".join(
-        np.random.choice(list("abcdefghijklmnopqrstuvwxyz0123456789"), 6)
-    )
-    path = os.path.join(tempdir, random_foldername)
-    os.mkdir(path)
-    return path
+from alphadia.workflow import reporting
 
 
 @pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows")
 def test_logging():
-    tempfolder = _random_tempfolder()
+    tempfolder = random_tempfolder()
 
     if os.path.exists(os.path.join(tempfolder, "log.txt")):
         os.remove(os.path.join(tempfolder, "log.txt"))
@@ -58,7 +49,7 @@ def test_backend():
 
 
 def test_figure_backend():
-    tempfolder = _random_tempfolder()
+    tempfolder = random_tempfolder()
 
     figure_backend = reporting.FigureBackend(path=tempfolder)
 
@@ -79,7 +70,7 @@ def test_figure_backend():
 
 
 def test_jsonl_backend():
-    tempfolder = _random_tempfolder()
+    tempfolder = random_tempfolder()
 
     with reporting.JSONLBackend(path=tempfolder) as jsonl_backend:
         jsonl_backend.log_event("start_extraction", None)
@@ -96,7 +87,7 @@ def test_jsonl_backend():
 
 @pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows")
 def test_log_backend():
-    tempfolder = _random_tempfolder()
+    tempfolder = random_tempfolder()
 
     if os.path.exists(os.path.join(tempfolder, "log.txt")):
         os.remove(os.path.join(tempfolder, "log.txt"))
@@ -117,7 +108,7 @@ def test_log_backend():
 
 @pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows")
 def test_pipeline():
-    tempfolder = _random_tempfolder()
+    tempfolder = random_tempfolder()
 
     pipeline = reporting.Pipeline(
         backends=[

From 537a800ca55362dd2cb8f59fbfb0b7142c6064ba Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Mon, 10 Jun 2024 20:54:53 -0400
Subject: [PATCH 28/48] tl step

---
 alphadia/constants/default.yaml            | 15 ++++++++-
 alphadia/outputtransform.py                | 38 +++++++++++++++++++---
 alphadia/planning.py                       |  2 +-
 alphadia/workflow/peptidecentric.py        |  4 +--
 nbs/tutorial_nbs/finetuning.ipynb          | 31 +++++++++++++++++-
 tests/unit_tests/test_outputaccumulator.py |  8 ++---
 6 files changed, 84 insertions(+), 14 deletions(-)

diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml
index 4833a500..089eff9e 100644
--- a/alphadia/constants/default.yaml
+++ b/alphadia/constants/default.yaml
@@ -141,7 +141,7 @@ optimization_manager:
 
 # This section controls transfer learning
 # currently only the library is created with transfer learning
-transfer_learning:
+transfer_library:
   # if true, the library is created with transfer learning
   enabled: False
 
@@ -166,6 +166,19 @@ transfer_learning:
   # include only fragments with a XIC correlation at least 0.75 of the median for all fragments
   fragment_correlation_ratio: 0.75
 
+transfer_learning:
+  enabled: False
+  batch_size: 2000
+  max_lr: 0.0001
+  train_ratio: 0.8
+  test_interval: 1
+  lr_patience: 3
+  minimum_psms: 1200
+  epochs: 51
+  warmup_epochs: 5
+  nce: 25
+  instrument: 'Lumos'
+
 # configuration for the calibration manager
 # the config has to start with the calibration keyword and consists of a list of calibration groups.
 # each group consists of datapoints which have multiple properties.
diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py
index 75a0db4b..7095530b 100644
--- a/alphadia/outputtransform.py
+++ b/alphadia/outputtransform.py
@@ -11,6 +11,7 @@
     AccumulationBroadcaster,
 )
 from alphadia.consensus.utils import read_df, write_df
+from alphadia.transferlearning.train import FinetuneManager
 
 import pandas as pd
 import numpy as np
@@ -307,6 +308,7 @@ class SearchPlanOutput:
     PG_OUTPUT = "protein_groups"
     LIBRARY_OUTPUT = "speclib.mbr"
     TRANSFER_OUTPUT = "speclib.transfer"
+    TRANSFER_MODEL = "peptdeep.transfer"
 
     def __init__(self, config: dict, output_folder: str):
         """Combine individual searches into and build combined outputs
@@ -374,9 +376,35 @@ def build(
         _ = self.build_lfq_tables(folder_list, psm_df=psm_df, save=True)
         _ = self.build_library(base_spec_lib, psm_df=psm_df, save=True)
 
-        if self.config["transfer_learning"]["enabled"]:
+        if self.config["transfer_library"]["enabled"]:
             _ = self.build_transfer_library(folder_list, save=True)
 
+        if self.config["transfer_learning"]["enabled"]:
+            _ = self.build_transfer_model()
+
+    def build_transfer_model(self):
+        logger.progress("Train PeptDeep Models")
+        transfer_lib = SpecLibBase()
+        transfer_lib.load_hdf(
+            os.path.join(self.output_folder, f"{self.TRANSFER_OUTPUT}.hdf"),
+            load_mod_seq=True,
+        )
+
+        device = "cpu"
+        if self.config["general"]["use_gpu"]:
+            device = "mps" if os.uname().sysname == "Darwin" else "gpu"
+
+        tune_mgr = FinetuneManager(
+            device=device, settings=self.config["transfer_learning"]
+        )
+        stats = tune_mgr.finetune_rt(transfer_lib.precursor_df)
+        stats = tune_mgr.finetune_charge(transfer_lib.precursor_df)
+        stats = tune_mgr.finetune_ms2(
+            transfer_lib.precursor_df.copy(), transfer_lib.fragment_intensity_df.copy()
+        )
+
+        tune_mgr.save_models(os.path.join(self.output_folder, self.TRANSFER_MODEL))
+
     def build_transfer_library(
         self,
         folder_list: List[str],
@@ -408,12 +436,12 @@ def build_transfer_library(
         """
         logger.progress("======== Building transfer library ========")
         transferAccumulator = TransferLearningAccumulator(
-            keep_top=self.config["transfer_learning"]["top_k_samples"],
-            norm_delta_max=self.config["transfer_learning"]["norm_delta_max"],
-            precursor_correlation_cutoff=self.config["transfer_learning"][
+            keep_top=self.config["transfer_library"]["top_k_samples"],
+            norm_delta_max=self.config["transfer_library"]["norm_delta_max"],
+            precursor_correlation_cutoff=self.config["transfer_library"][
                 "precursor_correlation_cutoff"
             ],
-            fragment_correlation_ratio=self.config["transfer_learning"][
+            fragment_correlation_ratio=self.config["transfer_library"][
                 "fragment_correlation_ratio"
             ],
         )
diff --git a/alphadia/planning.py b/alphadia/planning.py
index f9fe1292..a92c7bdc 100644
--- a/alphadia/planning.py
+++ b/alphadia/planning.py
@@ -302,7 +302,7 @@ def run(
                     psm_df = workflow.requantify(psm_df)
                     psm_df = psm_df[psm_df["qval"] <= self.config["fdr"]["fdr"]]
 
-                if self.config["transfer_learning"]["enabled"]:
+                if self.config["transfer_library"]["enabled"]:
                     psm_df, frag_df = workflow.requantify_fragments(psm_df)
 
                 psm_df["run"] = raw_name
diff --git a/alphadia/workflow/peptidecentric.py b/alphadia/workflow/peptidecentric.py
index ad438625..f0dbfc83 100644
--- a/alphadia/workflow/peptidecentric.py
+++ b/alphadia/workflow/peptidecentric.py
@@ -1023,8 +1023,8 @@ def requantify_fragments(
             verbosity="progress",
         )
 
-        fragment_types = self.config["transfer_learning"]["fragment_types"].split(";")
-        max_charge = self.config["transfer_learning"]["max_charge"]
+        fragment_types = self.config["transfer_library"]["fragment_types"].split(";")
+        max_charge = self.config["transfer_library"]["max_charge"]
 
         self.reporter.log_string(
             f"creating library for charged fragment types: {fragment_types}",
diff --git a/nbs/tutorial_nbs/finetuning.ipynb b/nbs/tutorial_nbs/finetuning.ipynb
index 936d2d17..33c0204c 100644
--- a/nbs/tutorial_nbs/finetuning.ipynb
+++ b/nbs/tutorial_nbs/finetuning.ipynb
@@ -16,6 +16,35 @@
     "from alphadia.transferlearning.train import *\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'batch_size': 1000,\n",
+       " 'max_lr': 0.0005,\n",
+       " 'train_ratio': 0.8,\n",
+       " 'test_interval': 1,\n",
+       " 'lr_patience': 3,\n",
+       " 'minimum_psms': 1200,\n",
+       " 'epochs': 51,\n",
+       " 'warmup_epochs': 5,\n",
+       " 'nce': 25,\n",
+       " 'instrument': 'Lumos'}"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "settings"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 2,
@@ -988,7 +1017,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.18"
+   "version": "3.11.7"
   }
  },
  "nbformat": 4,
diff --git a/tests/unit_tests/test_outputaccumulator.py b/tests/unit_tests/test_outputaccumulator.py
index 9de649b4..063687d0 100644
--- a/tests/unit_tests/test_outputaccumulator.py
+++ b/tests/unit_tests/test_outputaccumulator.py
@@ -48,7 +48,7 @@ def prepare_input_data():
             "peptide_level_lfq": False,
             "precursor_level_lfq": False,
         },
-        "transfer_learning": {
+        "transfer_library": {
             "enabled": True,
             "fragment_types": "b;y",
             "max_charge": 2,
@@ -113,7 +113,7 @@ def test_complete_output_accumulation():
     """
     # Given:
     config, temp_folder, raw_folders, psm_dfs, fragment_dfs = prepare_input_data()
-    config["transfer_learning"]["top_k_samples"] = 2
+    config["transfer_library"]["top_k_samples"] = 2
 
     # When:
     output = outputtransform.SearchPlanOutput(config, temp_folder)
@@ -145,7 +145,7 @@ def test_selection_of_precursors():
     # Given:
     config, temp_folder, raw_folders, psm_dfs, fragment_dfs = prepare_input_data()
     keep_top = 2
-    config["transfer_learning"]["top_k_samples"] = keep_top
+    config["transfer_library"]["top_k_samples"] = keep_top
     # When:
     output = outputtransform.SearchPlanOutput(config, temp_folder)
     _ = output.build_transfer_library(raw_folders, save=True)
@@ -186,7 +186,7 @@ def test_keep_top_constraint():
     # Given:
     config, temp_folder, raw_folders, psm_dfs, fragment_dfs = prepare_input_data()
     keep_top = 2
-    config["transfer_learning"]["top_k_samples"] = keep_top
+    config["transfer_library"]["top_k_samples"] = keep_top
 
     # When:
     output = outputtransform.SearchPlanOutput(config, temp_folder)

From be14409f6f020be2372b5e70371f0ccc6321a913 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Mon, 10 Jun 2024 21:24:10 -0400
Subject: [PATCH 29/48] bugfixes

---
 alphadia/outputtransform.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py
index 7095530b..0aef4105 100644
--- a/alphadia/outputtransform.py
+++ b/alphadia/outputtransform.py
@@ -384,9 +384,18 @@ def build(
 
     def build_transfer_model(self):
         logger.progress("Train PeptDeep Models")
+
+        transfer_lib_path = os.path.join(
+            self.output_folder, f"{self.TRANSFER_OUTPUT}.hdf"
+        )
+        if not os.path.exists:
+            raise ValueError(
+                f"Transfer learning library was not found at {self.TRANSFER_OUTPUT}.hdf. Did you enable library generation?"
+            )
+
         transfer_lib = SpecLibBase()
         transfer_lib.load_hdf(
-            os.path.join(self.output_folder, f"{self.TRANSFER_OUTPUT}.hdf"),
+            transfer_lib_path,
             load_mod_seq=True,
         )
 

From 3be7a4e627d2c232a2afcbdf3d66a829d4e79ee4 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Mon, 10 Jun 2024 21:29:02 -0400
Subject: [PATCH 30/48] add parameter description

---
 alphadia/constants/default.yaml | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml
index 089eff9e..070bf523 100644
--- a/alphadia/constants/default.yaml
+++ b/alphadia/constants/default.yaml
@@ -142,7 +142,7 @@ optimization_manager:
 # This section controls transfer learning
 # currently only the library is created with transfer learning
 transfer_library:
-  # if true, the library is created with transfer learning
+  # if true, the library is created for transfer learning
   enabled: False
 
   # semicolon separated list of fragment types to include in the library. possible values are 'a', 'b', 'c', 'x', 'y', 'z'
@@ -167,16 +167,39 @@ transfer_library:
   fragment_correlation_ratio: 0.75
 
 transfer_learning:
+
+  # if true, a custom peptdeep model will be created using the transfer learned library
   enabled: False
+
+  # number of precursors per batch
   batch_size: 2000
+
+  # maximum learning rate per batch.
+  # The maximum learning rate will be reached after a warmup phase and decreased using a plateau scheduler
   max_lr: 0.0001
+
+  # TODO remove and replace with a fixed 70:20:10 split
   train_ratio: 0.8
+
+  # test every n intervals
   test_interval: 1
+
+  # learning rate patience after which the lr will be halved
   lr_patience: 3
-  minimum_psms: 1200
+
+  # minimum precursor number to perform transfer learning
+  minimum_psms: 10000
+
+  # maximum number of epochs
   epochs: 51
+
+  # number of warmup epochs during which the lr is ramped up
   warmup_epochs: 5
+
+  # normalised collision energy encoded during training
   nce: 25
+
+  # instrument type encoded during training
   instrument: 'Lumos'
 
 # configuration for the calibration manager

From 989705d4a89cede318e0fa717333bf23264ad6f7 Mon Sep 17 00:00:00 2001
From: Mohamed Sameh <mohamedsameh2210@gmail.com>
Date: Tue, 11 Jun 2024 19:12:08 +0200
Subject: [PATCH 31/48] log trace error in callback

---
 alphadia/outputaccumulator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/alphadia/outputaccumulator.py b/alphadia/outputaccumulator.py
index 5328acab..6f993000 100644
--- a/alphadia/outputaccumulator.py
+++ b/alphadia/outputaccumulator.py
@@ -239,7 +239,7 @@ def process_folder(folder):
 
 
 def error_callback(e):
-    logger.error(e)
+    logger.error(e, exc_info=True)
 
 
 class AccumulationBroadcaster:

From 68b6b00910742e66327f86235cdd1131652eaa8e Mon Sep 17 00:00:00 2001
From: Mohamed Sameh <mohamedsameh2210@gmail.com>
Date: Wed, 12 Jun 2024 22:06:56 +0200
Subject: [PATCH 32/48] fix: output accumulator test failure

---
 alphadia/outputaccumulator.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/alphadia/outputaccumulator.py b/alphadia/outputaccumulator.py
index 6f993000..251ae7fa 100644
--- a/alphadia/outputaccumulator.py
+++ b/alphadia/outputaccumulator.py
@@ -167,7 +167,8 @@ def parse_output_folder(
             self._precursor_df[col] = values
 
         # ----------------- Fragment -----------------
-
+        # Filter out fragments that are not used in the precursors
+        frag_df = frag_df[frag_df["precursor_idx"].isin(self._precursor_df["precursor_idx"])]
         self._fragment_df = frag_df[
             ["mz", "intensity", "precursor_idx", "frag_idx", "correlation"]
         ].copy()

From 258a6a7b41599851b11b69a99d51ba762e00d5c9 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Wed, 12 Jun 2024 14:03:48 -0700
Subject: [PATCH 33/48] implement changes

---
 alphadia/workflow/peptidecentric.py | 47 ++++++++++++++---------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/alphadia/workflow/peptidecentric.py b/alphadia/workflow/peptidecentric.py
index cfc2d627..da8e831e 100644
--- a/alphadia/workflow/peptidecentric.py
+++ b/alphadia/workflow/peptidecentric.py
@@ -1102,25 +1102,7 @@ def _build_candidate_speclib_flat(
     psm_df: pd.DataFrame,
     fragment_types: typing.List[str] = ["b", "y"],
     max_charge: int = 2,
-    optional_columns: typing.List[str] = [
-        "proba",
-        "score",
-        "qval",
-        "channel",
-        "rt_library",
-        "mz_library",
-        "mobility_library",
-        "genes",
-        "proteins",
-        "decoy",
-        "mods",
-        "mod_sites",
-        "sequence",
-        "charge",
-        "rt_observed",
-        "mobility_observed",
-        "mz_observed",
-    ],
+    optional_columns: typing.Union[typing.List[str], None] = None,
 ) -> typing.Tuple[SpecLibFlat, pd.DataFrame]:
     """Build a candidate spectral library for transfer learning.
 
@@ -1163,14 +1145,31 @@ def _build_candidate_speclib_flat(
     scored_candidates: pd.DataFrame
         Dataframe with scored candidates
     """
-    # remove decoys
-    # psm_df = psm_df[psm_df["decoy"] == 0]
 
-    # make copy to avoid modifying the original dataframe
-    _optional_columns = [col for col in optional_columns if col in psm_df.columns]
+    # set default optional columns
+    if optional_columns is None:
+        optional_columns = [
+            "proba",
+            "score",
+            "qval",
+            "channel",
+            "rt_library",
+            "mz_library",
+            "mobility_library",
+            "genes",
+            "proteins",
+            "decoy",
+            "mods",
+            "mod_sites",
+            "sequence",
+            "charge",
+            "rt_observed",
+            "mobility_observed",
+            "mz_observed",
+        ]
 
     scored_candidates = plexscoring.candidate_features_to_candidates(
-        psm_df, optional_columns=_optional_columns
+        psm_df, optional_columns=optional_columns
     )
 
     # create speclib with fragment_types of interest

From ee7e59d580fdd39d6da051efd492c4bd3d9d583a Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Wed, 12 Jun 2024 17:37:08 -0700
Subject: [PATCH 34/48] implement comments

---
 alphadia/planning.py           | 3 +--
 alphadia/workflow/reporting.py | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/alphadia/planning.py b/alphadia/planning.py
index 9c13f287..803ddc5a 100644
--- a/alphadia/planning.py
+++ b/alphadia/planning.py
@@ -125,8 +125,7 @@ def __init__(
 
         # set log level
         level_to_set = self.config["general"]["log_level"]
-        level_code = logging.getLevelNamesMapping().get(level_to_set)
-        if level_code is None:
+        if (level_code := logging.getLevelNamesMapping().get(level_to_set)) is None:
             logger.error(f"Setting logging to unknown level {level_to_set}")
         else:
             logger.setLevel(level_code)
diff --git a/alphadia/workflow/reporting.py b/alphadia/workflow/reporting.py
index 63acf82f..b92e84c4 100644
--- a/alphadia/workflow/reporting.py
+++ b/alphadia/workflow/reporting.py
@@ -23,7 +23,7 @@
 # As soon as its instantiated the default logger will be configured with a path to save the log file
 __is_initiated__ = False
 
-# Add a new logging level to the default logger
+# Add a new logging level to the default logger, level 21 is just above INFO (20)
 # This has to happen at load time to make the .progress() method available even if no logger is instantiated
 PROGRESS_LEVELV_NUM = 21
 logging.PROGRESS = PROGRESS_LEVELV_NUM

From db1e3f4c8cae05085f52ac9c43e0f6818c8b9025 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Wed, 12 Jun 2024 18:03:53 -0700
Subject: [PATCH 35/48] implement fixes

---
 alphadia/consensus/utils.py     |  9 +++------
 alphadia/constants/default.yaml |  2 +-
 alphadia/outputtransform.py     |  4 ++--
 tests/unit_tests/conftest.py    | 12 ++++++++++++
 4 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/alphadia/consensus/utils.py b/alphadia/consensus/utils.py
index 07638fb6..084ded8d 100644
--- a/alphadia/consensus/utils.py
+++ b/alphadia/consensus/utils.py
@@ -26,11 +26,6 @@ def read_df(path_no_format, file_format="parquet"):
 
     """
 
-    if file_format not in supported_formats:
-        raise ValueError(
-            f"Provided unknown file format: {file_format}, supported_formats: {supported_formats}"
-        )
-
     file_path = f"{path_no_format}.{file_format}"
 
     if not os.path.exists(file_path):
@@ -45,7 +40,9 @@ def read_df(path_no_format, file_format="parquet"):
         return pd.read_csv(file_path, sep="\t")
 
     else:
-        raise ValueError("I don't know how you ended up here")
+        raise ValueError(
+            f"Provided unknown file format: {file_format}, supported_formats: {supported_formats}"
+        )
 
 
 def write_df(df, path_no_format, file_format="parquet"):
diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml
index 4833a500..3d3a8112 100644
--- a/alphadia/constants/default.yaml
+++ b/alphadia/constants/default.yaml
@@ -130,7 +130,7 @@ search_output:
   min_nonnan: 3
   normalize_lfq: True
   # can be either "parquet" or "tsv"
-  file_format: "parquet"
+  file_format: "tsv"
 
 # configuration for the optimization manager
 # initial parameters, will nbe optimized
diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py
index 75a0db4b..f5db4491 100644
--- a/alphadia/outputtransform.py
+++ b/alphadia/outputtransform.py
@@ -580,7 +580,7 @@ def build_precursor_table(
             logger.info("Writing precursor output to disk")
             write_df(
                 psm_df,
-                os.path.join(self.output_folder, f"{self.PRECURSOR_OUTPUT}"),
+                os.path.join(self.output_folder, self.PRECURSOR_OUTPUT),
                 file_format=self.config["search_output"]["file_format"],
             )
 
@@ -631,7 +631,7 @@ def build_stat_df(
             logger.info("Writing stat output to disk")
             write_df(
                 stat_df,
-                os.path.join(self.output_folder, f"{self.STAT_OUTPUT}"),
+                os.path.join(self.output_folder, self.STAT_OUTPUT),
                 file_format="tsv",
             )
 
diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py
index 5f977eb9..3ddd2017 100644
--- a/tests/unit_tests/conftest.py
+++ b/tests/unit_tests/conftest.py
@@ -24,6 +24,9 @@ def mock_precursor_df(
     n_precursor : int
         Number of precursors to generate
 
+    with_decoy : bool
+        If True, half of the precursors will be decoys
+
     Returns
     -------
 
@@ -188,6 +191,14 @@ def pytest_configure(config):
 
 
 def random_tempfolder():
+    """Create a randomly named temp folder in the system temp folder
+
+    Returns
+    -------
+    path : str
+        Path to the created temp folder
+
+    """
     tempdir = tempfile.gettempdir()
     # 6 alphanumeric characters
     random_foldername = "alphadia_" + "".join(
@@ -195,4 +206,5 @@ def random_tempfolder():
     )
     path = os.path.join(tempdir, random_foldername)
     os.makedirs(path, exist_ok=True)
+    print(f"Created temp folder: {path}")
     return path

From 10ac05a93dda8922c2f7a7dc0a2e3278536a7e21 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Thu, 13 Jun 2024 12:16:19 -0700
Subject: [PATCH 36/48] remove misc config

---
 misc/config/default.yaml | 186 ---------------------------------------
 1 file changed, 186 deletions(-)
 delete mode 100644 misc/config/default.yaml

diff --git a/misc/config/default.yaml b/misc/config/default.yaml
deleted file mode 100644
index b3d1d7a5..00000000
--- a/misc/config/default.yaml
+++ /dev/null
@@ -1,186 +0,0 @@
-# configuration for the extraction plan
-version: 1
-
-general:
-  thread_count: 10
-  # maximum number of threads or processes to use per raw file
-  reuse_calibration: false
-  reuse_quant: false
-  astral_ms1: false
-  log_level: 'INFO'
-  wsl: false
-  mmap_detector_events: false
-  use_gpu: true
-
-library_loading:
-  rt_heuristic: 180
-  # if retention times are reported in absolute units, the rt_heuristic defines rt is interpreted as minutes or seconds
-
-library_prediction:
-  predict: False
-  enzyme: trypsin
-  fixed_modifications: 'Carbamidomethyl@C'
-  variable_modifications: 'Oxidation@M;Acetyl@Protein N-term'
-  max_var_mod_num: 2
-  missed_cleavages: 1
-  precursor_len:
-    - 7
-    - 35
-  precursor_charge:
-    - 2
-    - 4
-  precursor_mz:
-    - 400
-    - 1200
-  fragment_mz:
-    - 200
-    - 2000
-  nce: 25.0
-  instrument: QE
-  save_hdf: True
-
-search:
-  channel_filter: '0'
-  exclude_shared_ions: True
-  compete_for_fragments: True
-
-  target_num_candidates: 2
-  target_ms1_tolerance: 15
-  target_ms2_tolerance: 15
-  target_mobility_tolerance: 0.04
-  target_rt_tolerance: 60
-
-  quant_window: 3
-
-search_advanced:
-  top_k_fragments: 12
-
-calibration:
-  min_epochs: 3
-  max_epochs: 20
-  batch_size: 8000
-  recalibration_target: 200
-  final_full_calibration: False
-  norm_rt_mode: 'linear'
-
-search_initial:
-  initial_num_candidates: 1
-  initial_ms1_tolerance: 30
-  initial_ms2_tolerance: 30
-  initial_mobility_tolerance: 0.08
-  initial_rt_tolerance: 240
-
-selection_config:
-  peak_len_rt: 10.
-  sigma_scale_rt: 0.5
-  peak_len_mobility: 0.01
-  sigma_scale_mobility: 1.
-
-  top_k_precursors: 3
-  kernel_size: 30
-
-  f_mobility: 1.0
-  f_rt: 0.99
-  center_fraction: 0.5
-  min_size_mobility: 8
-  min_size_rt: 3
-  max_size_mobility: 50
-  max_size_rt: 15
-
-  group_channels: False
-  use_weighted_score: True
-
-  join_close_candidates: True
-  join_close_candidates_scan_threshold: 0.01
-  join_close_candidates_cycle_threshold: 0.6
-
-scoring_config:
-  score_grouped: false
-  top_k_isotopes: 3
-  reference_channel: -1
-  precursor_mz_tolerance: 10
-  fragment_mz_tolerance: 15
-
-multiplexing:
-  multiplexed_quant: False
-  target_channels: '4,8'
-  decoy_channel: 12
-  reference_channel: 0
-  competetive_scoring: True
-
-fdr:
-  fdr: 0.01
-  group_level: 'proteins'
-  competetive_scoring: true
-  keep_decoys: false
-  channel_wise_fdr: false
-  inference_strategy: "heuristic"
-
-search_output:
-  peptide_level_lfq: false
-  precursor_level_lfq: false
-  min_k_fragments: 12
-  min_correlation: 0.9
-  num_samples_quadratic: 50
-  min_nonnan: 3
-  normalize_lfq: True
-
-# configuration for the optimization manager
-# initial parameters, will nbe optimized
-optimization_manager:
-  fwhm_rt: 5
-  fwhm_mobility: 0.01
-  score_cutoff: 0
-
-# configuration for the calibration manager
-# the config has to start with the calibration keyword and consists of a list of calibration groups.
-# each group consists of datapoints which have multiple properties.
-# This can be for example precursors (mz, rt ...), fragments (mz, ...), quadrupole (transfer_efficiency)
-calibration_manager:
-  - name: fragment
-    estimators:
-      - name: mz
-        model: LOESSRegression
-        model_args:
-          n_kernels: 2
-        input_columns:
-          - mz_library
-        target_columns:
-          - mz_observed
-        output_columns:
-          - mz_calibrated
-        transform_deviation: 1e6
-  - name: precursor
-    estimators:
-        - name: mz
-          model: LOESSRegression
-          model_args:
-            n_kernels: 2
-          input_columns:
-            - mz_library
-          target_columns:
-            - mz_observed
-          output_columns:
-            - mz_calibrated
-          transform_deviation: 1e6
-        - name: rt
-          model: LOESSRegression
-          model_args:
-            n_kernels: 6
-            uniform: True
-          input_columns:
-            - rt_library
-          target_columns:
-            - rt_observed
-          output_columns:
-            - rt_calibrated
-        - name: mobility
-          model: LOESSRegression
-          model_args:
-            n_kernels: 2
-          input_columns:
-            - mobility_library
-          target_columns:
-            - mobility_observed
-          output_columns:
-            - mobility_calibrated

From e34c050e76b650c112e6910ae3e23edc099d2f79 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Thu, 13 Jun 2024 12:27:07 -0700
Subject: [PATCH 37/48] remove uname

---
 alphadia/libtransform.py       |  9 +--------
 alphadia/outputtransform.py    |  4 +---
 alphadia/utils.py              | 24 ++++++++++++++++++++++++
 tests/unit_tests/test_utils.py | 12 ++++++++++++
 4 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/alphadia/libtransform.py b/alphadia/libtransform.py
index ba1e5045..27e9d009 100644
--- a/alphadia/libtransform.py
+++ b/alphadia/libtransform.py
@@ -298,14 +298,7 @@ def forward(self, input: SpecLibBase) -> SpecLibBase:
 
         input.charged_frag_types = charged_frag_types
 
-        # Check if CPU or GPU/MPS should be used
-        device = "cpu"
-        if self.use_gpu:
-            try:
-                device = "mps" if os.uname().sysname == "Darwin" else "gpu"
-            except AttributeError:
-                # Windows does not support uname
-                device = "gpu"
+        device = utils.get_torch_device(self.use_gpu)
 
         model_mgr = ModelManager(device=device)
         if self.checkpoint_folder_path is not None:
diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py
index 976642be..73e77b7f 100644
--- a/alphadia/outputtransform.py
+++ b/alphadia/outputtransform.py
@@ -399,9 +399,7 @@ def build_transfer_model(self):
             load_mod_seq=True,
         )
 
-        device = "cpu"
-        if self.config["general"]["use_gpu"]:
-            device = "mps" if os.uname().sysname == "Darwin" else "gpu"
+        device = utils.get_torch_device(self.config["general"]["use_gpu"])
 
         tune_mgr = FinetuneManager(
             device=device, settings=self.config["transfer_learning"]
diff --git a/alphadia/utils.py b/alphadia/utils.py
index 28048ef3..6bb4eb85 100644
--- a/alphadia/utils.py
+++ b/alphadia/utils.py
@@ -3,6 +3,7 @@
 from ctypes import Structure, c_double
 import typing
 import re
+import platform
 
 # alphadia imports
 
@@ -20,6 +21,29 @@
 ISOTOPE_DIFF = 1.0032999999999674
 
 
+def get_torch_device(use_gpu: bool = False):
+    """Get the torch device to be used.
+
+    Parameters
+    ----------
+
+    use_gpu : bool, optional
+        If True, use GPU if available, by default False
+
+    Returns
+    -------
+    str
+        Device to be used, either 'cpu', 'gpu' or 'mps'
+
+    """
+
+    device = "cpu"
+    if use_gpu:
+        device = "mps" if platform.system() == "Darwin" else "gpu"
+
+    return device
+
+
 @nb.njit
 def candidate_hash(precursor_idx, rank):
     # create a 64 bit hash from the precursor_idx, number and type
diff --git a/tests/unit_tests/test_utils.py b/tests/unit_tests/test_utils.py
index 399377e9..cdc05816 100644
--- a/tests/unit_tests/test_utils.py
+++ b/tests/unit_tests/test_utils.py
@@ -11,6 +11,7 @@
     wsl_to_windows,
     windows_to_wsl,
     merge_missing_columns,
+    get_torch_device,
 )
 
 
@@ -20,6 +21,17 @@
 import pytest
 
 
+@pytest.mark.parametrize("use_gpu", [True, False])
+def test_get_torch_device(use_gpu):
+    # given
+
+    # when
+    device = get_torch_device(use_gpu)
+
+    # then
+    assert device in ["gpu", "mps"] if use_gpu else "cpu"
+
+
 def test_amean0():
     test_array = np.random.random((10, 10))
 

From 1bd3ad5e30af6c8adf1931dceb63e384832de6f2 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Thu, 13 Jun 2024 12:29:38 -0700
Subject: [PATCH 38/48] assert output path

---
 alphadia/outputtransform.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/alphadia/outputtransform.py b/alphadia/outputtransform.py
index 73e77b7f..270580e0 100644
--- a/alphadia/outputtransform.py
+++ b/alphadia/outputtransform.py
@@ -388,10 +388,9 @@ def build_transfer_model(self):
         transfer_lib_path = os.path.join(
             self.output_folder, f"{self.TRANSFER_OUTPUT}.hdf"
         )
-        if not os.path.exists:
-            raise ValueError(
-                f"Transfer learning library was not found at {self.TRANSFER_OUTPUT}.hdf. Did you enable library generation?"
-            )
+        assert os.path.exists(
+            transfer_lib_path
+        ), f"Transfer library not found at {transfer_lib_path}, did you enable library generation?"
 
         transfer_lib = SpecLibBase()
         transfer_lib.load_hdf(

From f16f0b8d465f575cedf07f885e53b487b796329d Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Thu, 13 Jun 2024 12:40:45 -0700
Subject: [PATCH 39/48] check torch backends

---
 alphadia/utils.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/alphadia/utils.py b/alphadia/utils.py
index 6bb4eb85..db113a0c 100644
--- a/alphadia/utils.py
+++ b/alphadia/utils.py
@@ -4,6 +4,9 @@
 import typing
 import re
 import platform
+import torch
+
+logger = logging.getLogger()
 
 # alphadia imports
 
@@ -39,7 +42,12 @@ def get_torch_device(use_gpu: bool = False):
 
     device = "cpu"
     if use_gpu:
-        device = "mps" if platform.system() == "Darwin" else "gpu"
+        if platform.system() == "Darwin":
+            device = "mps" if torch.backends.mps.is_available() else "cpu"
+        else:
+            device = "gpu" if torch.cuda.is_available() else "cpu"
+
+    logger.info(f"Device set to {device}")
 
     return device
 

From c7a26e863196859c7b82fea3985db95ea2bd309c Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Thu, 13 Jun 2024 14:53:08 -0700
Subject: [PATCH 40/48] fix testcase

---
 tests/unit_tests/test_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit_tests/test_utils.py b/tests/unit_tests/test_utils.py
index cdc05816..df46e188 100644
--- a/tests/unit_tests/test_utils.py
+++ b/tests/unit_tests/test_utils.py
@@ -29,7 +29,7 @@ def test_get_torch_device(use_gpu):
     device = get_torch_device(use_gpu)
 
     # then
-    assert device in ["gpu", "mps"] if use_gpu else "cpu"
+    assert device in ["gpu", "mps", "cpu"]
 
 
 def test_amean0():

From 73ba9fb8ca267708fdf9cfef1dd9c8a0003f6211 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Thu, 13 Jun 2024 15:29:06 -0700
Subject: [PATCH 41/48] make tl accessible from gui

---
 gui/workflows/PeptideCentric.v1.json | 161 ++++++++++++++++++++++++++-
 1 file changed, 159 insertions(+), 2 deletions(-)

diff --git a/gui/workflows/PeptideCentric.v1.json b/gui/workflows/PeptideCentric.v1.json
index b891774c..1946b069 100644
--- a/gui/workflows/PeptideCentric.v1.json
+++ b/gui/workflows/PeptideCentric.v1.json
@@ -224,7 +224,7 @@
                 {
                     "id": "instrument",
                     "name": "Instrument",
-                    "value": "Fusion",
+                    "value": "Lumos",
                     "description": "Instrument used for ms2 spectrum prediction.",
                     "type": "dropdown",
                     "options": [
@@ -232,7 +232,7 @@
                         "QE",
                         "timsTOF",
                         "SciexTOF",
-                        "Fusion",
+                        "Lumos",
                         "Eclipse",
                         "Velos",
                         "Elite",
@@ -523,6 +523,163 @@
                     "type": "boolean"
                 }
             ]
+        },
+        {
+            "id": "transfer_library",
+            "name": "Transfer Library",
+            "hidden": false,
+            "parameters": [
+                {
+                    "id": "enabled",
+                    "name": "Enabled",
+                    "value": false,
+                    "description": "If true, a transfer learning training dataset is created.",
+                    "type": "boolean"
+                },
+                {
+                    "id": "fragment_types",
+                    "name": "Fragment types",
+                    "value": "b;y",
+                    "description": "Semicolon separated list of fragment types to include in the library. \n possible values are 'a', 'b', 'c', 'x', 'y', 'z'",
+                    "type": "string"
+                },
+                {
+                    "id": "max_charge",
+                    "name": "Maximum charge",
+                    "value": 2,
+                    "description": "Maximum charge for fragments.",
+                    "type": "integer"
+                },
+                {
+                    "id": "top_k_samples",
+                    "name": "Top k samples",
+                    "value": 3,
+                    "description": "If a given precursor appears multiple times in an experiment, only the top_k_samples with the highest scores are included in the library.",
+                    "type": "integer"
+                },
+                {
+                    "id": "norm_delta_max",
+                    "name": "Norm delta max",
+                    "value": true,
+                    "description": "Perform advanced rt calibration. \nIf set to false retention times will be normalised by the maximum retention time observed in the experiment. \nIf set to true, a combination of maximum normalisation and deviation from the calibration curve will be used.",
+                    "type": "boolean"
+                },
+                {
+                    "id": "precursor_correlation_cutoff",
+                    "name": "Precursor correlation cutoff",
+                    "value": 0.5,
+                    "description": "Use only precursors for ms2 training with a median XIC correlation above this threshold.",
+                    "type": "float"
+                },
+                {
+                    "id": "fragment_correlation_ratio",
+                    "name": "Fragment correlation ratio",
+                    "value": 0.75,
+                    "description": "Include only fragments with an XIC correlation of at least this fraction of the median across all fragments (default 0.75).",
+                    "type": "float"
+                }
+            ]
+        },
+        {
+            "id": "transfer_learning",
+            "name": "Transfer Learning",
+            "hidden": false,
+            "parameters": [
+                {
+                    "id": "enabled",
+                    "name": "Enabled",
+                    "value": false,
+                    "description": "If true, a custom peptdeep model will be created using the transfer learned library.",
+                    "type": "boolean"
+                },
+                {
+                    "id": "batch_size",
+                    "name": "Batch size",
+                    "value": 2000,
+                    "description": "Number of precursors per batch.",
+                    "type": "integer"
+                },
+                {
+                    "id": "max_lr",
+                    "name": "Maximum learning rate",
+                    "value": 0.0001,
+                    "description": "Maximum learning rate per batch. \nThe maximum learning rate will be reached after a warmup phase and decreased using a plateau scheduler.",
+                    "type": "float"
+                },
+                {
+                    "id": "train_ratio",
+                    "name": "Train ratio",
+                    "value": 0.8,
+                    "description": "TODO: remove and replace with a fixed 70:20:10 split",
+                    "type": "float"
+                },
+                {
+                    "id": "test_interval",
+                    "name": "Test interval",
+                    "value": 1,
+                    "description": "Test every n intervals.",
+                    "type": "integer"
+                },
+                {
+                    "id": "lr_patience",
+                    "name": "Learning rate patience",
+                    "value": 3,
+                    "description": "Learning rate patience after which the lr will be halved.",
+                    "type": "integer"
+                },
+                {
+                    "id": "minimum_psms",
+                    "name": "Minimum precursor number",
+                    "value": 10000,
+                    "description": "Minimum precursor number to perform transfer learning.",
+                    "type": "integer"
+                },
+                {
+                    "id": "epochs",
+                    "name": "Number of epochs",
+                    "value": 51,
+                    "description": "Maximum number of epochs.",
+                    "type": "integer"
+                },
+                {
+                    "id": "warmup_epochs",
+                    "name": "Warmup epochs",
+                    "value": 5,
+                    "description": "Number of warmup epochs during which the lr is ramped up.",
+                    "type": "integer"
+                },
+                {
+                    "id": "nce",
+                    "name": "Normalized collision energy",
+                    "value": 25,
+                    "description": "Normalised collision energy encoded during training.",
+                    "type": "float"
+                },
+                {
+                    "id": "instrument",
+                    "name": "Instrument",
+                    "value": "Lumos",
+                    "description": "Instrument type encoded during training. \nThe same instrument type must be used for prediction using the trained model.",
+                    "type": "dropdown",
+                    "options": [
+                        "Astral",
+                        "QE",
+                        "timsTOF",
+                        "SciexTOF",
+                        "Lumos",
+                        "Eclipse",
+                        "Velos",
+                        "Elite",
+                        "OrbitrapTribrid",
+                        "ThermoTribrid",
+                        "QE+",
+                        "QEHF",
+                        "QEHFX",
+                        "Exploris",
+                        "Exploris480"
+                    ]
+                }
+            ]
         }
     ]
 }

From a10d8740fe54790bac4615219e343dc35fce4b2c Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Thu, 13 Jun 2024 15:30:16 -0700
Subject: [PATCH 42/48] hidden  by default

---
 gui/workflows/PeptideCentric.v1.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gui/workflows/PeptideCentric.v1.json b/gui/workflows/PeptideCentric.v1.json
index 1946b069..2ba11824 100644
--- a/gui/workflows/PeptideCentric.v1.json
+++ b/gui/workflows/PeptideCentric.v1.json
@@ -527,7 +527,7 @@
         {
             "id": "transfer_library",
             "name": "Transfer Library",
-            "hidden": false,
+            "hidden": true,
             "parameters": [
                 {
                     "id": "enabled",
@@ -583,7 +583,7 @@
         {
             "id": "transfer_learning",
             "name": "Transfer Learning",
-            "hidden": false,
+            "hidden": true,
             "parameters": [
                 {
                     "id": "enabled",

From 8e8378ac474b6d9c059c76914ed89c6fd978b6fd Mon Sep 17 00:00:00 2001
From: mschwoerer <82171591+mschwoer@users.noreply.github.com>
Date: Fri, 14 Jun 2024 09:48:58 +0200
Subject: [PATCH 43/48] BUG: fix issue with logging in python <3.11

---
 alphadia/planning.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/alphadia/planning.py b/alphadia/planning.py
index c2f2e976..51940f46 100644
--- a/alphadia/planning.py
+++ b/alphadia/planning.py
@@ -125,10 +125,8 @@ def __init__(
 
         # set log level
         level_to_set = self.config["general"]["log_level"]
-        if (level_code := logging.getLevelNamesMapping().get(level_to_set)) is None:
-            logger.error(f"Setting logging to unknown level {level_to_set}")
-        else:
-            logger.setLevel(level_code)
+        level_code = logging.getLevelName(level_to_set)
+        logger.setLevel(level_code)
 
         self.load_library()
 

From e71cc4ffddf493d347cfe0676d8bea0642682ebd Mon Sep 17 00:00:00 2001
From: mschwoerer <82171591+mschwoer@users.noreply.github.com>
Date: Fri, 14 Jun 2024 09:49:17 +0200
Subject: [PATCH 44/48] CHORE: nonzero exit status on error

---
 alphadia/cli.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/alphadia/cli.py b/alphadia/cli.py
index b0077a4d..8bf60b15 100644
--- a/alphadia/cli.py
+++ b/alphadia/cli.py
@@ -2,6 +2,8 @@
 
 # native imports
 import logging
+import sys
+
 import yaml
 import os
 import re
@@ -345,3 +347,4 @@ def run(*args, **kwargs):
 
         logger.info(traceback.format_exc())
         logger.error(e)
+        sys.exit(1)

From b74b79e77ddf841b9ef5388a301db15a7c691fca Mon Sep 17 00:00:00 2001
From: mschwoerer <82171591+mschwoer@users.noreply.github.com>
Date: Fri, 14 Jun 2024 09:50:05 +0200
Subject: [PATCH 45/48] CHORE: get rid of dots in log messages

---
 alphadia/cli.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/alphadia/cli.py b/alphadia/cli.py
index 8bf60b15..bd1009c9 100644
--- a/alphadia/cli.py
+++ b/alphadia/cli.py
@@ -316,13 +316,13 @@ def run(*args, **kwargs):
     for f in raw_path_list:
         logger.progress(f"  {os.path.basename(f)}")
 
-    logger.progress(f"Using library: {library_path}.")
+    logger.progress(f"Using library: {library_path}")
 
     logger.progress(f"Using {len(fasta_path_list)} fasta files:")
     for f in fasta_path_list:
         logger.progress(f"  {f}")
 
-    logger.progress(f"Saving output to {output_directory}.")
+    logger.progress(f"Saving output to: {output_directory}")
 
     try:
         import matplotlib

From 26e3ae13caf1bf544cce416acb042c275ec31223 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Fri, 14 Jun 2024 13:51:42 -0700
Subject: [PATCH 46/48] remove minimum_psms

---
 alphadia/constants/default.yaml      | 3 ---
 alphadia/transferlearning/train.py   | 3 +--
 gui/workflows/PeptideCentric.v1.json | 7 -------
 3 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml
index 86194da2..025c1bd8 100644
--- a/alphadia/constants/default.yaml
+++ b/alphadia/constants/default.yaml
@@ -208,9 +208,6 @@ transfer_learning:
   # learning rate patience after which the lr will be halved
   lr_patience: 3
 
-  # minimum precursor number to perform transfer learning
-  minimum_psms: 10000
-
   # maximum number of epochs
   epochs: 51
 
diff --git a/alphadia/transferlearning/train.py b/alphadia/transferlearning/train.py
index c558619b..23a80888 100644
--- a/alphadia/transferlearning/train.py
+++ b/alphadia/transferlearning/train.py
@@ -25,6 +25,7 @@
 import logging
 
 from alphadia.workflow import reporting
+
 logger = logging.getLogger()
 
 settings = {
@@ -35,7 +36,6 @@
     "test_interval": 1,
     "lr_patience": 3,
     # --------- Our settings ------------
-    "minimum_psms": 1200,
     "epochs": 51,
     "warmup_epochs": 5,
     # --------------------------
@@ -278,7 +278,6 @@ def _order_intensities(
         """
         reordered = unordered_frag_df.copy()
         for i in tqdm(range(len(reordered_precursor_df))):
-      
             new_start_idx = reordered_precursor_df.iloc[i]["frag_start_idx"]
             new_end_idx = reordered_precursor_df.iloc[i]["frag_stop_idx"]
 
diff --git a/gui/workflows/PeptideCentric.v1.json b/gui/workflows/PeptideCentric.v1.json
index 2ba11824..312f319a 100644
--- a/gui/workflows/PeptideCentric.v1.json
+++ b/gui/workflows/PeptideCentric.v1.json
@@ -627,13 +627,6 @@
                     "description": "Learning rate patience after which the lr will be halved.",
                     "type": "integer"
                 },
-                {
-                    "id": "minimum_psms",
-                    "name": "Minimum precursor number",
-                    "value": 10000,
-                    "description": "Minimum precursor number to perform transfer learning.",
-                    "type": "integer"
-                },
                 {
                     "id": "epochs",
                     "name": "Number of epochs",

From b278530cfc537c1fca04ecc46019856beb2513d7 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Fri, 14 Jun 2024 13:56:30 -0700
Subject: [PATCH 47/48] raise alphabase version

---
 requirements/requirements.txt       | 2 +-
 requirements/requirements_loose.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index c505c4e4..deb91af3 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -5,7 +5,7 @@ numba==0.59.1
 argparse==1.4.0
 alpharaw==0.4.5
 alphatims==1.0.8
-alphabase==1.2.4
+alphabase==1.2.5
 peptdeep==1.2.1
 progressbar==2.5
 neptune==1.10.4
diff --git a/requirements/requirements_loose.txt b/requirements/requirements_loose.txt
index 482be66c..5ce63f4c 100644
--- a/requirements/requirements_loose.txt
+++ b/requirements/requirements_loose.txt
@@ -4,7 +4,7 @@ numba
 argparse
 alpharaw>=0.3.1  # test: tolerate_version
 alphatims
-alphabase>=1.2.4 # test: tolerate_version
+alphabase>=1.2.5 # test: tolerate_version
 peptdeep>=1.2.1 # test: tolerate_version
 progressbar
 neptune

From 8b4cd6962e5e4a218ac8603a7dcd0f984800e7c4 Mon Sep 17 00:00:00 2001
From: GeorgWa <wallmann@biochem.mpg.de>
Date: Fri, 14 Jun 2024 15:05:36 -0700
Subject: [PATCH 48/48] =?UTF-8?q?Bump=20version:=201.6.2=20=E2=86=92=201.7?=
 =?UTF-8?q?.0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 alphadia/__init__.py                       | 2 +-
 docs/index.md                              | 2 +-
 gui/package.json                           | 2 +-
 gui/src/main/modules/profile.js            | 2 +-
 misc/.bumpversion.cfg                      | 2 +-
 release/macos/build_backend_macos.sh       | 2 +-
 release/macos/build_pkg_macos.sh           | 2 +-
 release/macos/build_zip_macos.sh           | 2 +-
 release/macos/distribution.xml             | 2 +-
 release/macos/info.plist                   | 4 ++--
 release/windows/alphadia_innoinstaller.iss | 4 ++--
 release/windows/build_backend.ps1          | 2 +-
 12 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/alphadia/__init__.py b/alphadia/__init__.py
index 363a78dc..25e7bf58 100644
--- a/alphadia/__init__.py
+++ b/alphadia/__init__.py
@@ -1,3 +1,3 @@
 #!python
 
-__version__ = "1.6.2"
+__version__ = "1.7.0"
diff --git a/docs/index.md b/docs/index.md
index 327dae11..88652f6d 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,6 +1,6 @@
 
 # AlphaDIA Documentation
-**Version:** 1.6.2 | [Github](https://github.com/MannLabs/alphadia)
+**Version:** 1.7.0 | [Github](https://github.com/MannLabs/alphadia)
 
 Open-source DIA search engine built with the alphaX ecosystem. Built with [alpharaw](https://github.com/MannLabs/alpharaw) and [alphatims](https://github.com/MannLabs/alphatims) for raw file acces. Spectral libraries are predicted with [peptdeep](https://github.com/MannLabs/alphapeptdeep) and managed by [alphabase](https://github.com/MannLabs/alphabase). Quantification is powered by [directLFQ](https://github.com/MannLabs/directLFQ).
 
diff --git a/gui/package.json b/gui/package.json
index bd21cbba..22f59176 100644
--- a/gui/package.json
+++ b/gui/package.json
@@ -1,7 +1,7 @@
 {
   "name": "alphadia",
   "productName": "alphadia-gui",
-  "version": "1.6.2",
+  "version": "1.7.0",
   "description": "Graphical user interface for DIA data analysis",
   "main": "dist/electron.js",
   "homepage": "./",
diff --git a/gui/src/main/modules/profile.js b/gui/src/main/modules/profile.js
index e92663f6..87a52193 100644
--- a/gui/src/main/modules/profile.js
+++ b/gui/src/main/modules/profile.js
@@ -7,7 +7,7 @@ const { dialog } = require('electron')
 const Profile = class {
 
     config = {
-        "version": "1.6.2",
+        "version": "1.7.0",
         "conda": {
             "envName": "alpha",
             "path": ""
diff --git a/misc/.bumpversion.cfg b/misc/.bumpversion.cfg
index 33021c64..9ab81833 100644
--- a/misc/.bumpversion.cfg
+++ b/misc/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.6.2
+current_version = 1.7.0
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
diff --git a/release/macos/build_backend_macos.sh b/release/macos/build_backend_macos.sh
index 6a8b4d14..c8eff216 100755
--- a/release/macos/build_backend_macos.sh
+++ b/release/macos/build_backend_macos.sh
@@ -6,7 +6,7 @@ python -c "from huggingface_hub import get_full_repo_name; print('success')"
 
 pip install build
 python -m build
-pip install "dist/alphadia-1.6.2-py3-none-any.whl[stable]"
+pip install "dist/alphadia-1.7.0-py3-none-any.whl[stable]"
 
 # Creating the stand-alone pyinstaller folder
 pip install pyinstaller
diff --git a/release/macos/build_pkg_macos.sh b/release/macos/build_pkg_macos.sh
index f8254a55..d56e06b3 100755
--- a/release/macos/build_pkg_macos.sh
+++ b/release/macos/build_pkg_macos.sh
@@ -2,7 +2,7 @@
 
 # Set up package name and version
 PACKAGE_NAME="alphadia"
-PACKAGE_VERSION="1.6.2"
+PACKAGE_VERSION="1.7.0"
 
 ARCH=$(uname -m)
 if [ "$ARCH" == "x86_64" ]; then
diff --git a/release/macos/build_zip_macos.sh b/release/macos/build_zip_macos.sh
index 886b5b24..6a636b54 100755
--- a/release/macos/build_zip_macos.sh
+++ b/release/macos/build_zip_macos.sh
@@ -2,7 +2,7 @@
 
 # Set up package name and version
 PACKAGE_NAME="alphadia"
-PACKAGE_VERSION="1.6.2"
+PACKAGE_VERSION="1.7.0"
 
 ARCH=$(uname -m)
 if [ "$ARCH" == "x86_64" ]; then
diff --git a/release/macos/distribution.xml b/release/macos/distribution.xml
index 095ceb88..73121631 100644
--- a/release/macos/distribution.xml
+++ b/release/macos/distribution.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="utf-8" standalone="no"?>
 <installer-script minSpecVersion="1.000000">
-    <title>alphaDIA 1.6.2</title>
+    <title>alphaDIA 1.7.0</title>
     <background mime-type="image/png" file="alphadia.png" scaling="proportional"/>
     <welcome file="welcome.html" mime-type="text/html" />
     <conclusion file="conclusion.html" mime-type="text/html" />
diff --git a/release/macos/info.plist b/release/macos/info.plist
index 2d5e4643..0930aa58 100644
--- a/release/macos/info.plist
+++ b/release/macos/info.plist
@@ -9,9 +9,9 @@
 	<key>CFBundleIconFile</key>
 	<string>alphadia.icns</string>
 	<key>CFBundleIdentifier</key>
-	<string>alphadia.1.6.2</string>
+	<string>alphadia.1.7.0</string>
 	<key>CFBundleShortVersionString</key>
-	<string>1.6.2</string>
+	<string>1.7.0</string>
 	<key>CFBundleInfoDictionaryVersion</key>
 	<string>6.0</string>
 	<key>CFBundleName</key>
diff --git a/release/windows/alphadia_innoinstaller.iss b/release/windows/alphadia_innoinstaller.iss
index ad02b87c..10e3359c 100644
--- a/release/windows/alphadia_innoinstaller.iss
+++ b/release/windows/alphadia_innoinstaller.iss
@@ -2,7 +2,7 @@
 ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
 
 #define MyAppName "AlphaDIA"
-#define MyAppVersion "1.6.2"
+#define MyAppVersion "1.7.0"
 #define MyAppPublisher "Max Planck Institute of Biochemistry, Mann Labs"
 #define MyAppURL "https://github.com/MannLabs/alphadia"
 #define MyAppExeName "alphadia-gui.exe"
@@ -25,7 +25,7 @@ LicenseFile=..\..\LICENSE.txt
 PrivilegesRequired=lowest
 PrivilegesRequiredOverridesAllowed=dialog
 OutputDir=..\..\dist
-OutputBaseFilename=alphadia-1.6.2-win-x64
+OutputBaseFilename=alphadia-1.7.0-win-x64
 SetupIconFile=..\logos\alphadia.ico
 Compression=lzma
 SolidCompression=yes
diff --git a/release/windows/build_backend.ps1 b/release/windows/build_backend.ps1
index dae15414..07705399 100644
--- a/release/windows/build_backend.ps1
+++ b/release/windows/build_backend.ps1
@@ -6,7 +6,7 @@ python -c 'from huggingface_hub import get_full_repo_name; print("success")'
 
 pip install build
 python -m build
-pip install "dist/alphadia-1.6.2-py3-none-any.whl[stable]"
+pip install "dist/alphadia-1.7.0-py3-none-any.whl[stable]"
 
 # Creating the stand-alone pyinstaller folder
 pip install pyinstaller tbb