Use constants

ICB-DCM · Feb 18, 2025 · aed2c04 · aed2c04
1 parent 3d71792
commit aed2c04
Show file tree

Hide file tree

Showing 6 changed files with 82 additions and 47 deletions.
diff --git a/src/ccompass/FDP.py b/src/ccompass/FDP.py
@@ -14,6 +14,7 @@
 from sklearn.preprocessing import MinMaxScaler
 
 from ._utils import unique_preserve_order
+from .core import IDENTIFIER, KEEP
 
 logger = logging.getLogger(__package__)
 
@@ -33,7 +34,7 @@ def create_dataset(
     - condition_id -> replicate_id -> DataFrame
     - all datasets have the same index (protein identifiers)
     """
-    conditions = [x for x in conditions if x not in ["", "[IDENTIFIER]"]]
+    conditions = [x for x in conditions if x not in ["", IDENTIFIER]]
 
     # collect all identifiers
     all_identifiers = list(
@@ -82,7 +83,7 @@ def create_dataset(
                     how="outer",
                 )
 
-                if condition == "[KEEP]":
+                if condition == KEEP:
                     if samplename + "_x" in data_new.columns:
                         # handle merge conflicts
                         for element in list(data_new.index):
@@ -129,9 +130,9 @@ def create_dataset(
         dataset[condition] = repdata
 
     data_keep = {}
-    if "[KEEP]" in dataset:
-        data_keep = dataset["[KEEP]"]
-        del dataset["[KEEP]"]
+    if KEEP in dataset:
+        data_keep = dataset[KEEP]
+        del dataset[KEEP]
 
     return dataset, data_keep, progress
 
@@ -528,7 +529,7 @@ def start_fract_data_processing(
         sample[1]
         for input_table in input_tables.values()
         for sample in input_table
-        if sample[1] != "[IDENTIFIER]"
+        if sample[1] != IDENTIFIER
     )
 
     # ---------------------------------------------------------------------
@@ -723,7 +724,7 @@ def sample_tables_are_valid(
 
     # validate samples table
     if not all(
-        any(sample[1] == "[IDENTIFIER]" for sample in input_table)
+        any(sample[1] == IDENTIFIER for sample in input_table)
         for input_table in input_tables.values()
     ):
         messagebox.showerror(

diff --git a/src/ccompass/MOA.py b/src/ccompass/MOA.py
@@ -9,7 +9,7 @@
 from scipy.stats import ttest_ind
 
 from ._utils import PrefixFilter
-from .core import ComparisonModel, ResultsModel, XYZ_Model
+from .core import KEEP, ComparisonModel, ResultsModel, XYZ_Model
 
 logger = logging.getLogger(__package__)
 
@@ -163,7 +163,7 @@ def stats_proteome(
 ):
     """Proteome prediction / statistics."""
     logger.info("Performing proteome prediction...")
-    conditions = [x for x in fract_conditions if x != "[KEEP]"]
+    conditions = [x for x in fract_conditions if x != KEEP]
     results = {}
 
     for condition in conditions:

diff --git a/src/ccompass/TPP.py b/src/ccompass/TPP.py
@@ -3,22 +3,23 @@
 import logging
 import math
 from tkinter import messagebox
-from typing import Any
+from typing import Any, Literal
 
 import FreeSimpleGUI as sg
 import numpy as np
 import pandas as pd
 from scipy.stats import pearsonr
 
 from ._utils import unique_preserve_order
+from .core import IDENTIFIER, KEEP
 
 logger = logging.getLogger(__package__)
 
 
 def create_dataset(
     tp_indata, tp_tables, tp_identifiers, tp_conditions, window
 ):
-    tp_conditions.remove("[IDENTIFIER]")
+    tp_conditions.remove(IDENTIFIER)
 
     idents = []
     for path in tp_tables:
@@ -50,7 +51,7 @@ def create_dataset(
                         left_index=True,
                         how="outer",
                     )
-                    if condition == "[KEEP]":
+                    if condition == KEEP:
                         if samplename + "_x" in data_new.columns:
                             for element in list(data_new.index):
                                 if pd.isnull(
@@ -87,9 +88,9 @@ def create_dataset(
 
         dataset[condition] = data_new
 
-    if "[KEEP]" in dataset:
-        data_keep = dataset["[KEEP]"]
-        del dataset["[KEEP]"]
+    if KEEP in dataset:
+        data_keep = dataset[KEEP]
+        del dataset[KEEP]
     else:
         data_keep = pd.DataFrame()
 
@@ -136,7 +137,9 @@ def transform_data(data, window):
     return data
 
 
-def impute_data(data, window, mode):
+def impute_data(
+    data, window: sg.Window | None, mode: Literal["normal", "constant"]
+):
     s = 1.8
     w = 0.3
     for condition in data:
@@ -253,7 +256,7 @@ def start_total_proteome_processing(
 ):
     # validate input
     if not all(
-        any("[IDENTIFIER]" == sample[1] for sample in table)
+        any(IDENTIFIER == sample[1] for sample in table)
         for table in tp_tables.values()
     ):
         messagebox.showerror("Error", "At least one Identifier is missing.")

diff --git a/src/ccompass/core.py b/src/ccompass/core.py
@@ -21,6 +21,18 @@
 logger = logging.getLogger(__name__)
 
 
+#: The value used in the sample tables as condition IDs to indicate the column
+#  that contains the protein IDs.
+IDENTIFIER = "[IDENTIFIER]"
+#: The value used in the sample tables as condition IDs to indicate the columns
+#  with values that should be carried forward to the final results.
+#  (And potentially be used for matching markers.)
+KEEP = "[KEEP]"
+#: The value used in the sample tables for not-applicable entries,
+#  i.e., the "Replicate" and "Fraction" columns of KEEP and IDENTIFIER rows.
+NA = "-"
+
+
 class AppSettings(BaseModel):
     """Settings for the C-COMPASS application"""
 
@@ -315,10 +327,10 @@ class MarkerSet(BaseModel):
     #: column ID in `df` to match the fractionation data identifiers
     #  ("key column" in GUI)
     # "-" means unset
-    identifier_col: str = "-"
+    identifier_col: str = NA
     #: column ID in `df` that contains the class names
     # "-" means unset
-    class_col: str = "-"
+    class_col: str = NA
 
     @property
     def classes(self) -> list[str]:
@@ -392,7 +404,7 @@ class SessionModel(BaseModel):
     fract_preparams: dict[str, dict[str, Any]] = fract_default()
     #: The column ID of the fractionation DataFrame that is
     #  to be used for matching the markers (`marker_list["name"]`)
-    marker_fractkey: str = "[IDENTIFIER]"
+    marker_fractkey: str = IDENTIFIER
 
     ## User input markers
 

diff --git a/src/ccompass/main_gui.py b/src/ccompass/main_gui.py
@@ -16,6 +16,9 @@
 from . import MOA, RP, app_name, readthedocs_url, repository_url
 from ._gui_utils import wait_cursor
 from .core import (
+    IDENTIFIER,
+    KEEP,
+    NA,
     AppSettings,
     MarkerSet,
     SessionModel,
@@ -683,8 +686,8 @@ def create_spatial_prediction_frame() -> sg.Frame:
 def create_marker_selection_frame() -> sg.Frame:
     """Create the "Marker Selection" frame."""
     tt_fract_key = (
-        "The column in the fractionation data (marked [IDENTIFIER] or [KEEP]) "
-        "to match the key column of the marker table."
+        f"The column in the fractionation data (marked {IDENTIFIER} or {KEEP})"
+        " to match the key column of the marker table."
     )
     tt_marker_key = (
         "The column in the marker table to match the key column "
@@ -788,7 +791,7 @@ def create_marker_selection_frame() -> sg.Frame:
                             sg.Text("Fract. Key:", tooltip=tt_fract_key),
                             sg.Push(),
                             sg.Combo(
-                                ["[IDENTIFIER]"],
+                                [IDENTIFIER],
                                 key="-marker_fractkey-",
                                 size=(18, 1),
                                 readonly=True,
@@ -1436,7 +1439,7 @@ def _handle_match_markers(self, values: dict):
                     self.model.fract_test,
                 ) = create_marker_profiles(
                     self.model.fract_data,
-                    values["-marker_fractkey-"],
+                    self.model.marker_fractkey,
                     self.model.fract_info,
                     self.model.marker_list,
                 )
@@ -1453,12 +1456,12 @@ def _handle_match_markers(self, values: dict):
     def _handle_training(self, key: str):
         """Handle click on "Train C-COMPASS!" button."""
         # FIXME: `stds` is not in the format expected for upsampling
-        if key == "[IDENTIFIER]":
+        if key == IDENTIFIER:
             stds = self.model.fract_std
         else:
             # Add the required column and set as index
             conditions_std = [
-                x for x in self.model.fract_conditions if x != "[KEEP]"
+                x for x in self.model.fract_conditions if x != KEEP
             ]
             stds = {}
             for condition in conditions_std:
@@ -1584,7 +1587,7 @@ def _handle_session_new(self):
             values=[], size=self.main_window["-marker_class-"].Size
         )
         self.main_window["-marker_fractkey-"].update(
-            values=["[IDENTIFIER]"] + list(self.model.fract_info)
+            values=[IDENTIFIER] + list(self.model.fract_info)
         )
 
     def _handle_session_open(self):
@@ -1613,7 +1616,7 @@ def _handle_session_open(self):
             )
 
         self.main_window["-marker_fractkey-"].update(
-            values=["[IDENTIFIER]"] + list(self.model.fract_info),
+            values=[IDENTIFIER] + list(self.model.fract_info),
             value=self.model.marker_fractkey,
         )
         self.app_settings.last_session_dir = Path(filename).parent
@@ -1691,7 +1694,7 @@ def _handle_process_fract_data(self):
         )
 
         self.main_window["-marker_fractkey-"].update(
-            values=["[IDENTIFIER]"] + list(self.model.fract_info)
+            values=[IDENTIFIER] + list(self.model.fract_info)
         )
 
         if self.model.fract_data["class"]:
@@ -1710,7 +1713,7 @@ def _handle_reset_fract_data(self):
         fract_buttons(self.main_window, False)
 
         self.main_window["-marker_fractkey-"].update(
-            values=["[IDENTIFIER]"], value=""
+            values=[IDENTIFIER], value=""
         )
 
     def _handle_reset_total_proteome(self):
@@ -1911,9 +1914,9 @@ def fract_set_keep(values, window, fract_tables):
     path = values["-fractionation_path-"]
     table = fract_tables[path]
     for pos in values["-fractionation_table-"]:
-        table[pos][1] = "[KEEP]"
-        table[pos][2] = "-"
-        table[pos][3] = "-"
+        table[pos][1] = KEEP
+        table[pos][2] = NA
+        table[pos][3] = NA
     fract_tables[path] = table
     window["-fractionation_table-"].update(values=fract_tables[path])
 
@@ -1983,9 +1986,9 @@ def fract_handle_set_identifier(
                 table[ident_pos[path][0]][3] = ""
             identifiers[path] = table[pos[0]][0]
             ident_pos[path] = pos
-            table[pos[0]][1] = "[IDENTIFIER]"
-            table[pos[0]][2] = "-"
-            table[pos[0]][3] = "-"
+            table[pos[0]][1] = IDENTIFIER
+            table[pos[0]][2] = NA
+            table[pos[0]][3] = NA
             input_tables[path] = table
             window["-fractionation_table-"].update(
                 values=input_tables[values["-fractionation_path-"]]
@@ -2081,7 +2084,7 @@ def tp_set_keep(values, window, tp_tables):
     path = values["-tp_path-"]
     table = tp_tables[path]
     for pos in values["-tp_table-"]:
-        table[pos][1] = "[KEEP]"
+        table[pos][1] = KEEP
     tp_tables[path] = table
     window["-tp_table-"].update(values=tp_tables[path])
 
@@ -2114,7 +2117,7 @@ def tp_set_identifier(values, window, tp_tables, tp_pos, tp_identifiers):
                 table[tp_pos[path][0]][1] = ""
             tp_identifiers[path] = table[pos[0]][0]
             tp_pos[path] = pos
-            table[pos[0]][1] = "[IDENTIFIER]"
+            table[pos[0]][1] = IDENTIFIER
             tp_tables[path] = table
             window["-tp_table-"].update(values=tp_tables[values["-tp_path-"]])
         else:
@@ -2178,7 +2181,7 @@ def check_markers(marker_sets: dict[str, MarkerSet]) -> bool:
         return False
 
     for marker_set in marker_sets.values():
-        if marker_set.identifier_col == "-" or marker_set.class_col == "-":
+        if marker_set.identifier_col == NA or marker_set.class_col == NA:
             return False
 
     return True
@@ -2224,10 +2227,10 @@ def refresh_markercols(window, values, marker_sets: dict[str, MarkerSet]):
         logger.exception("Error")
 
         window["-marker_key-"].update(
-            values=[], value="-", size=window["-marker_key-"].Size
+            values=[], value=NA, size=window["-marker_key-"].Size
         )
         window["-marker_class-"].update(
-            values=[], value="-", size=window["-marker_class-"].Size
+            values=[], value=NA, size=window["-marker_class-"].Size
         )
 
 
@@ -2412,7 +2415,7 @@ def create_marker_profiles(fract_data, key: str, fract_info, marker_list):
     fract_test = {}
 
     for condition in profiles:
-        if key == "[IDENTIFIER]":
+        if key == IDENTIFIER:
             profile_full = pd.merge(
                 profiles[condition],
                 marker_list,
@@ -2445,7 +2448,7 @@ def create_marker_profiles(fract_data, key: str, fract_info, marker_list):
 
     fract_marker_vis = {}
     for condition in profiles_vis:
-        if key == "[IDENTIFIER]":
+        if key == IDENTIFIER:
             fract_marker_vis[condition] = pd.merge(
                 profiles_vis[condition],
                 marker_list,