diff --git a/src/ccompass/__main__.py b/src/ccompass/__main__.py index 13abd26..8c15c52 100644 --- a/src/ccompass/__main__.py +++ b/src/ccompass/__main__.py @@ -21,7 +21,19 @@ def main(): action="version", version=f"{app_name} {version('ccompass')}", ) - parser.parse_args() + parser.add_argument( + "--test-run", + action="store_true", + help="For testing purposes only. " + "Perform a test run of the application.", + ) + args = parser.parse_args() + + if args.test_run: + from ._testing import do_test_run + + do_test_run() + return launch_gui() @@ -58,7 +70,6 @@ def launch_gui(): from .core import SessionModel from .main_gui import MainController - logger = init_logging() logger.info(f"Launching {app_name} GUI") # GUI theme @@ -69,5 +80,8 @@ def launch_gui(): controller.run() +logger = init_logging() + + if __name__ == "__main__": main() diff --git a/src/ccompass/_testing/__init__.py b/src/ccompass/_testing/__init__.py index 6af78d6..9eb4d5c 100644 --- a/src/ccompass/_testing/__init__.py +++ b/src/ccompass/_testing/__init__.py @@ -1 +1,178 @@ """Various private utilities for testing the ccompass package.""" + +import os +import tempfile + + +def do_test_run(): + """Perform a test run of most functionality based on a small + synthetic dataset. + + Mostly intended for testing frozen executables, to ensure all dependencies + are included. 
+ """ + from pathlib import Path + + from ..core import ( + FractDataset, + MarkerSet, + NeuralNetworkParametersModel, + SessionModel, + TotalProtDataset, + create_fullprofiles, + create_identity_conversion, + create_marker_profiles, + create_markerlist, + ) + from ..FDP import start_fract_data_processing + from ..main_gui import ( + logger, + ) + from ..MOA import class_comparisons, global_comparisons, stats_proteome + from ..TPP import start_total_proteome_processing + from .synthetic_data import ( + SyntheticDataConfig, + create_profiles, + fract_col_id_to_row, + total_proteome, + tp_col_id_to_row, + ) + + max_procs = os.cpu_count() + + # generate synthetic data + c = SyntheticDataConfig( + num_compartments=2, conditions=2, fractions=4, unknown_triple=[0, 0] + ) + fractionation_df0, marker_df = create_profiles(c=c) + total_prot_df = total_proteome( + proteins=list(fractionation_df0[c.protein_id_col]), c=c + ) + fractionation_df = fractionation_df0.drop(columns=[c.class_id_col]) + # uppercase is expected elsewhere + marker_df = marker_df.apply(lambda x: x.astype(str).str.upper()) + + # simulate user input + fract_filepath = "bla/fract.csv" + marker_filepath = "bla/marker.csv" + total_prot_filepath = "bla/total_prot.csv" + fract_dset = FractDataset( + df=fractionation_df, + table=[ + fract_col_id_to_row(col_id, c) + for col_id in fractionation_df + if not col_id.startswith("Amount_") + ], + ) + tp_dset = TotalProtDataset( + df=total_prot_df, + table=[ + tp_col_id_to_row(col_id, c=c) + for col_id in total_prot_df + if not col_id.startswith("RelativeRegulation") + ], + ) + sess = SessionModel( + fract_input={fract_filepath: fract_dset}, + ) + + # process fractionation data + ( + sess.fract_data, + sess.fract_std, + sess.fract_info, + sess.fract_conditions, + ) = start_fract_data_processing( + sess.fract_input, + sess.fract_preparams, + ) + + # process marker data + sess.marker_sets = { + marker_filepath: MarkerSet( + df=marker_df, + identifier_col=c.gene_id_col, + 
class_col=c.class_id_col, + ) + } + sess.marker_fractkey = c.gene_id_col + sess.marker_conv = create_identity_conversion(sess.marker_sets.values()) + + sess.marker_list = create_markerlist( + sess.marker_sets, + sess.marker_conv, + **sess.marker_params, + ) + + logger.info("Marker list created") + ( + sess.fract_marker, + sess.fract_marker_vis, + sess.fract_test, + ) = create_marker_profiles( + sess.fract_data, + sess.marker_fractkey, + sess.fract_info, + sess.marker_list, + ) + logger.info("Marker profiles created") + sess.fract_full = create_fullprofiles(sess.fract_marker, sess.fract_test) + logger.info("Full profiles created") + + # process total proteome data + sess.tp_input = {total_prot_filepath: tp_dset} + + sess.tp_data, sess.tp_info, sess.tp_icorr = ( + start_total_proteome_processing( + sess.tp_input, + sess.tp_preparams, + sess.tp_data, + sess.tp_info, + sess.tp_icorr, + ) + ) + + # train model + from ccompass.MOP import multi_organelle_prediction + + sess.NN_params = NeuralNetworkParametersModel( + rounds=1, + subrounds=3, + optimizers=["adam"], + NN_epochs=2, + NN_optimization="short", + ) + sess.learning_xyz = multi_organelle_prediction( + sess.fract_full, + sess.fract_marker, + sess.fract_test, + sess.fract_std, + sess.NN_params, + max_procs, + ) + + # "static statistics" + sess.results = stats_proteome( + sess.learning_xyz, + sess.fract_data, + sess.fract_conditions, + sess.NN_params.reliability, + ) + assert sess.results + + # "global changes" + sess.comparison = global_comparisons( + sess.results, + max_procs, + ) + assert sess.comparison + + # "class-centric changes" + class_comparisons( + sess.tp_data, + sess.results, + sess.comparison, + ) + + with tempfile.TemporaryDirectory() as tmpdir: + sess.to_numpy(Path(tmpdir, "session.npy")) diff --git a/src/ccompass/_testing/synthetic_data.py b/src/ccompass/_testing/synthetic_data.py index 74a6889..78f6dfa 100644 --- a/src/ccompass/_testing/synthetic_data.py +++ 
b/src/ccompass/_testing/synthetic_data.py @@ -1,11 +1,18 @@ """Create synthetic datasets for C-COMPASS testing.""" +import re from collections import Counter import numpy as np import pandas as pd from pydantic import BaseModel, ConfigDict, Field +from ccompass.core import ( + IDENTIFIER, + KEEP, + NA, +) + class SyntheticDataConfig(BaseModel): """Configuration for synthetic data generation.""" @@ -231,6 +238,10 @@ def create_profiles(c: SyntheticDataConfig): # CREATE UNKNOWN DOUBLE LOCALIZATIONS: for comp in comp_list: comp_others = [c for c in comp_list if c != comp] + assert ( + c.num_compartments >= 2 + or comp_specs[cond][comp]["number_double"] == 0 + ) for d in range(comp_specs[cond][comp]["number_double"]): count = count + 1 prot_name = f"Prot{count}" @@ -291,6 +302,9 @@ def create_profiles(c: SyntheticDataConfig): all_profiles.append(profile_conc) # CREATE UNKNOWN TRIPLE LOCALIZATIONS: for comp in comp_list: comp_others = [c for c in comp_list if c != comp] + assert ( + c.num_compartments >= 3 or comp_specs[cond][comp]["number_triple"] == 0 + ) for d in range(comp_specs[cond][comp]["number_triple"]): @@ -419,6 +433,41 @@ def total_proteome( return pd.DataFrame(all_values, columns=tp_columns) +# regexes to parse column IDs +fract_id_rx = re.compile( + r"(?P<condition>Con\d+)_Rep(?P<replicate>\d+)_Fr(?P<fraction>\d+)" +) +tp_id_rx = re.compile(r"(?P<condition>Con\d+)_Rep(?P<replicate>\d+)") + + +def fract_col_id_to_row(col_id: str, c: SyntheticDataConfig) -> list: + """Convert fractionation data column id to fractionation table rows.""" + if col_id == c.protein_id_col: + return [col_id, IDENTIFIER, NA, NA] + if col_id == c.gene_id_col: + return [col_id, KEEP, NA, NA] + + if not (match := fract_id_rx.match(col_id)): + raise ValueError(f"Invalid fractionation ID: {col_id}") + + condition = match["condition"] + replicate = int(match["replicate"]) + fraction = int(match["fraction"]) + return [col_id, condition, replicate, fraction] + + +def tp_col_id_to_row(col_id: str, c: SyntheticDataConfig) -> list: + """Convert 
total proteome data column id to total proteome table rows.""" + if col_id == c.protein_id_col: + return [col_id, IDENTIFIER] + + if not (match := tp_id_rx.match(col_id)): + raise ValueError(f"Invalid total proteome ID: {col_id}") + + condition = match["condition"] + return [col_id, condition] + + def main(): # filenames for the synthetic data filename_fract = "sim_Fractionation.txt" diff --git a/tests/test_full_analysis.py b/tests/test_full_analysis.py index dfd56df..5eaea62 100644 --- a/tests/test_full_analysis.py +++ b/tests/test_full_analysis.py @@ -1,16 +1,16 @@ import os -import re from pathlib import Path from ccompass._testing.synthetic_data import ( SyntheticDataConfig, create_profiles, + fract_col_id_to_row, total_proteome, + tp_col_id_to_row, ) from ccompass.core import ( IDENTIFIER, KEEP, - NA, FractDataset, MarkerSet, NeuralNetworkParametersModel, @@ -28,40 +28,6 @@ from ccompass.MOA import class_comparisons, global_comparisons, stats_proteome from ccompass.TPP import start_total_proteome_processing -# regexes to parse column IDs -fract_id_rx = re.compile( - r"(?P<condition>Con\d+)_Rep(?P<replicate>\d+)_Fr(?P<fraction>\d+)" -) -tp_id_rx = re.compile(r"(?P<condition>Con\d+)_Rep(?P<replicate>\d+)") - - -def fract_col_id_to_row(col_id: str, c: SyntheticDataConfig) -> list: - """Convert fractionation data column id to fractionation table rows.""" - if col_id == c.protein_id_col: - return [col_id, IDENTIFIER, NA, NA] - if col_id == c.gene_id_col: - return [col_id, KEEP, NA, NA] - - if not (match := fract_id_rx.match(col_id)): - raise ValueError(f"Invalid fractionation ID: {col_id}") - - condition = match["condition"] - replicate = int(match["replicate"]) - fraction = int(match["fraction"]) - return [col_id, condition, replicate, fraction] - - -def tp_col_id_to_row(col_id: str, c: SyntheticDataConfig) -> list: - """Convert total proteome data column id to total proteome table rows.""" - if col_id == c.protein_id_col: - return [col_id, IDENTIFIER] - - if not (match := tp_id_rx.match(col_id)): - raise 
ValueError(f"Invalid total proteome ID: {col_id}") - - condition = match["condition"] - return [col_id, condition] - def test_full(): """Check that we can run the full analysis. diff --git a/tests/test_misc.py b/tests/test_misc.py index e6e228b..f09c9de 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -110,7 +110,7 @@ def test_create_markerlist(): def test_synth_data_deterministic(): """Test that the synthetic data generation is deterministic.""" - c = SyntheticDataConfig() + c = SyntheticDataConfig(num_compartments=2, conditions=2, fractions=3) fractionation_df1, marker_df1 = create_profiles(c=c) total_prot_df1 = total_proteome( proteins=list(fractionation_df1[c.protein_id_col]), c=c