Skip to content

Commit

Permalink
Add --test-run CLI option
Browse files Browse the repository at this point in the history
  • Loading branch information
dweindl committed Feb 20, 2025
1 parent e591960 commit 754303e
Show file tree
Hide file tree
Showing 5 changed files with 245 additions and 39 deletions.
18 changes: 16 additions & 2 deletions src/ccompass/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,19 @@ def main():
action="version",
version=f"{app_name} {version('ccompass')}",
)
parser.parse_args()
parser.add_argument(
"--test-run",
action="store_true",
help="For testing purposes only. "
"Perform a test run of the application.",
)
args = parser.parse_args()

if args.test_run:
from ._testing import do_test_run

do_test_run()
return

launch_gui()

Expand Down Expand Up @@ -58,7 +70,6 @@ def launch_gui():
from .core import SessionModel
from .main_gui import MainController

logger = init_logging()
logger.info(f"Launching {app_name} GUI")

# GUI theme
Expand All @@ -69,5 +80,8 @@ def launch_gui():
controller.run()


logger = init_logging()


if __name__ == "__main__":
main()
177 changes: 177 additions & 0 deletions src/ccompass/_testing/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,178 @@
"""Various private utilities for testing the ccompass package."""

import os
import tempfile


def do_test_run():
    """Perform a test run of most functionality on a small synthetic dataset.

    Mostly intended for testing frozen executables, to ensure all dependencies
    are included.

    The run exercises, in order: synthetic data generation, fractionation
    preprocessing, marker handling, total proteome processing,
    neural-network-based organelle prediction, statistics/comparisons, and
    finally session serialization to a temporary directory.
    """
    # Imports are local so that importing this module (or starting the app
    # normally) does not pull in the heavy processing/GUI dependencies.
    from pathlib import Path

    from ..core import (
        FractDataset,
        MarkerSet,
        NeuralNetworkParametersModel,
        SessionModel,
        TotalProtDataset,
        create_fullprofiles,
        create_identity_conversion,
        create_marker_profiles,
        create_markerlist,
    )
    from ..FDP import start_fract_data_processing
    from ..main_gui import (
        logger,
    )
    from ..MOA import class_comparisons, global_comparisons, stats_proteome
    from ..TPP import start_total_proteome_processing
    from .synthetic_data import (
        SyntheticDataConfig,
        create_profiles,
        fract_col_id_to_row,
        total_proteome,
        tp_col_id_to_row,
    )

    # Use all available cores for the parallelized steps below.
    max_procs = os.cpu_count()

    # generate synthetic data -- deliberately tiny (2 compartments,
    # 2 conditions, 4 fractions, no triple localizations) to keep the run fast
    c = SyntheticDataConfig(
        num_compartments=2, conditions=2, fractions=4, unknown_triple=[0, 0]
    )
    fractionation_df0, marker_df = create_profiles(c=c)
    total_prot_df = total_proteome(
        proteins=list(fractionation_df0[c.protein_id_col]), c=c
    )
    # Drop the ground-truth class column; real user input would not have it.
    fractionation_df = fractionation_df0.drop(columns=[c.class_id_col])
    # uppercase is expected elsewhere
    marker_df = marker_df.apply(lambda x: x.astype(str).str.upper())

    # simulate user input: the file paths are dummies (presumably only used
    # as dict keys -- the DataFrames are supplied directly; verify nothing
    # reads these paths from disk)
    fract_filepath = "bla/fract.csv"
    marker_filepath = "bla/marker.csv"
    total_prot_filepath = "bla/total_prot.csv"
    fract_dset = FractDataset(
        df=fractionation_df,
        table=[
            # map each column ID to its [column, condition, replicate,
            # fraction] table row; "Amount_*" columns are not part of the
            # fractionation table
            fract_col_id_to_row(col_id, c)
            for col_id in fractionation_df
            if not col_id.startswith("Amount_")
        ],
    )
    tp_dset = TotalProtDataset(
        df=total_prot_df,
        table=[
            # "RelativeRegulation*" columns are excluded from the table
            tp_col_id_to_row(col_id, c=c)
            for col_id in total_prot_df
            if not col_id.startswith("RelativeRegulation")
        ],
    )
    sess = SessionModel(
        fract_input={fract_filepath: fract_dset},
    )

    # process fractionation data
    (
        sess.fract_data,
        sess.fract_std,
        sess.fract_info,
        sess.fract_conditions,
    ) = start_fract_data_processing(
        sess.fract_input,
        sess.fract_preparams,
    )

    # process marker data
    sess.marker_sets = {
        marker_filepath: MarkerSet(
            df=marker_df,
            identifier_col=c.gene_id_col,
            class_col=c.class_id_col,
        )
    }
    sess.marker_fractkey = c.gene_id_col
    # identity conversion: marker identifiers are used as-is, no renaming
    sess.marker_conv = create_identity_conversion(sess.marker_sets.values())

    sess.marker_list = create_markerlist(
        sess.marker_sets,
        sess.marker_conv,
        **sess.marker_params,
    )

    logger.info("Marker list created")
    (
        sess.fract_marker,
        sess.fract_marker_vis,
        sess.fract_test,
    ) = create_marker_profiles(
        sess.fract_data,
        sess.marker_fractkey,
        sess.fract_info,
        sess.marker_list,
    )
    logger.info("Marker profiles created")
    sess.fract_full = create_fullprofiles(sess.fract_marker, sess.fract_test)
    logger.info("Full profiles created")

    # process total proteome data
    sess.tp_input = {total_prot_filepath: tp_dset}

    # NOTE(review): the current (default) tp_data/tp_info/tp_icorr values are
    # passed in and then rebound to the return values -- confirm this matches
    # start_total_proteome_processing's intended in/out contract
    sess.tp_data, sess.tp_info, sess.tp_icorr = (
        start_total_proteome_processing(
            sess.tp_input,
            sess.tp_preparams,
            sess.tp_data,
            sess.tp_info,
            sess.tp_icorr,
        )
    )

    # train model
    from ccompass.MOP import multi_organelle_prediction

    # minimal training configuration so the test run completes quickly
    sess.NN_params = NeuralNetworkParametersModel(
        rounds=1,
        subrounds=3,
        optimizers=["adam"],
        NN_epochs=2,
        NN_optimization="short",
    )
    sess.learning_xyz = multi_organelle_prediction(
        sess.fract_full,
        sess.fract_marker,
        sess.fract_test,
        sess.fract_std,
        sess.NN_params,
        max_procs,
    )

    # "static statistics"
    sess.results = stats_proteome(
        sess.learning_xyz,
        sess.fract_data,
        sess.fract_conditions,
        sess.NN_params.reliability,
    )
    assert sess.results

    # "global changes"
    sess.comparison = global_comparisons(
        sess.results,
        max_procs,
    )
    assert sess.comparison

    # "class-centric changes"
    class_comparisons(
        sess.tp_data,
        sess.results,
        sess.comparison,
    )

    # finally, make sure the complete session can be serialized
    with tempfile.TemporaryDirectory() as tmpdir:
        sess.to_numpy(Path(tmpdir, "session.npy"))
49 changes: 49 additions & 0 deletions src/ccompass/_testing/synthetic_data.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
"""Create synthetic datasets for C-COMPASS testing."""

import re
from collections import Counter

import numpy as np
import pandas as pd
from pydantic import BaseModel, ConfigDict, Field

from ccompass.core import (
IDENTIFIER,
KEEP,
NA,
)


class SyntheticDataConfig(BaseModel):
"""Configuration for synthetic data generation."""
Expand Down Expand Up @@ -231,6 +238,10 @@ def create_profiles(c: SyntheticDataConfig):
# CREATE UNKNOWN DOUBLE LOCALIZATIONS:
for comp in comp_list:
comp_others = [c for c in comp_list if c != comp]
assert (
c.num_compartments >= 2
or comp_specs[cond][comp]["number_double"] == 0
)
for d in range(comp_specs[cond][comp]["number_double"]):
count = count + 1
prot_name = f"Prot{count}"
Expand Down Expand Up @@ -291,6 +302,9 @@ def create_profiles(c: SyntheticDataConfig):
all_profiles.append(profile_conc)

# CREATE UNKNOWN TRIPLE LOCALIZATIONS:
assert (
c.num_compartments >= 3 or comp_specs[cond][comp]["number_triple"] == 0
)
for comp in comp_list:
comp_others = [c for c in comp_list if c != comp]
for d in range(comp_specs[cond][comp]["number_triple"]):
Expand Down Expand Up @@ -419,6 +433,41 @@ def total_proteome(
return pd.DataFrame(all_values, columns=tp_columns)


# regexes to parse column IDs
# fractionation columns look like "Con1_Rep2_Fr3":
# condition, replicate, and fraction
fract_id_rx = re.compile(
    r"(?P<condition>Con\d+)_Rep(?P<replicate>\d+)_Fr(?P<fraction>\d+)"
)
# total proteome columns look like "Con1_Rep2": condition and replicate only
tp_id_rx = re.compile(r"(?P<condition>Con\d+)_Rep(?P<replicate>\d+)")


def fract_col_id_to_row(col_id: str, c: SyntheticDataConfig) -> list:
    """Convert fractionation data column id to fractionation table rows.

    The protein-ID and gene-ID columns map to fixed special rows; every
    other column must follow the ``Con<i>_Rep<j>_Fr<k>`` naming scheme.

    :raises ValueError: if *col_id* matches neither pattern.
    """
    # Non-sample columns get fixed roles and no replicate/fraction.
    if col_id == c.protein_id_col:
        return [col_id, IDENTIFIER, NA, NA]
    if col_id == c.gene_id_col:
        return [col_id, KEEP, NA, NA]

    m = fract_id_rx.match(col_id)
    if m is None:
        raise ValueError(f"Invalid fractionation ID: {col_id}")

    return [
        col_id,
        m["condition"],
        int(m["replicate"]),
        int(m["fraction"]),
    ]


def tp_col_id_to_row(col_id: str, c: SyntheticDataConfig) -> list:
    """Convert total proteome data column id to total proteome table rows.

    The protein-ID column maps to the identifier row; every other column
    must follow the ``Con<i>_Rep<j>`` naming scheme.

    :raises ValueError: if *col_id* matches neither pattern.
    """
    if col_id == c.protein_id_col:
        return [col_id, IDENTIFIER]

    m = tp_id_rx.match(col_id)
    if m is None:
        raise ValueError(f"Invalid total proteome ID: {col_id}")

    return [col_id, m["condition"]]


def main():
# filenames for the synthetic data
filename_fract = "sim_Fractionation.txt"
Expand Down
38 changes: 2 additions & 36 deletions tests/test_full_analysis.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import os
import re
from pathlib import Path

from ccompass._testing.synthetic_data import (
SyntheticDataConfig,
create_profiles,
fract_col_id_to_row,
total_proteome,
tp_col_id_to_row,
)
from ccompass.core import (
IDENTIFIER,
KEEP,
NA,
FractDataset,
MarkerSet,
NeuralNetworkParametersModel,
Expand All @@ -28,40 +28,6 @@
from ccompass.MOA import class_comparisons, global_comparisons, stats_proteome
from ccompass.TPP import start_total_proteome_processing

# regexes to parse column IDs
fract_id_rx = re.compile(
r"(?P<condition>Con\d+)_Rep(?P<replicate>\d+)_Fr(?P<fraction>\d+)"
)
tp_id_rx = re.compile(r"(?P<condition>Con\d+)_Rep(?P<replicate>\d+)")


def fract_col_id_to_row(col_id: str, c: SyntheticDataConfig) -> list:
"""Convert fractionation data column id to fractionation table rows."""
if col_id == c.protein_id_col:
return [col_id, IDENTIFIER, NA, NA]
if col_id == c.gene_id_col:
return [col_id, KEEP, NA, NA]

if not (match := fract_id_rx.match(col_id)):
raise ValueError(f"Invalid fractionation ID: {col_id}")

condition = match["condition"]
replicate = int(match["replicate"])
fraction = int(match["fraction"])
return [col_id, condition, replicate, fraction]


def tp_col_id_to_row(col_id: str, c: SyntheticDataConfig) -> list:
"""Convert total proteome data column id to total proteome table rows."""
if col_id == c.protein_id_col:
return [col_id, IDENTIFIER]

if not (match := tp_id_rx.match(col_id)):
raise ValueError(f"Invalid total proteome ID: {col_id}")

condition = match["condition"]
return [col_id, condition]


def test_full():
"""Check that we can run the full analysis.
Expand Down
2 changes: 1 addition & 1 deletion tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def test_create_markerlist():

def test_synth_data_deterministic():
"""Test that the synthetic data generation is deterministic."""
c = SyntheticDataConfig()
c = SyntheticDataConfig(num_compartments=2, conditions=2, fractions=3)
fractionation_df1, marker_df1 = create_profiles(c=c)
total_prot_df1 = total_proteome(
proteins=list(fractionation_df1[c.protein_id_col]), c=c
Expand Down

0 comments on commit 754303e

Please sign in to comment.