Merge pull request #388 from MannLabs/volcanovalidation

Refactoring of differential expression analysis - new backend and ttest.
MannLabs · Jan 8, 2025 · 808c90f · 808c90f
2 parents 0842f86 + 9892bb3
commit 808c90f
Show file tree

Hide file tree

Showing 6 changed files with 786 additions and 0 deletions.
diff --git a/alphastats/dataset/dataset.py b/alphastats/dataset/dataset.py
@@ -90,6 +90,8 @@ def __init__(
 
         # self.evidence_df: pd.DataFrame = loader.evidence_df  # TODO unused
 
+        # TODO: Add a store for dea results here
+
         self._dataset_factory = DataSetFactory(
             rawinput=self.rawinput,
             intensity_column=self._intensity_column,
@@ -269,6 +271,7 @@ def _get_statistics(self) -> Statistics:
             preprocessing_info=self.preprocessing_info,
         )
 
+    # TODO: Add function get_differential_expression_analysis() which will handle the dea store and run diff_expression_analysis() if necessary
     def diff_expression_analysis(
         self,
         group1: Union[str, list],
@@ -279,6 +282,7 @@ def diff_expression_analysis(
         fdr: float = 0.05,
     ) -> pd.DataFrame:
         """A wrapper for the Statistics.diff_expression_analysis(), see documentation there."""
+        # TODO: This method is the one which will be called if a dea result is not yet in the store.
         return self._get_statistics().diff_expression_analysis(
             group1,
             group2,

diff --git a/alphastats/gui/utils/analysis.py b/alphastats/gui/utils/analysis.py
@@ -80,6 +80,7 @@ def _nan_check(self) -> None:  # noqa: B027
         """Raise ValueError for methods that do not tolerate NaNs if there are any."""
         if not self._works_with_nans and self._dataset.mat.isnan().values.any():
             raise ValueError("This analysis does not work with NaN values.")
+        # TODO: this raises an AttributeError for `isnan` during Wald analysis
 
     def _pre_analysis_check(self) -> None:  # noqa: B027
         """Perform pre-analysis check, raise ValueError on fail."""
@@ -327,6 +328,7 @@ def _do_analysis(self):
         Returns a tuple(figure, analysis_object, parameters) where figure is the plot,
         analysis_object is the underlying object, parameters is a dictionary of the parameters used.
         """
+        # TODO: This is the place where the new workflow of run/fetch DEA, filter significance, create plot should live. 1. self._dataset.get_dea(**parameters1), 2. dea.get_significance(result, parameters2), 3. plot_volcano(result, significance, parameters3)
         # Note that currently, values that are not set by the UI would still be passed as None to the VolcanoPlot class,
         # thus overwriting the default values set therein.
         # If we introduce optional parameters in the UI, either use `inspect` to get the defaults from the class,
@@ -381,6 +383,8 @@ def _do_analysis(self):
 class DifferentialExpressionAnalysis(AbstractGroupCompareAnalysis):
     """Widget for differential expression analysis."""
 
+    # TODO: This functionality will disappear and become a part of the VolcanoPlot class. This will produce a widget to select whether the result should be displayed as table or as plot.
+
     def show_widget(self):
         """Show the widget and gather parameters."""
 

diff --git a/alphastats/multicova/multicova.py b/alphastats/multicova/multicova.py
@@ -279,6 +279,7 @@ def get_tstat_limit(stats, fdr=0.01):
     return t_limit
 
 
+# TODO: Separate q-value and FDR annotation
 def annotate_fdr_significance(res_real, stats, fdr=0.01):
     t_limit = np.min(stats[stats.fdr <= fdr].t_cut)
     res_real["qval"] = [

diff --git a/alphastats/statistics/differential_expression_analysis.py b/alphastats/statistics/differential_expression_analysis.py
@@ -99,6 +99,7 @@ def sam(self) -> Tuple[pd.DataFrame, float]:
             c2=list(
                 self.metadata[self.metadata[self.column] == self.group2][Cols.SAMPLE]
             ),
+            # TODO: Remove hardcoded values
             s0=0.05,
             n_perm=self.perm,
             fdr=self.fdr,
@@ -117,6 +118,7 @@ def sam(self) -> Tuple[pd.DataFrame, float]:
                 "qval",
             ]
         ]
+        # TODO: these can just be renames
         df["log2fc"] = res_ttest["fc"]
         df["FDR"] = res_ttest[fdr_column]
 
@@ -139,6 +141,7 @@ def _welch_ttest(self) -> pd.DataFrame:
 
         d = self._prepare_anndata()
 
+        # TODO: pass log flag correctly
         test = de.test.t_test(data=d, grouping=self.column)
         df = test.summary().rename(columns={"gene": Cols.INDEX})
         return df