Merge pull request #30 from mmzdouc/dev_mmz

version 0.2.2
mmzdouc · May 27, 2024 · 82924df · 82924df
2 parents 82cde8e + 6670065
commit 82924df
Show file tree

Hide file tree

Showing 13 changed files with 322 additions and 112 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,13 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 
 N/A
 
+## [0.2.2] 27-05-2024
+
+### Changed
+
+- Removed 'phenotypes' as separate Feature attribute: write to Annotation object instead.
+- Implemented sorting of annotation entries in descending order.
+
 ## [0.2.1] 26-05-2024
 
 ### Fixed

diff --git a/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py b/fermo_core/data_analysis/phenotype_manager/class_phen_qual_assigner.py
@@ -27,7 +27,11 @@
 
 from pydantic import BaseModel
 
-from fermo_core.data_processing.builder_feature.dataclass_feature import Phenotype
+from fermo_core.data_processing.builder_feature.dataclass_feature import (
+    Annotations,
+    Feature,
+    Phenotype,
+)
 from fermo_core.data_processing.class_repository import Repository
 from fermo_core.data_processing.class_stats import Stats
 from fermo_core.input_output.class_parameter_manager import ParameterManager
@@ -60,6 +64,22 @@ def return_values(self: Self) -> tuple[Stats, Repository]:
         """
         return self.stats, self.features
 
+    @staticmethod
+    def add_annotation_attribute(feature: Feature) -> Feature:
+        """Add annotation attribute to feature if not existing
+
+        Arguments:
+            feature: the Feature object to modify
+
+        Returns:
+            The modified feature object
+        """
+        if feature.Annotations is None:
+            feature.Annotations = Annotations()
+        if feature.Annotations.phenotypes is None:
+            feature.Annotations.phenotypes = []
+        return feature
+
     def collect_sets(self: Self):
         """Collect sets of active and inactive features and assign actives"""
         f_ids_all_actives = set()
@@ -80,11 +100,12 @@ def collect_sets(self: Self):
         self.stats.phenotypes[0].f_ids_positive.update(f_ids_only_actives)
         for f_id in f_ids_only_actives:
             feature = self.features.get(f_id)
-            feature.phenotypes = [
+            feature = self.add_annotation_attribute(feature=feature)
+            feature.Annotations.phenotypes.append(
                 Phenotype(
                     score=0, format="qualitative", descr="only in positive samples"
                 )
-            ]
+            )
             self.features.modify(f_id, feature)
 
         self.f_ids_intersect = f_ids_all_actives.intersection(f_ids_all_inactives)
@@ -145,23 +166,26 @@ def bin_intersection(self: Self):
                 case "minmax":
                     factor = min(vals_act) / max(vals_inact)
                     if factor >= self.params.PhenoQualAssgnParams.factor:
-                        feature.phenotypes = [
+                        feature = self.add_annotation_attribute(feature=feature)
+                        feature.Annotations.phenotypes.append(
                             Phenotype(score=factor, format="qualitative")
-                        ]
+                        )
                         self.stats.phenotypes[0].f_ids_positive.add(f_id)
                 case "mean":
                     factor = mean(vals_act) / mean(vals_inact)
                     if factor >= self.params.PhenoQualAssgnParams.factor:
-                        feature.phenotypes = [
+                        feature = self.add_annotation_attribute(feature=feature)
+                        feature.Annotations.phenotypes.append(
                             Phenotype(score=factor, format="qualitative")
-                        ]
+                        )
                         self.stats.phenotypes[0].f_ids_positive.add(f_id)
                 case "median":
                     factor = median(vals_act) / median(vals_inact)
                     if factor >= self.params.PhenoQualAssgnParams.factor:
-                        feature.phenotypes = [
+                        feature = self.add_annotation_attribute(feature=feature)
+                        feature.Annotations.phenotypes.append(
                             Phenotype(score=factor, format="qualitative")
-                        ]
+                        )
                         self.stats.phenotypes[0].f_ids_positive.add(f_id)
                 case _:
                     raise RuntimeError("'PhenQualAssigner': Unsupported algorithm.")

diff --git a/fermo_core/data_analysis/phenotype_manager/class_phen_quant_conc_assigner.py b/fermo_core/data_analysis/phenotype_manager/class_phen_quant_conc_assigner.py
@@ -27,7 +27,11 @@
 from pydantic import BaseModel
 from scipy.stats import pearsonr, zscore
 
-from fermo_core.data_processing.builder_feature.dataclass_feature import Phenotype
+from fermo_core.data_processing.builder_feature.dataclass_feature import (
+    Annotations,
+    Feature,
+    Phenotype,
+)
 from fermo_core.data_processing.class_repository import Repository
 from fermo_core.data_processing.class_stats import Stats
 from fermo_core.input_output.class_parameter_manager import ParameterManager
@@ -60,6 +64,22 @@ def return_values(self: Self) -> tuple[Stats, Repository]:
         """
         return self.stats, self.features
 
+    @staticmethod
+    def add_annotation_attribute(feature: Feature) -> Feature:
+        """Add annotation attribute to feature if not existing
+
+        Arguments:
+            feature: the Feature object to modify
+
+        Returns:
+            The modified feature object
+        """
+        if feature.Annotations is None:
+            feature.Annotations = Annotations()
+        if feature.Annotations.phenotypes is None:
+            feature.Annotations.phenotypes = []
+        return feature
+
     def find_relevant_f_ids(self: Self):
         """Determines features detected in > 3 samples"""
         for f_id in self.stats.active_features:
@@ -120,9 +140,8 @@ def calculate_correlation(self: Self):
                     self.params.PhenoQuantConcAssgnParams.coeff_cutoff == 0
                     or self.params.PhenoQuantConcAssgnParams.p_val_cutoff == 0
                 ):
-                    if feature.phenotypes is None:
-                        feature.phenotypes = []
-                    feature.phenotypes.append(
+                    feature = self.add_annotation_attribute(feature=feature)
+                    feature.Annotations.phenotypes.append(
                         Phenotype(
                             format=assay.datatype,
                             category=assay.category,
@@ -136,9 +155,8 @@ def calculate_correlation(self: Self):
                     pearson_s > self.params.PhenoQuantConcAssgnParams.coeff_cutoff
                     and p_val_cor < self.params.PhenoQuantConcAssgnParams.p_val_cutoff
                 ):
-                    if feature.phenotypes is None:
-                        feature.phenotypes = []
-                    feature.phenotypes.append(
+                    feature = self.add_annotation_attribute(feature=feature)
+                    feature.Annotations.phenotypes.append(
                         Phenotype(
                             format=assay.datatype,
                             category=assay.category,

diff --git a/fermo_core/data_analysis/phenotype_manager/class_phen_quant_perc_assigner.py b/fermo_core/data_analysis/phenotype_manager/class_phen_quant_perc_assigner.py
@@ -27,7 +27,11 @@
 from pydantic import BaseModel
 from scipy.stats import pearsonr, zscore
 
-from fermo_core.data_processing.builder_feature.dataclass_feature import Phenotype
+from fermo_core.data_processing.builder_feature.dataclass_feature import (
+    Annotations,
+    Feature,
+    Phenotype,
+)
 from fermo_core.data_processing.class_repository import Repository
 from fermo_core.data_processing.class_stats import Stats
 from fermo_core.input_output.class_parameter_manager import ParameterManager
@@ -60,6 +64,22 @@ def return_values(self: Self) -> tuple[Stats, Repository]:
         """
         return self.stats, self.features
 
+    @staticmethod
+    def add_annotation_attribute(feature: Feature) -> Feature:
+        """Add annotation attribute to feature if not existing
+
+        Arguments:
+            feature: the Feature object to modify
+
+        Returns:
+            The modified feature object
+        """
+        if feature.Annotations is None:
+            feature.Annotations = Annotations()
+        if feature.Annotations.phenotypes is None:
+            feature.Annotations.phenotypes = []
+        return feature
+
     def find_relevant_f_ids(self: Self):
         """Determines features detected in > 3 samples"""
         for f_id in self.stats.active_features:
@@ -118,9 +138,8 @@ def calculate_correlation(self: Self):
                     self.params.PhenoQuantPercentAssgnParams.coeff_cutoff == 0
                     or self.params.PhenoQuantPercentAssgnParams.p_val_cutoff == 0
                 ):
-                    if feature.phenotypes is None:
-                        feature.phenotypes = []
-                    feature.phenotypes.append(
+                    feature = self.add_annotation_attribute(feature=feature)
+                    feature.Annotations.phenotypes.append(
                         Phenotype(
                             format=assay.datatype,
                             category=assay.category,
@@ -135,9 +154,8 @@ def calculate_correlation(self: Self):
                     and p_val_cor
                     < self.params.PhenoQuantPercentAssgnParams.p_val_cutoff
                 ):
-                    if feature.phenotypes is None:
-                        feature.phenotypes = []
-                    feature.phenotypes.append(
+                    feature = self.add_annotation_attribute(feature=feature)
+                    feature.Annotations.phenotypes.append(
                         Phenotype(
                             format=assay.datatype,
                             category=assay.category,

diff --git a/fermo_core/data_analysis/phenotype_manager/class_phenotype_manager.py b/fermo_core/data_analysis/phenotype_manager/class_phenotype_manager.py
@@ -146,7 +146,7 @@ def run_assigner_quant_percentage(self: Self):
             return
 
         logger.info(
-            "'PhenotypeManager': started quantitative phenotype data analysis for "
+            "'PhenotypeManager': completed quantitative phenotype data analysis for "
             "percentage data."
         )
 

diff --git a/fermo_core/data_analysis/score_assigner/class_score_assigner.py b/fermo_core/data_analysis/score_assigner/class_score_assigner.py
@@ -71,8 +71,12 @@ def assign_feature_scores(self: Self):
             feature = self.features.get(f_id)
             feature.Scores = FeatureScores()
 
-            if feature.phenotypes is not None and len(feature.phenotypes) != 0:
-                phen_scores = [assay.score for assay in feature.phenotypes]
+            if (
+                feature.Annotations is not None
+                and feature.Annotations.phenotypes is not None
+                and len(feature.Annotations.phenotypes) != 0
+            ):
+                phen_scores = [assay.score for assay in feature.Annotations.phenotypes]
                 feature.Scores.phenotype = max(phen_scores)
 
             if (