Merge pull request #31 from mmzdouc/dev_mmz

Dev mmz
mmzdouc · Jun 3, 2024 · 00eab95 · 00eab95
2 parents 82924df + ab75578
commit 00eab95
Show file tree

Hide file tree

Showing 24 changed files with 487 additions and 267 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,7 +10,16 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 
 N/A
 
-## [0.2.1] 27-05-2024
+## [0.3.0] 03-06-2024
+
+### Changed
+
+- [Breaking change] Parameter settings for `additional_modules/feature_filtering` were changed from a list of ranges to a dictionary with explicit values.
+- Reworked score assignment for qualitative phenotype data: phenotype-associated features now always receive a score of 1.0, and non-associated ones a score of 0.0.
+- For all modules with runtime restriction, the 'maximum_runtime' parameter was set to a default of '0' (unlimited runtime). Therefore, runtime restriction must now be specified explicitly.
+- Added a 'module_passed' parameter to all modules. This allows a more accurate description via the SummaryWriter (e.g. a module was activated but timed out, so a lack of annotation is due to premature termination and not because there were no hits).
+
+## [0.2.2] 27-05-2024
 
 ### Changed
 
@@ -30,7 +39,7 @@ N/A
 - Replaced global logger with logger specific for `main_cli()`. `main()` now needs an argument `logger`
 - Reworked output file naming: all output files now start with `out.fermo.` and a suffix specifying their type
 - Removed output directory selection: the output directory is now always `results` located in the directory in which the peaktable resides.
-- Features now always have default result values (before, some Features could have an empty dictionary)
+- Features now always have default result values (previously, some Features could have an empty dictionary)
 - MS2Query assignment now uses temporary directories for data reading/writing
 
 ### Fixed
@@ -45,4 +54,4 @@ N/A
 
 ## [0.1.0] 19-05-2024
 
-First public release of `fermo_core`
+- First public release of `fermo_core`
diff --git a/example_data/case_study_parameters.json b/example_data/case_study_parameters.json
@@ -66,14 +66,10 @@
     "additional_modules": {
         "feature_filtering": {
             "activate_module": true,
-            "filter_rel_int_range": [
-                0.1,
-                1.0
-            ],
-            "filter_rel_area_range": [
-                0.1,
-                1.0
-            ]
+            "filter_rel_int_range_min": 0.1,
+            "filter_rel_int_range_max": 1.0,
+            "filter_rel_area_range_min": 0.1,
+            "filter_rel_area_range_max": 1.0
         },
         "blank_assignment": {
             "activate_module": true,
@@ -148,4 +144,4 @@
             }
         }
     }
-}
+}
diff --git a/fermo_core/config/schema.json b/fermo_core/config/schema.json
@@ -351,27 +351,29 @@
                             "title": "Specifies whether this module should be run.",
                             "type": "boolean"
                         },
-                        "filter_rel_int_range": {
-                            "title": "(Optional): restrict data analysis to molecular features that have a relative intensity (intensity relative to the highest feature intensity per sample) that is inside the specified range. If a feature is detected in multiple samples, at least one occurrence must be inside the specified range. Else, the feature is not considered.",
-                            "type": "array",
-                            "minItems": 2,
-                            "maxItems": 2,
-                            "items": {
-                                "type": "number",
-                                "minimum": 0.0,
-                                "maximum": 1.0
-                            }
+                        "filter_rel_int_range_min": {
+                            "title": "(Optional): restrict data analysis to molecular features that have a relative intensity (intensity relative to the highest feature intensity per sample) that is greater than or equal to the specified value. If a feature is detected in multiple samples, at least one occurrence must be inside the specified range. Else, the feature is not considered.",
+                            "type": "number",
+                            "minimum": 0.0,
+                            "maximum": 1.0
                         },
-                        "filter_rel_area_range": {
-                            "title": "(Optional): restrict data analysis to molecular features that have a relative area (area relative to the highest feature area per sample that is inside the specified range. If a feature is detected in multiple samples, at least one occurrence must be inside the specified range. Else, the feature is not considered.",
-                            "type": "array",
-                            "minItems": 2,
-                            "maxItems": 2,
-                            "items": {
-                                "type": "number",
-                                "minimum": 0.0,
-                                "maximum": 1.0
-                            }
+                        "filter_rel_int_range_max": {
+                            "title": "(Optional): restrict data analysis to molecular features that have a relative intensity (intensity relative to the highest feature intensity per sample) that is less than or equal to the specified value. If a feature is detected in multiple samples, at least one occurrence must be inside the specified range. Else, the feature is not considered.",
+                            "type": "number",
+                            "minimum": 0.0,
+                            "maximum": 1.0
+                        },
+                        "filter_rel_area_range_min": {
+                            "title": "(Optional): restrict data analysis to molecular features that have a relative area (area relative to the highest feature area per sample) that is greater than or equal to the specified value. If a feature is detected in multiple samples, at least one occurrence must be inside the specified range. Else, the feature is not considered.",
+                            "type": "number",
+                            "minimum": 0.0,
+                            "maximum": 1.0
+                        },
+                        "filter_rel_area_range_max": {
+                            "title": "(Optional): restrict data analysis to molecular features that have a relative area (area relative to the highest feature area per sample) that is less than or equal to the specified value. If a feature is detected in multiple samples, at least one occurrence must be inside the specified range. Else, the feature is not considered.",
+                            "type": "number",
+                            "minimum": 0.0,
+                            "maximum": 1.0
                         }
                     }
                 },
@@ -518,7 +520,7 @@
                                     ]
                                 },
                                 "p_val_cutoff": {
-                                    "title": "(Optional): Minimum Bonferroni-corrected p-value to consider. A value of zero disables the cutoff filtering (automatically applies to both score and p-value).",
+                                    "title": "(Optional): Maximum Bonferroni-corrected p-value to consider. A value of zero disables the cutoff filtering (automatically applies to both score and p-value).",
                                     "type": "number",
                                     "minimum": 0.0,
                                     "maximum": 1.0
@@ -566,7 +568,7 @@
                                     ]
                                 },
                                 "p_val_cutoff": {
-                                    "title": "(Optional): Minimum Bonferroni-corrected p-value to consider. A value of zero disables the cutoff filtering (automatically applies to both score and p-value).",
+                                    "title": "(Optional): Maximum Bonferroni-corrected p-value to consider. A value of zero disables the cutoff filtering (automatically applies to both score and p-value).",
                                     "type": "number",
                                     "minimum": 0.0,
                                     "maximum": 1.0

diff --git a/fermo_core/data_analysis/annotation_manager/class_annotation_manager.py b/fermo_core/data_analysis/annotation_manager/class_annotation_manager.py
@@ -68,13 +68,15 @@ class AnnotationManager(BaseModel):
     features: Repository
     samples: Repository
 
-    def return_attrs(self: Self) -> tuple[Stats, Repository, Repository]:
+    def return_attrs(
+        self: Self,
+    ) -> tuple[Stats, Repository, Repository, ParameterManager]:
         """Returns modified attributes from AnnotationManager to the calling function
 
         Returns:
             Tuple of Stats, Feature Repository, Sample Repository and ParameterManager.
         """
-        return self.stats, self.features, self.samples
+        return self.stats, self.features, self.samples, self.params
 
     def run_analysis(self: Self):
         """Organizes calling of data analysis steps."""
@@ -179,6 +181,7 @@ def run_user_lib_mod_cosine_matching(self: Self):
             mod_cosine_annotator.calculate_scores_mod_cosine()
             mod_cosine_annotator.extract_userlib_scores()
             self.features = mod_cosine_annotator.return_features()
+            self.params.SpectralLibMatchingCosineParameters.module_passed = True
         except Exception as e:
             logger.error(str(e))
             logger.error(
@@ -216,6 +219,7 @@ def run_user_lib_ms2deepscore_matching(self: Self):
             ms2deepscore_annotator.calculate_scores_ms2deepscore()
             ms2deepscore_annotator.extract_userlib_scores()
             self.features = ms2deepscore_annotator.return_features()
+            self.params.SpectralLibMatchingDeepscoreParameters.module_passed = True
         except Exception as e:
             logger.error(str(e))
             logger.error(
@@ -242,6 +246,7 @@ def run_feature_adduct_annotation(self: Self):
             )
             adduct_annotator.run_analysis()
             self.features = adduct_annotator.return_features()
+            self.params.AdductAnnotationParameters.module_passed = True
         except Exception as e:
             logger.error(str(e))
             logger.error(
@@ -264,6 +269,7 @@ def run_neutral_loss_annotation(self: Self):
             )
             neutralloss_annotator.run_analysis()
             self.features = neutralloss_annotator.return_features()
+            self.params.NeutralLossParameters.module_passed = True
         except Exception as e:
             logger.error(str(e))
             logger.error(
@@ -287,6 +293,7 @@ def run_fragment_annotation(self: Self):
             )
             fragment_annotator.run_analysis()
             self.features = fragment_annotator.return_features()
+            self.params.FragmentAnnParameters.module_passed = True
         except Exception as e:
             logger.error(str(e))
             logger.error(
@@ -323,6 +330,7 @@ def run_ms2query_results_assignment(self: Self):
                 self.params.MS2QueryResultsParameters.filepath,
             )
             self.features = ms2query_annotator.return_features()
+
         except Exception as e:
             logger.error(str(e))
             logger.error(
@@ -331,8 +339,7 @@ def run_ms2query_results_assignment(self: Self):
             return
 
         logger.info(
-            "'AnnotationManager': completed annotation from existing MS2Query "
-            "results"
+            "'AnnotationManager': completed annotation from existing MS2Query results."
         )
 
     def run_ms2query_annotation(self: Self):
@@ -359,6 +366,7 @@ def run_ms2query_annotation(self: Self):
             )
             ms2query_annotator.run_ms2query()
             self.features = ms2query_annotator.return_features()
+            self.params.Ms2QueryAnnotationParameters.module_passed = True
         except Exception as e:
             logger.error(str(e))
             logger.error(
@@ -401,7 +409,6 @@ def run_as_kcb_cosine_annotation(self: Self):
             )
             mibig_bgcs = {key for key, value in kcb_results.items()}
             spec_library = UtilityMethodManager().create_mibig_spec_lib(mibig_bgcs)
-
             kcb_annotator = ModCosAnnotator(
                 features=self.features,
                 active_features=self.stats.active_features,
@@ -417,6 +424,7 @@ def run_as_kcb_cosine_annotation(self: Self):
             kcb_annotator.calculate_scores_mod_cosine()
             kcb_annotator.extract_mibig_scores(kcb_results)
             self.features = kcb_annotator.return_features()
+            self.params.AsKcbCosineMatchingParams.module_passed = True
         except Exception as e:
             logger.error(str(e))
             logger.error(
@@ -478,6 +486,7 @@ def run_as_kcb_deepscore_annotation(self: Self):
             kcb_annotator.calculate_scores_ms2deepscore()
             kcb_annotator.extract_mibig_scores(kcb_results)
             self.features = kcb_annotator.return_features()
+            self.params.AsKcbDeepscoreMatchingParams.module_passed = True
         except Exception as e:
             logger.error(str(e))
             logger.error(

diff --git a/fermo_core/data_analysis/class_analysis_manager.py b/fermo_core/data_analysis/class_analysis_manager.py
@@ -67,13 +67,15 @@ class AnalysisManager(BaseModel):
     features: Repository
     samples: Repository
 
-    def return_attributes(self: Self) -> tuple[Stats, Repository, Repository]:
+    def return_attributes(
+        self: Self,
+    ) -> tuple[Stats, Repository, Repository, ParameterManager]:
         """Returns modified attributes to the calling function
 
         Returns:
             Tuple of Stats, Feature Repository, Sample Repository and ParameterManager.
         """
-        return self.stats, self.features, self.samples
+        return self.stats, self.features, self.samples, self.params
 
     def analyze(self: Self):
         """Organizes calling of data analysis steps."""
@@ -108,6 +110,7 @@ def run_feature_filter(self: Self):
             )
             feature_filter.filter()
             self.stats, self.features, self.samples = feature_filter.return_values()
+            self.params.FeatureFilteringParameters.module_passed = True
         except Exception as e:
             logger.warning(str(e))
             return
@@ -138,6 +141,7 @@ def run_blank_assignment(self: Self):
             )
             blank_assigner.run_analysis()
             self.stats, self.features = blank_assigner.return_attrs()
+            self.params.BlankAssignmentParameters.module_passed = True
         except Exception as e:
             logger.warning(str(e))
             return
@@ -181,6 +185,7 @@ def run_group_factor_assignment(self: Self):
             )
             group_fact_ass.run_analysis()
             self.features = group_fact_ass.return_features()
+            self.params.GroupFactAssignmentParameters.module_passed = True
         except Exception as e:
             logger.warning(str(e))
             return
@@ -211,7 +216,7 @@ def run_phenotype_manager(self: Self):
                 samples=self.samples,
             )
             phenotype_manager.run_analysis()
-            self.stats, self.features = phenotype_manager.return_attrs()
+            self.stats, self.features, self.params = phenotype_manager.return_attrs()
         except Exception as e:
             logger.warning(str(e))
             return
@@ -241,11 +246,9 @@ def run_sim_networks_manager(self: Self):
                 samples=self.samples,
             )
             sim_networks_manager.run_analysis()
-            (
-                self.stats,
-                self.features,
-                self.samples,
-            ) = sim_networks_manager.return_attrs()
+            (self.stats, self.features, self.samples, self.params) = (
+                sim_networks_manager.return_attrs()
+            )
         except Exception as e:
             logger.warning(str(e))
             return
@@ -261,7 +264,9 @@ def run_annotation_manager(self: Self):
                 samples=self.samples,
             )
             annotation_manager.run_analysis()
-            self.stats, self.features, self.samples = annotation_manager.return_attrs()
+            self.stats, self.features, self.samples, self.params = (
+                annotation_manager.return_attrs()
+            )
         except Exception as e:
             logger.warning(str(e))
             return