Merge pull request #13 from cbib/prep-0.1.4

johaGL · web-flow · commit 52fa302973fc · 2024-08-06T15:52:12.000+02:00
Prep 0.1.4
diff --git a/README.md b/README.md
@@ -33,7 +33,7 @@ Your data must be already processed by another software that performs such corre
 > [!IMPORTANT]
 > When using TraceGroomer, please cite:
 > 
-> Galvis J, Guyon J, Dartigues B, Hecht H, Grüning B, Specque F,  Soueidan H, Karkar S,  Daubon T, Nikolski M. DIMet: An open-source tool for Differential analysis of targeted Isotope-labeled Metabolomics data. _Bioinformatics_ 2024; btae282. [https://doi.org/10.1093/bioinformatics/btae282](https://doi.org/10.1093/bioinformatics/btae282)
+> Galvis J, Guyon J, Dartigues B, Hecht H, Grüning B, Specque F,  Soueidan H, Karkar S,  Daubon T, Nikolski M. DIMet: An open-source tool for Differential analysis of targeted Isotope-labeled Metabolomics data. _Bioinformatics_ 2024; 40(5) btae282. [https://doi.org/10.1093/bioinformatics/btae282](https://doi.org/10.1093/bioinformatics/btae282)
 
 --------------------------
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "tracegroomer"
-version = "0.1.3"
+version = "0.1.4"
 description = "Convert and normalise metabolomics data formats (preprocessing for DIMet)"
 authors = [
   "Johanna Galvis Rodriguez <deisy-johanna.galvis-rodriguez@u-bordeaux.fr>",
@@ -10,6 +10,10 @@ authors = [
   "Bjorn Gruening <bjoern.gruening@gmail.com>",
   "Macha Nikolski <macha.nikolski@u-bordeaux.fr>"
 ]
+maintainers = [ "Benjamin Dartigues <benjamin.dartigues@u-bordeaux.fr>" ,
+  "Johanna Galvis Rodriguez <deisy-johanna.galvis-rodriguez@u-bordeaux.fr>"
+]
+repository = "https://github.com/cbib/TraceGroomer.git"
 license = "MIT"
 readme = "README.md"
 
diff --git a/tracegroomer/__main__.py b/tracegroomer/__main__.py
@@ -66,7 +66,7 @@ def prep_args() -> argparse.ArgumentParser:
                         action=argparse.BooleanOptionalAction, default=True,
                         help="Stomps fractional contributions (synonym: \
                         mean enrichment), and isotopologue proportions, \
-                        to max 1.0 and min 0.0")  #meanenrich_or_fracfontrib
+                        to max 1.0 and min 0.0")  # meanenrich_or_fracfontrib
 
     # for total abundance only if VIB data
     parser.add_argument("--under_detection_limit_set_nan",
@@ -95,7 +95,7 @@ def main() -> int:
     parser = prep_args()
     args = parser.parse_args()
     logger.info(
-        f"Running TraceGroomer with the following parameters:")
+        "Running TraceGroomer with the following parameters:")
     for x in vars(args).keys():
         logger.info(f"{x} = {vars(args)[x]} ")
 
diff --git a/tracegroomer/tests/test_tidy.py b/tracegroomer/tests/test_tidy.py
@@ -84,7 +84,7 @@ def test_true_key_value_available_frames(self):
         config = {"isotopologues": None, "mean_enrichment": None,
                   "abundances": "totalAbunds",
                   "isotopologue_proportions": "FractionsIsotopic"}
-        myobj.true_key_value_available_frames(config)
+        myobj.update_truly_available_frames(config)
         result1 = myobj.available_frames
         result2 = myobj.reverse_available_frames
         self.assertListEqual(list(result1.values()), list(result2.keys()))
@@ -126,8 +126,8 @@ def test_stomp_fraction_values(self):
         )
 
     def test_pull_internal_standard(self):
-        df = pd.DataFrame({ "sample-a": [87, 64, 14],
-                            "sample-b": [85, 37, 17]})
+        df = pd.DataFrame({"sample-a": [87, 64, 14],
+                           "sample-b": [85, 37, 17]})
         df.index = ["X", "W_acid", "Z"]
         config = {
             "mean_enrichment": "FracContribs", "isotopologues": None,
diff --git a/tracegroomer/tests/test_utils.py b/tracegroomer/tests/test_utils.py
@@ -69,7 +69,7 @@ def test_compute_isotopologues_proportions_from_absolute(self):
             df, metabolites2isotopologues_df
         )
 
-        self.assertAlmostEqual(result.loc['acCoA_m+0','sampleA2'],
+        self.assertAlmostEqual(result.loc['acCoA_m+0', 'sampleA2'],
                                0.529913, places=6)
         self.assertAlmostEqual(result.loc['acCoA_m+1', 'sampleB1'],
                                0.313908, places=6)
@@ -121,14 +121,14 @@ def test_compute_sums_isotopol_props(self):
     def test_impute_custom_levels_to_df(self):
         melted_df = pd.DataFrame({
             "metabolite": ["AcCoA", "AcCoA", "AcCoA", "unknown", "unknown",
-                         "gly", "gly", "gly", "AcCoA", "AcCoA", "AcCoA",
-                         "unknown", "unknown", "gly", "gly", "gly"  ],
-          "isotopologue_type": [0, 1, 2, 0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 0, 1,
-                                2],
-          "samples": ["s1", "s1", "s1", "s1", "s1", "s1", "s1", "s1", "s2",
-                      "s2", "s2", "s2", "s2", "s2", "s2", "s2"],
-          "value": [0.3, 0.6, 0.1, 0.4, 0.6, 0.2, 0.5, 0.3,
-                    0.25, 0.62, 0.15, 0.5, 0.5, 0.23, 0.46, 0.31]
+                           "gly", "gly", "gly", "AcCoA", "AcCoA", "AcCoA",
+                           "unknown", "unknown", "gly", "gly", "gly"],
+            "isotopologue_type": [0, 1, 2, 0, 1, 0, 1, 2, 0, 1, 2,
+                                  0, 1, 0, 1, 2],
+            "samples": ["s1", "s1", "s1", "s1", "s1", "s1", "s1", "s1", "s2",
+                        "s2", "s2", "s2", "s2", "s2", "s2", "s2"],
+            "value": [0.3, 0.6, 0.1, 0.4, 0.6, 0.2, 0.5, 0.3,
+                      0.25, 0.62, 0.15, 0.5, 0.5, 0.23, 0.46, 0.31]
           })
         result = utils.impute_custom_levels_to_df(melted_df)
 
@@ -188,8 +188,8 @@ def test_divide_by_amount_material(self):
         confdict = {"isotopologues": "MyIsotopes"}
 
         result = utils.divide_by_amount_material(
-            frames_dict,confdict, material_df=micrograms_weight,
-            alternative_method=True, metric="isotopologues" )
+            frames_dict, confdict, material_df=micrograms_weight,
+            alternative_method=True, metric="isotopologues")
 
         witness = (df.loc["acCoA_m+1", "sampleA2"] / micrograms_weight.loc[
             "sampleA2", "0"]) * micrograms_weight["0"].mean()
@@ -202,7 +202,7 @@ def test_divide_by_amount_material(self):
             np.around(
                 result['MyIsotopes'].loc['acCoA_m+0', :], 6
             ) == np.array(
-                [ 10277.580938, 8869.973775, 6527.904882, 56000.880682]))
+                [10277.580938, 8869.973775, 6527.904882, 56000.880682]))
         )
         self.assertTrue(np.all(
             np.around(
diff --git a/tracegroomer/tidy.py b/tracegroomer/tidy.py
@@ -30,6 +30,8 @@ def __init__(self, type_of_file):
                                        'isotopologue_proportions',
                                        'isotopologues',
                                        'abundances']
+        self.metabolites2isotopologues_df = None
+        self.user_given_names = dict()
 
     def load_metadata(self, metadata_path):
         self.metadata = ut.open_metadata(metadata_path)
@@ -123,48 +125,79 @@ def transpose_frames(self):
     def load_metabolite_to_isotopologue_df(self, confdict):
         """df of correspondences between isotopologues and metabolites
         proper to the given data"""
-        try:
-            isotopologues_full = list(self.frames_dict[confdict[
-                                     "isotopologue_proportions"]].index)
-        # ok apply same proposed solution whether Key or Type error:
-        except TypeError:
-            isotopologues_full = list(self.frames_dict[confdict[
-                                      "isotopologues"]].index)
-        except KeyError:
-            isotopologues_full = list(self.frames_dict[confdict[
-                                    "isotopologues"]].index)
-
-        self.metabolites2isotopologues_df = ut.isotopologues_meaning_df(
-            isotopologues_full)
+        if (confdict['isotopologue_proportions'] is not None) or (
+                confdict['isotopologues'] is not None):
+            try:
+                isotopologues_full = list(self.frames_dict[confdict[
+                                         "isotopologue_proportions"]].index)
+            # ok apply same proposed solution whether Key or Type error:
+            except TypeError:
+                isotopologues_full = list(self.frames_dict[confdict[
+                                          "isotopologues"]].index)
+            except KeyError:
+                isotopologues_full = list(self.frames_dict[confdict[
+                                        "isotopologues"]].index)
+
+            self.metabolites2isotopologues_df = ut.isotopologues_meaning_df(
+                isotopologues_full)
+
+    def set_user_given_names(self, confdict):
+        """
+        Set user given names of quantifications only.
+        This will be useful for final_files_names property"""
+        user_given_names = dict()
+        for keyname_quantif in self.expected_keys_confdict:
+            if confdict[keyname_quantif] is not None:
+                user_given_names[keyname_quantif] = confdict[
+                    keyname_quantif]
+        self.user_given_names = user_given_names
 
     def fill_missing_data(self, confdict) -> Dict[str, str]:
+        """
+        Computes the quantification that the user set None in confdict.
+        This is done using the absolute isotopic values, and if not provided,
+        will compute mean enrichment from isotopic proportions.
+        """
         tmp, confdict_new = ut.complete_missing_frames(
             confdict, self.frames_dict, self.metabolites2isotopologues_df)
         self.frames_dict = tmp
 
         return confdict_new
 
-    def true_key_value_available_frames(self, confdict):
+    def update_truly_available_frames(self, confdict):
+        """
+        Sets the properties .available_frames and .reverse_available_frames,
+        which are dictionaries of quantifications names that truly exist
+          in the object (in the frames_dict)
+        """
         reverse_dict = dict()
         avail_dict = dict()
         true_reverse_dict = dict()
         for m in self.expected_keys_confdict:
-            reverse_dict[confdict[m]] = m
+            try:
+                reverse_dict[confdict[m]] = m
+            except KeyError:
+                continue
         for h in self.frames_dict.keys():
+            # if the quantification content and key exists
             if (self.frames_dict[h] is not None) and (h is not None):
-                avail_dict[reverse_dict[h]] = h
-                true_reverse_dict[h] = reverse_dict[h]
+                try:
+                    avail_dict[reverse_dict[h]] = h
+                    true_reverse_dict[h] = reverse_dict[h]
+                except Exception as e:
+                    print(e)
+                    continue
 
         self.available_frames = avail_dict
         self.reverse_available_frames = true_reverse_dict
 
     def save_isotopologues_preview(self, args, confdict, groom_out_path):
-        compartmentalized_dict = ut.df_to__dic_bycomp(
-            self.frames_dict[confdict['isotopologue_proportions']],
-            self.metadata)
-        output_plots_dir = os.path.join(groom_out_path, "preview_plots")
         if args.isotopologues_preview:
-            logger.info(f"prepare isotopologue proportions overview figures")
+            compartmentalized_dict = ut.df_to__dic_bycomp(
+                self.frames_dict[confdict['isotopologue_proportions']],
+                self.metadata)
+            output_plots_dir = os.path.join(groom_out_path, "preview_plots")
+            logger.info("prepare isotopologue proportions overview figures")
             if not os.path.exists(output_plots_dir):
                 os.makedirs(output_plots_dir)
             ut.save_isos_preview(
@@ -178,8 +211,8 @@ def pull_internal_standard(self, confdict, args):
         ):
             try:
                 x = self.frames_dict[confdict['abundances']].columns.tolist()
-                y = self.frames_dict[confdict['abundances']
-                                     ].loc[args.use_internal_standard, :].tolist()
+                y = self.frames_dict[confdict['abundances']].loc[
+                    args.use_internal_standard, :].tolist()
                 instandard_abun_df = pd.DataFrame(
                     {"sample": x,
                      args.use_internal_standard: y
@@ -228,6 +261,41 @@ def normalize_by_internal_standard(self, args, confdict):
                 args.use_internal_standard)
             self.frames_dict = frames_dict
 
+    def set_final_files_names(self):
+        """
+        Set final names of output files:
+          1. set final names dictionary and add as attribute, and
+          2. assign the values of 1. to the keys of the object.frames_dict
+        """
+        not_user_defined_dict = ut.retrieve_dict_not_user_defined()
+        final_files_names_d = dict()  # set final names dictionary
+        for valuename in self.reverse_available_frames.keys():
+            keyname = self.reverse_available_frames[valuename]
+            if keyname in list(self.user_given_names.keys()):
+                final_files_names_d[keyname] = self.user_given_names[keyname]
+            else:
+                if self.available_frames[keyname] is not None:
+                    final_files_names_d[keyname] = not_user_defined_dict[
+                        keyname]
+        # end for
+        frames_names_list = list(self.frames_dict.keys())
+        for frame_name in frames_names_list:  # assign final names to frames
+            if frame_name == "abundances_computed":
+                self.frames_dict[final_files_names_d[
+                    "abundances"]] = self.frames_dict[frame_name]
+                del self.frames_dict[frame_name]
+            if frame_name == "mean_enrichment_computed":
+                self.frames_dict[final_files_names_d[
+                    "mean_enrichment"]] = self.frames_dict[frame_name]
+                del self.frames_dict[frame_name]
+            if frame_name == "isotopologue_props_computed":
+                self.frames_dict[final_files_names_d[
+                    "isotopologue_proportions"]] = self.frames_dict[
+                    frame_name]
+                del self.frames_dict[frame_name]
+        # end for
+        self.final_files_names = final_files_names_d
+
     def compartmentalize_frames_dict(self):
         for k in self.frames_dict.keys():
             tmp = ut.df_to__dic_bycomp(
@@ -257,7 +325,8 @@ def drop_metabolites(self):
     def frames_filterby_min_admited_isotopol_proportions(
             self, confdict, isosprop_min_admitted: float
     ):
-        isos_propor_dic = self.frames_dict[confdict['isotopologue_proportions']]
+        isos_propor_dic = self.frames_dict[
+            confdict['isotopologue_proportions']]
         bad_mets = dict()
         for co in isos_propor_dic.keys():
             tmp = isos_propor_dic[co]
@@ -270,21 +339,27 @@ def frames_filterby_min_admited_isotopol_proportions(
             self.frames_dict, self.reverse_available_frames, bad_mets)
         self.frames_dict = tmp
 
-    def stomp_fraction_values(self, args, confdict):
+    def stomp_fraction_values(self, args, final_files_names: dict):
         if args.fractions_stomp_values:
             for frac_type in ["mean_enrichment", "isotopologue_proportions"]:
-                curr_dict = self.frames_dict[confdict[frac_type]]
-                for co in curr_dict.keys():
-                    df = curr_dict[co]
-                    df[df < 0] = 0
-                    df[df > 1] = 1
-                    curr_dict[co] = df
-                self.frames_dict[confdict[frac_type]] = curr_dict
-
-    def transfer__abund_nan__to_all_tables(self, confdict):
-        tmp = ut.transfer__abund_nan__to_all_tables(
-            confdict, self.frames_dict, self.metadata)
-        self.frames_dict = tmp
+                try:
+                    curr_dict = self.frames_dict[final_files_names[frac_type]]
+                    for co in curr_dict.keys():
+                        df = curr_dict[co]
+                        df[df < 0] = 0
+                        df[df > 1] = 1
+                        curr_dict[co] = df
+                    self.frames_dict[final_files_names[frac_type]] = curr_dict
+                except KeyError as e:
+                    logger.info(f"{e}: unavailable (stomp fractions)")
+
+    def transfer__abund_nan__to_all_tables(self, final_files_names: dict):
+        try:
+            tmp = ut.transfer__abund_nan__to_all_tables(
+                final_files_names, self.frames_dict, self.metadata)
+            self.frames_dict = tmp
+        except KeyError as e:
+            logger.info(f"{e}: unavailable (propagate NaN values)")
 
 # end class
 
@@ -330,12 +405,11 @@ def save_tables(frames_dict, groom_out_path, output_extension) -> None:
 def wrapper_common_steps(combo_data: CompositeData,
                          args, confdict, groom_out_path: str) -> None:
     combo_data.load_metabolite_to_isotopologue_df(confdict)
-    confdict = combo_data.fill_missing_data(confdict)
-    combo_data.true_key_value_available_frames(confdict)
-
+    combo_data.set_user_given_names(confdict)  # only user def names
+    confdict = combo_data.fill_missing_data(confdict)  # critical completion
+    combo_data.update_truly_available_frames(confdict)  # update 1
     combo_data.save_isotopologues_preview(args, confdict, groom_out_path)
-
-    combo_data.pull_internal_standard(confdict, args)
+    combo_data.pull_internal_standard(confdict, args)  # before normalisation
 
     if combo_data.material_df is not None:
         logger.info("computing normalization by amount of material")
@@ -345,12 +419,16 @@ def wrapper_common_steps(combo_data: CompositeData,
                 args, confdict)
         else:
             combo_data.normalize_total_abundance_by_material(args, confdict)
+
     combo_data.normalize_by_internal_standard(args, confdict)
+    combo_data.set_final_files_names()
+    combo_data.update_truly_available_frames(combo_data.final_files_names)  # update 2
     combo_data.compartmentalize_frames_dict()
     # last steps use compartmentalized frames
     combo_data.drop_metabolites()
-    combo_data.stomp_fraction_values(args, confdict)
-    combo_data.transfer__abund_nan__to_all_tables(confdict)
+    combo_data.stomp_fraction_values(args, combo_data.final_files_names)
+    combo_data.transfer__abund_nan__to_all_tables(
+            combo_data.final_files_names)
     save_tables(combo_data.frames_dict, groom_out_path,
                 args.output_files_extension)
 
diff --git a/tracegroomer/utils.py b/tracegroomer/utils.py

Original file line number	Diff line number	Diff line change
`@@ -33,7 +33,7 @@ Your data must be already processed by another software that performs such corre`
`33`	`33`	`> [!IMPORTANT]`
`34`	`34`	`> When using TraceGroomer, please cite:`
`35`	`35`	`>`
`36`		`-> Galvis J, Guyon J, Dartigues B, Hecht H, Grüning B, Specque F, Soueidan H, Karkar S, Daubon T, Nikolski M. DIMet: An open-source tool for Differential analysis of targeted Isotope-labeled Metabolomics data. _Bioinformatics_ 2024; btae282. [https://doi.org/10.1093/bioinformatics/btae282](https://doi.org/10.1093/bioinformatics/btae282)`
	`36`	`+> Galvis J, Guyon J, Dartigues B, Hecht H, Grüning B, Specque F, Soueidan H, Karkar S, Daubon T, Nikolski M. DIMet: An open-source tool for Differential analysis of targeted Isotope-labeled Metabolomics data. _Bioinformatics_ 2024; 40(5) btae282. [https://doi.org/10.1093/bioinformatics/btae282](https://doi.org/10.1093/bioinformatics/btae282)`
`37`	`37`
`38`	`38`	`--------------------------`
`39`	`39`