Skip to content

Commit 52fa302

Browse files
authored
Merge pull request #13 from cbib/prep-0.1.4
Prep 0.1.4
2 parents 8b25853 + 1c83a90 commit 52fa302

File tree

7 files changed

+194
-87
lines changed

7 files changed

+194
-87
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Your data must be already processed by another software that performs such corre
3333
> [!IMPORTANT]
3434
> When using TraceGroomer, please cite:
3535
>
36-
> Galvis J, Guyon J, Dartigues B, Hecht H, Grüning B, Specque F, Soueidan H, Karkar S, Daubon T, Nikolski M. DIMet: An open-source tool for Differential analysis of targeted Isotope-labeled Metabolomics data. _Bioinformatics_ 2024; btae282. [https://doi.org/10.1093/bioinformatics/btae282](https://doi.org/10.1093/bioinformatics/btae282)
36+
> Galvis J, Guyon J, Dartigues B, Hecht H, Grüning B, Specque F, Soueidan H, Karkar S, Daubon T, Nikolski M. DIMet: An open-source tool for Differential analysis of targeted Isotope-labeled Metabolomics data. _Bioinformatics_ 2024; 40(5) btae282. [https://doi.org/10.1093/bioinformatics/btae282](https://doi.org/10.1093/bioinformatics/btae282)
3737
3838
--------------------------
3939

pyproject.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "tracegroomer"
3-
version = "0.1.3"
3+
version = "0.1.4"
44
description = "Convert and normalise metabolomics data formats (preprocessing for DIMet)"
55
authors = [
66
"Johanna Galvis Rodriguez <deisy-johanna.galvis-rodriguez@u-bordeaux.fr>",
@@ -10,6 +10,10 @@ authors = [
1010
"Bjorn Gruening <bjoern.gruening@gmail.com>",
1111
"Macha Nikolski <macha.nikolski@u-bordeaux.fr>"
1212
]
13+
maintainers = [ "Benjamin Dartigues <benjamin.dartigues@u-bordeaux.fr>" ,
14+
"Johanna Galvis Rodriguez <deisy-johanna.galvis-rodriguez@u-bordeaux.fr>"
15+
]
16+
repository = "https://github.com/cbib/TraceGroomer.git"
1317
license = "MIT"
1418
readme = "README.md"
1519

tracegroomer/__main__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def prep_args() -> argparse.ArgumentParser:
6666
action=argparse.BooleanOptionalAction, default=True,
6767
help="Stomps fractional contributions (synonym: \
6868
mean enrichment), and isotopologue proportions, \
69-
to max 1.0 and min 0.0") #meanenrich_or_fracfontrib
69+
to max 1.0 and min 0.0") # meanenrich_or_fracfontrib
7070

7171
# for total abundance only if VIB data
7272
parser.add_argument("--under_detection_limit_set_nan",
@@ -95,7 +95,7 @@ def main() -> int:
9595
parser = prep_args()
9696
args = parser.parse_args()
9797
logger.info(
98-
f"Running TraceGroomer with the following parameters:")
98+
"Running TraceGroomer with the following parameters:")
9999
for x in vars(args).keys():
100100
logger.info(f"{x} = {vars(args)[x]} ")
101101

tracegroomer/tests/test_tidy.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def test_true_key_value_available_frames(self):
8484
config = {"isotopologues": None, "mean_enrichment": None,
8585
"abundances": "totalAbunds",
8686
"isotopologue_proportions": "FractionsIsotopic"}
87-
myobj.true_key_value_available_frames(config)
87+
myobj.update_truly_available_frames(config)
8888
result1 = myobj.available_frames
8989
result2 = myobj.reverse_available_frames
9090
self.assertListEqual(list(result1.values()), list(result2.keys()))
@@ -126,8 +126,8 @@ def test_stomp_fraction_values(self):
126126
)
127127

128128
def test_pull_internal_standard(self):
129-
df = pd.DataFrame({ "sample-a": [87, 64, 14],
130-
"sample-b": [85, 37, 17]})
129+
df = pd.DataFrame({"sample-a": [87, 64, 14],
130+
"sample-b": [85, 37, 17]})
131131
df.index = ["X", "W_acid", "Z"]
132132
config = {
133133
"mean_enrichment": "FracContribs", "isotopologues": None,

tracegroomer/tests/test_utils.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def test_compute_isotopologues_proportions_from_absolute(self):
6969
df, metabolites2isotopologues_df
7070
)
7171

72-
self.assertAlmostEqual(result.loc['acCoA_m+0','sampleA2'],
72+
self.assertAlmostEqual(result.loc['acCoA_m+0', 'sampleA2'],
7373
0.529913, places=6)
7474
self.assertAlmostEqual(result.loc['acCoA_m+1', 'sampleB1'],
7575
0.313908, places=6)
@@ -121,14 +121,14 @@ def test_compute_sums_isotopol_props(self):
121121
def test_impute_custom_levels_to_df(self):
122122
melted_df = pd.DataFrame({
123123
"metabolite": ["AcCoA", "AcCoA", "AcCoA", "unknown", "unknown",
124-
"gly", "gly", "gly", "AcCoA", "AcCoA", "AcCoA",
125-
"unknown", "unknown", "gly", "gly", "gly" ],
126-
"isotopologue_type": [0, 1, 2, 0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 0, 1,
127-
2],
128-
"samples": ["s1", "s1", "s1", "s1", "s1", "s1", "s1", "s1", "s2",
129-
"s2", "s2", "s2", "s2", "s2", "s2", "s2"],
130-
"value": [0.3, 0.6, 0.1, 0.4, 0.6, 0.2, 0.5, 0.3,
131-
0.25, 0.62, 0.15, 0.5, 0.5, 0.23, 0.46, 0.31]
124+
"gly", "gly", "gly", "AcCoA", "AcCoA", "AcCoA",
125+
"unknown", "unknown", "gly", "gly", "gly"],
126+
"isotopologue_type": [0, 1, 2, 0, 1, 0, 1, 2, 0, 1, 2,
127+
0, 1, 0, 1, 2],
128+
"samples": ["s1", "s1", "s1", "s1", "s1", "s1", "s1", "s1", "s2",
129+
"s2", "s2", "s2", "s2", "s2", "s2", "s2"],
130+
"value": [0.3, 0.6, 0.1, 0.4, 0.6, 0.2, 0.5, 0.3,
131+
0.25, 0.62, 0.15, 0.5, 0.5, 0.23, 0.46, 0.31]
132132
})
133133
result = utils.impute_custom_levels_to_df(melted_df)
134134

@@ -188,8 +188,8 @@ def test_divide_by_amount_material(self):
188188
confdict = {"isotopologues": "MyIsotopes"}
189189

190190
result = utils.divide_by_amount_material(
191-
frames_dict,confdict, material_df=micrograms_weight,
192-
alternative_method=True, metric="isotopologues" )
191+
frames_dict, confdict, material_df=micrograms_weight,
192+
alternative_method=True, metric="isotopologues")
193193

194194
witness = (df.loc["acCoA_m+1", "sampleA2"] / micrograms_weight.loc[
195195
"sampleA2", "0"]) * micrograms_weight["0"].mean()
@@ -202,7 +202,7 @@ def test_divide_by_amount_material(self):
202202
np.around(
203203
result['MyIsotopes'].loc['acCoA_m+0', :], 6
204204
) == np.array(
205-
[ 10277.580938, 8869.973775, 6527.904882, 56000.880682]))
205+
[10277.580938, 8869.973775, 6527.904882, 56000.880682]))
206206
)
207207
self.assertTrue(np.all(
208208
np.around(

tracegroomer/tidy.py

Lines changed: 123 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ def __init__(self, type_of_file):
3030
'isotopologue_proportions',
3131
'isotopologues',
3232
'abundances']
33+
self.metabolites2isotopologues_df = None
34+
self.user_given_names = dict()
3335

3436
def load_metadata(self, metadata_path):
3537
self.metadata = ut.open_metadata(metadata_path)
@@ -123,48 +125,79 @@ def transpose_frames(self):
123125
def load_metabolite_to_isotopologue_df(self, confdict):
124126
"""df of correspondences between isotopologues and metabolites
125127
proper to the given data"""
126-
try:
127-
isotopologues_full = list(self.frames_dict[confdict[
128-
"isotopologue_proportions"]].index)
129-
# ok apply same proposed solution whether Key or Type error:
130-
except TypeError:
131-
isotopologues_full = list(self.frames_dict[confdict[
132-
"isotopologues"]].index)
133-
except KeyError:
134-
isotopologues_full = list(self.frames_dict[confdict[
135-
"isotopologues"]].index)
136-
137-
self.metabolites2isotopologues_df = ut.isotopologues_meaning_df(
138-
isotopologues_full)
128+
if (confdict['isotopologue_proportions'] is not None) or (
129+
confdict['isotopologues'] is not None):
130+
try:
131+
isotopologues_full = list(self.frames_dict[confdict[
132+
"isotopologue_proportions"]].index)
133+
# ok apply same proposed solution whether Key or Type error:
134+
except TypeError:
135+
isotopologues_full = list(self.frames_dict[confdict[
136+
"isotopologues"]].index)
137+
except KeyError:
138+
isotopologues_full = list(self.frames_dict[confdict[
139+
"isotopologues"]].index)
140+
141+
self.metabolites2isotopologues_df = ut.isotopologues_meaning_df(
142+
isotopologues_full)
143+
144+
def set_user_given_names(self, confdict):
145+
"""
146+
Set user given names of quantifications only.
147+
This will be useful for final_files_names property"""
148+
user_given_names = dict()
149+
for keyname_quantif in self.expected_keys_confdict:
150+
if confdict[keyname_quantif] is not None:
151+
user_given_names[keyname_quantif] = confdict[
152+
keyname_quantif]
153+
self.user_given_names = user_given_names
139154

140155
def fill_missing_data(self, confdict) -> Dict[str, str]:
156+
"""
157+
Computes the quantification that the user set None in confdict.
158+
This is done using the absolute isotopic values, and if not provided,
159+
will compute mean enrichment from isotopic proportions.
160+
"""
141161
tmp, confdict_new = ut.complete_missing_frames(
142162
confdict, self.frames_dict, self.metabolites2isotopologues_df)
143163
self.frames_dict = tmp
144164

145165
return confdict_new
146166

147-
def true_key_value_available_frames(self, confdict):
167+
def update_truly_available_frames(self, confdict):
168+
"""
169+
Sets the properties .available_frames and .reverse_available_frames,
170+
which are dictionaries of quantifications names that truly exist
171+
in the object (in the frames_dict)
172+
"""
148173
reverse_dict = dict()
149174
avail_dict = dict()
150175
true_reverse_dict = dict()
151176
for m in self.expected_keys_confdict:
152-
reverse_dict[confdict[m]] = m
177+
try:
178+
reverse_dict[confdict[m]] = m
179+
except KeyError:
180+
continue
153181
for h in self.frames_dict.keys():
182+
# if the quantification content and key exists
154183
if (self.frames_dict[h] is not None) and (h is not None):
155-
avail_dict[reverse_dict[h]] = h
156-
true_reverse_dict[h] = reverse_dict[h]
184+
try:
185+
avail_dict[reverse_dict[h]] = h
186+
true_reverse_dict[h] = reverse_dict[h]
187+
except Exception as e:
188+
print(e)
189+
continue
157190

158191
self.available_frames = avail_dict
159192
self.reverse_available_frames = true_reverse_dict
160193

161194
def save_isotopologues_preview(self, args, confdict, groom_out_path):
162-
compartmentalized_dict = ut.df_to__dic_bycomp(
163-
self.frames_dict[confdict['isotopologue_proportions']],
164-
self.metadata)
165-
output_plots_dir = os.path.join(groom_out_path, "preview_plots")
166195
if args.isotopologues_preview:
167-
logger.info(f"prepare isotopologue proportions overview figures")
196+
compartmentalized_dict = ut.df_to__dic_bycomp(
197+
self.frames_dict[confdict['isotopologue_proportions']],
198+
self.metadata)
199+
output_plots_dir = os.path.join(groom_out_path, "preview_plots")
200+
logger.info("prepare isotopologue proportions overview figures")
168201
if not os.path.exists(output_plots_dir):
169202
os.makedirs(output_plots_dir)
170203
ut.save_isos_preview(
@@ -178,8 +211,8 @@ def pull_internal_standard(self, confdict, args):
178211
):
179212
try:
180213
x = self.frames_dict[confdict['abundances']].columns.tolist()
181-
y = self.frames_dict[confdict['abundances']
182-
].loc[args.use_internal_standard, :].tolist()
214+
y = self.frames_dict[confdict['abundances']].loc[
215+
args.use_internal_standard, :].tolist()
183216
instandard_abun_df = pd.DataFrame(
184217
{"sample": x,
185218
args.use_internal_standard: y
@@ -228,6 +261,41 @@ def normalize_by_internal_standard(self, args, confdict):
228261
args.use_internal_standard)
229262
self.frames_dict = frames_dict
230263

264+
def set_final_files_names(self):
265+
"""
266+
Set final names of output files:
267+
1. set final names dictionary and add as attribute, and
268+
2. assign the values of 1. to the keys of the object.frames_dict
269+
"""
270+
not_user_defined_dict = ut.retrieve_dict_not_user_defined()
271+
final_files_names_d = dict() # set final names dictionary
272+
for valuename in self.reverse_available_frames.keys():
273+
keyname = self.reverse_available_frames[valuename]
274+
if keyname in list(self.user_given_names.keys()):
275+
final_files_names_d[keyname] = self.user_given_names[keyname]
276+
else:
277+
if self.available_frames[keyname] is not None:
278+
final_files_names_d[keyname] = not_user_defined_dict[
279+
keyname]
280+
# end for
281+
frames_names_list = list(self.frames_dict.keys())
282+
for frame_name in frames_names_list: # assign final names to frames
283+
if frame_name == "abundances_computed":
284+
self.frames_dict[final_files_names_d[
285+
"abundances"]] = self.frames_dict[frame_name]
286+
del self.frames_dict[frame_name]
287+
if frame_name == "mean_enrichment_computed":
288+
self.frames_dict[final_files_names_d[
289+
"mean_enrichment"]] = self.frames_dict[frame_name]
290+
del self.frames_dict[frame_name]
291+
if frame_name == "isotopologue_props_computed":
292+
self.frames_dict[final_files_names_d[
293+
"isotopologue_proportions"]] = self.frames_dict[
294+
frame_name]
295+
del self.frames_dict[frame_name]
296+
# end for
297+
self.final_files_names = final_files_names_d
298+
231299
def compartmentalize_frames_dict(self):
232300
for k in self.frames_dict.keys():
233301
tmp = ut.df_to__dic_bycomp(
@@ -257,7 +325,8 @@ def drop_metabolites(self):
257325
def frames_filterby_min_admited_isotopol_proportions(
258326
self, confdict, isosprop_min_admitted: float
259327
):
260-
isos_propor_dic = self.frames_dict[confdict['isotopologue_proportions']]
328+
isos_propor_dic = self.frames_dict[
329+
confdict['isotopologue_proportions']]
261330
bad_mets = dict()
262331
for co in isos_propor_dic.keys():
263332
tmp = isos_propor_dic[co]
@@ -270,21 +339,27 @@ def frames_filterby_min_admited_isotopol_proportions(
270339
self.frames_dict, self.reverse_available_frames, bad_mets)
271340
self.frames_dict = tmp
272341

273-
def stomp_fraction_values(self, args, confdict):
342+
def stomp_fraction_values(self, args, final_files_names: dict):
274343
if args.fractions_stomp_values:
275344
for frac_type in ["mean_enrichment", "isotopologue_proportions"]:
276-
curr_dict = self.frames_dict[confdict[frac_type]]
277-
for co in curr_dict.keys():
278-
df = curr_dict[co]
279-
df[df < 0] = 0
280-
df[df > 1] = 1
281-
curr_dict[co] = df
282-
self.frames_dict[confdict[frac_type]] = curr_dict
283-
284-
def transfer__abund_nan__to_all_tables(self, confdict):
285-
tmp = ut.transfer__abund_nan__to_all_tables(
286-
confdict, self.frames_dict, self.metadata)
287-
self.frames_dict = tmp
345+
try:
346+
curr_dict = self.frames_dict[final_files_names[frac_type]]
347+
for co in curr_dict.keys():
348+
df = curr_dict[co]
349+
df[df < 0] = 0
350+
df[df > 1] = 1
351+
curr_dict[co] = df
352+
self.frames_dict[final_files_names[frac_type]] = curr_dict
353+
except KeyError as e:
354+
logger.info(f"{e}: unavailable (stomp fractions)")
355+
356+
def transfer__abund_nan__to_all_tables(self, final_files_names: dict):
357+
try:
358+
tmp = ut.transfer__abund_nan__to_all_tables(
359+
final_files_names, self.frames_dict, self.metadata)
360+
self.frames_dict = tmp
361+
except KeyError as e:
362+
logger.info(f"{e}: unavailable (propagate NaN values)")
288363

289364
# end class
290365

@@ -330,12 +405,11 @@ def save_tables(frames_dict, groom_out_path, output_extension) -> None:
330405
def wrapper_common_steps(combo_data: CompositeData,
331406
args, confdict, groom_out_path: str) -> None:
332407
combo_data.load_metabolite_to_isotopologue_df(confdict)
333-
confdict = combo_data.fill_missing_data(confdict)
334-
combo_data.true_key_value_available_frames(confdict)
335-
408+
combo_data.set_user_given_names(confdict) # only user def names
409+
confdict = combo_data.fill_missing_data(confdict) # critical completion
410+
combo_data.update_truly_available_frames(confdict) # update 1
336411
combo_data.save_isotopologues_preview(args, confdict, groom_out_path)
337-
338-
combo_data.pull_internal_standard(confdict, args)
412+
combo_data.pull_internal_standard(confdict, args) # before normalisation
339413

340414
if combo_data.material_df is not None:
341415
logger.info("computing normalization by amount of material")
@@ -345,12 +419,16 @@ def wrapper_common_steps(combo_data: CompositeData,
345419
args, confdict)
346420
else:
347421
combo_data.normalize_total_abundance_by_material(args, confdict)
422+
348423
combo_data.normalize_by_internal_standard(args, confdict)
424+
combo_data.set_final_files_names()
425+
combo_data.update_truly_available_frames(combo_data.final_files_names) # update 2
349426
combo_data.compartmentalize_frames_dict()
350427
# last steps use compartmentalized frames
351428
combo_data.drop_metabolites()
352-
combo_data.stomp_fraction_values(args, confdict)
353-
combo_data.transfer__abund_nan__to_all_tables(confdict)
429+
combo_data.stomp_fraction_values(args, combo_data.final_files_names)
430+
combo_data.transfer__abund_nan__to_all_tables(
431+
combo_data.final_files_names)
354432
save_tables(combo_data.frames_dict, groom_out_path,
355433
args.output_files_extension)
356434

0 commit comments

Comments
 (0)