From 63276d46da79e8d0e9b770baf9cb4b2b986bd67d Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Thu, 2 Feb 2023 18:11:16 +0100
Subject: [PATCH] tree as string can be given and returned

---
 moonstone/utils/phylogenetic_tree_editing.py  | 29 ++++++++++++-------
 tests/utils/test_phylogenetic_tree_editing.py | 25 ++++++++++++----
 2 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/moonstone/utils/phylogenetic_tree_editing.py b/moonstone/utils/phylogenetic_tree_editing.py
index 59f816af..208cc072 100644
--- a/moonstone/utils/phylogenetic_tree_editing.py
+++ b/moonstone/utils/phylogenetic_tree_editing.py
@@ -40,8 +40,8 @@ def replacing_labels(
 
 def adapt_phylogenetic_tree_to_counts_df(
     new_otu_id_name_ser: pd.Series,
-    tree_file: str,
-    output_tree_file: str,
+    tree: str,
+    output_tree_file: str = None,
     quotechr: str = "'"
 ):
     """
@@ -49,16 +49,25 @@ def adapt_phylogenetic_tree_to_counts_df(
     Args:
         - new_otu_id_name_ser: pd.Series issued from count dataframe with only new_otu_id_name column
           ('NCBI_taxonomy_ID' for Kraken2, 'NCBI_tax_id' for Metaphlan3)
-        - tree_file: path to the tree file to adapt. The format of the tree leaves labels should be
+        - tree: path to the tree file to adapt, or the tree as a string. The format of the tree leaf labels should be
           '{species name}, {txid}' or '{species name}, {txid}*'
-        - output_tree_file: path to the output adapted tree file
+        - output_tree_file: path to the output adapted tree file.
+          If None, the function returns the adapted tree as a string
         - quotechr: quote character used as delimiter of labels in tree
     """
+    try:
+        infile = open(tree, "r")
+        T = infile.read()
+        infile.close()
+    except FileNotFoundError:
+        T = tree
+
     dic_translate_tree = generate_translation_dictionary(new_otu_id_name_ser)
-    infile = open(tree_file, "r")
-    T = infile.read()
-    infile.close()
+    T = replacing_labels(T, dic_translate_tree, quotechr)
 
-    outfile = open(output_tree_file, "w")
-    outfile.write(replacing_labels(T, dic_translate_tree, quotechr))
-    outfile.close()
+    if output_tree_file:
+        outfile = open(output_tree_file, "w")
+        outfile.write(T)
+        outfile.close()
+    else:
+        return T
diff --git a/tests/utils/test_phylogenetic_tree_editing.py b/tests/utils/test_phylogenetic_tree_editing.py
index 39122303..bdede51e 100644
--- a/tests/utils/test_phylogenetic_tree_editing.py
+++ b/tests/utils/test_phylogenetic_tree_editing.py
@@ -4,13 +4,14 @@
 
 from moonstone.utils.phylogenetic_tree_editing import (
     generate_translation_dictionary,
-    replacing_labels
+    replacing_labels,
+    adapt_phylogenetic_tree_to_counts_df
 )
 
 
 class TestPhylogeneticTreeAdaptation(TestCase):
-    def test_generate_translation_dictionary(self):
-        count_df = pd.DataFrame(
+    def setUp(self):
+        self.count_df = pd.DataFrame(
             [
                 ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillales (order)', 'Lactobacillales (order)', 'Lactobacillales (order)', 186826, 4.3],  # noqa
                 ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_jensenii', 109790, 1.0],  # noqa
@@ -21,12 +22,14 @@ def test_generate_translation_dictionary(self):
                 'NCBI_taxonomy_ID', 'SAMPLE_1'
             ]
         )
-        count_df = count_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
+        self.count_df = self.count_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
+
+    def test_generate_translation_dictionary(self):
         expected_dict = {
             '109790': 'Lactobacillus_jensenii',
             '147802': 'Lactobacillus_iners',
         }
-        tested_dict = generate_translation_dictionary(count_df['NCBI_taxonomy_ID'])
+        tested_dict = generate_translation_dictionary(self.count_df['NCBI_taxonomy_ID'])
         self.assertDictEqual(tested_dict, expected_dict)
 
     def test_replacing_labels(self):
@@ -50,3 +53,15 @@ def test_replacing_labels(self):
 ('Alloprevotella_Prevotella sp. oral taxon 473':0.5,\
 'Enterococcus lactis, 357441':0.05):1)root;\n"
         self.assertEqual(tested_string, expected_string)
+
+    def test_adapt_phylogenetic_tree_to_counts_df(self):
+        tree_string = "((('Lactobacillus jensenii, 109790':0.35,\
+'Lactobacillus iners, 147802':0.15):0.75,\
+'Lactobacillus ruminis CAG:367, 1263085*':1)root;\n"
+        tested_string = adapt_phylogenetic_tree_to_counts_df(
+            self.count_df['NCBI_taxonomy_ID'], tree_string
+        )
+        expected_string = "((('Lactobacillus_jensenii':0.35,\
+'Lactobacillus_iners':0.15):0.75,\
+'Lactobacillus ruminis CAG:367, 1263085*':1)root;\n"
+        self.assertEqual(tested_string, expected_string)