Skip to content

Commit

Permalink
tree as string can be given and returned
Browse files Browse the repository at this point in the history
  • Loading branch information
AgnesBaud committed Feb 2, 2023
1 parent fec9a50 commit 63276d4
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 15 deletions.
29 changes: 19 additions & 10 deletions moonstone/utils/phylogenetic_tree_editing.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,25 +40,34 @@ def replacing_labels(

def adapt_phylogenetic_tree_to_counts_df(
    new_otu_id_name_ser: pd.Series,
    tree: str,
    output_tree_file: str = None,
    quotechr: str = "'"
):
    """
    Translate phylogenetic tree labels to names present in a counts dataframe using the txid as key

    Args:
        - new_otu_id_name_ser: pd.Series issued from count dataframe with only new_otu_id_name column
        ('NCBI_taxonomy_ID' for Kraken2, 'NCBI_tax_id' for Metaphlan3)
        - tree: path to the tree file to adapt, or the tree itself as a string.
        The format of the tree leaves labels should be
        '{species name}, {txid}' or '{species name}, {txid}*'
        - output_tree_file: path to the output adapted tree file.
        If None (or empty), the function returns the adapted tree as a string
        - quotechr: quote character used as delimiter of labels in tree

    Returns:
        The adapted tree as a string when no output_tree_file is given;
        None when the adapted tree has been written to output_tree_file.
    """
    import os

    # Decide between "path" and "tree content" by checking for existence
    # rather than by catching FileNotFoundError from open(): a tree string
    # longer than the OS file name limit makes open() raise a plain OSError
    # (ENAMETOOLONG), and one containing a NUL byte raises ValueError, so
    # neither would fall back to "treat the argument as the tree itself".
    # os.path.exists() returns False in both situations.
    if os.path.exists(tree):
        with open(tree, "r") as infile:
            T = infile.read()
    else:
        T = tree

    dic_translate_tree = generate_translation_dictionary(new_otu_id_name_ser)
    T = replacing_labels(T, dic_translate_tree, quotechr)

    if output_tree_file:
        # Context manager guarantees the handle is closed even if write fails.
        with open(output_tree_file, "w") as outfile:
            outfile.write(T)
    else:
        return T
25 changes: 20 additions & 5 deletions tests/utils/test_phylogenetic_tree_editing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@

from moonstone.utils.phylogenetic_tree_editing import (
generate_translation_dictionary,
replacing_labels
replacing_labels,
adapt_phylogenetic_tree_to_counts_df
)


class TestPhylogeneticTreeAdaptation(TestCase):
def test_generate_translation_dictionary(self):
count_df = pd.DataFrame(
def setUp(self):
self.count_df = pd.DataFrame(
[
['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillales (order)', 'Lactobacillales (order)', 'Lactobacillales (order)', 186826, 4.3], # noqa
['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_jensenii', 109790, 1.0], # noqa
Expand All @@ -21,12 +22,14 @@ def test_generate_translation_dictionary(self):
'NCBI_taxonomy_ID', 'SAMPLE_1'
]
)
count_df = count_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
self.count_df = self.count_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])

def test_generate_translation_dictionary(self):
    """Check the dictionary built from the fixture's 'NCBI_taxonomy_ID' column.

    The expected mapping uses taxonomy IDs as string keys and species names
    as values.
    """
    expected_dict = {
        '109790': 'Lactobacillus_jensenii',
        '147802': 'Lactobacillus_iners',
    }
    # The counts dataframe is built in setUp and stored on the instance, so it
    # must be read through self (a bare `count_df` no longer exists here).
    tested_dict = generate_translation_dictionary(self.count_df['NCBI_taxonomy_ID'])
    self.assertDictEqual(tested_dict, expected_dict)

def test_replacing_labels(self):
Expand All @@ -50,3 +53,15 @@ def test_replacing_labels(self):
('Alloprevotella_Prevotella sp. oral taxon 473':0.5,\
'Enterococcus lactis, 357441':0.05):1)root;\n"
self.assertEqual(tested_string, expected_string)

def test_adapt_phylogenetic_tree_to_counts_df(self):
    """Adapt a tree given as a string and compare the returned string.

    No output file is passed, so the adapted tree is expected back as the
    return value. In the expected tree the first two leaf labels are
    replaced and the third one is left untouched.
    """
    newick_expected = (
        "((('Lactobacillus_jensenii':0.35,"
        "'Lactobacillus_iners':0.15):0.75,"
        "'Lactobacillus ruminis CAG:367, 1263085*':1)root;\n"
    )
    newick_input = (
        "((('Lactobacillus jensenii, 109790':0.35,"
        "'Lactobacillus iners, 147802':0.15):0.75,"
        "'Lactobacillus ruminis CAG:367, 1263085*':1)root;\n"
    )
    taxonomy_ids = self.count_df['NCBI_taxonomy_ID']
    newick_output = adapt_phylogenetic_tree_to_counts_df(taxonomy_ids, newick_input)
    self.assertEqual(newick_output, newick_expected)

0 comments on commit 63276d4

Please sign in to comment.