From 608af996a929dc5cdade5f249ce644eb9571e7f7 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Mon, 20 Feb 2023 15:06:02 +0100 Subject: [PATCH 01/17] error name property --- moonstone/plot/graphs/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moonstone/plot/graphs/base.py b/moonstone/plot/graphs/base.py index 511a9f1..20cedd3 100644 --- a/moonstone/plot/graphs/base.py +++ b/moonstone/plot/graphs/base.py @@ -247,7 +247,7 @@ def plot_one_graph( fig, filtered_df2[group_col], filtered_df2[data_col], - names[group], + str(names[group]), filtered_df.index, self._get_group_color(group, colors), orientation, @@ -265,7 +265,7 @@ def plot_one_graph( fig, filtered_df[group_col], filtered_df[data_col], - names[group], + str(names[group]), filtered_df.index, self._get_group_color(group, colors), orientation, From 3c8b8dc87e2dfc77d665c7d878650dd68e369609 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Tue, 21 Feb 2023 14:26:56 +0100 Subject: [PATCH 02/17] error unexpected arg read_excel --- moonstone/analysis/diversity/base.py | 2 +- moonstone/parsers/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/moonstone/analysis/diversity/base.py b/moonstone/analysis/diversity/base.py index 62f191a..3243194 100644 --- a/moonstone/analysis/diversity/base.py +++ b/moonstone/analysis/diversity/base.py @@ -240,7 +240,7 @@ def _compute_pval_inside_subgroups( self, diversity_index_dataframe: pd.DataFrame, group_col: str, final_group_col: str, stats_test: str, correction_method: str, structure_pval: str, sym: bool ): - pval = pd.Series([]) + pval = pd.Series([], dtype='float64') for g in diversity_index_dataframe[group_col].dropna().unique(): df_gp = diversity_index_dataframe[diversity_index_dataframe[group_col] == g] if df_gp.shape[0] < 2: diff --git a/moonstone/parsers/base.py b/moonstone/parsers/base.py index d6487c8..c270bb3 100644 --- a/moonstone/parsers/base.py +++ b/moonstone/parsers/base.py @@ -53,7 +53,7 @@ def _load_data(self) -> pd.DataFrame: } if ext in ext_engine.keys(): return pd.read_excel( - self.file_path, sep=self.sep, header=self.header, **self.parsing_options, + self.file_path, header=self.header, **self.parsing_options, engine=ext_engine[ext] ) return pd.read_csv( From 6bf11d7b7fe6d340a45aae67d6dcaab0f0641aeb Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Mon, 3 Apr 2023 15:12:47 +0200 Subject: [PATCH 03/17] allow to keep NCBI_tax_id in metaphlan3 parser etc --- .../parsers/counts/taxonomy/metaphlan.py | 37 +++++++++++++++++-- moonstone/plot/counts.py | 2 +- tests/analysis/diversity/test_beta.py | 2 +- .../taxonomy/metaphlan3/test_metaphlan3.py | 28 +++++++++++++- .../counts/taxonomy/test_base_metaphlan.py | 8 ++++ 5 files changed, 69 insertions(+), 8 deletions(-) diff --git a/moonstone/parsers/counts/taxonomy/metaphlan.py b/moonstone/parsers/counts/taxonomy/metaphlan.py index 60039da..6657c7e 100644 --- a/moonstone/parsers/counts/taxonomy/metaphlan.py +++ b/moonstone/parsers/counts/taxonomy/metaphlan.py @@ -1,18 +1,34 @@ +import logging + from pandas import DataFrame from moonstone.parsers.counts.taxonomy.base import BaseTaxonomyCountsParser +logger = logging.getLogger(__name__) + class BaseMetaphlanParser(BaseTaxonomyCountsParser): def __init__(self, *args, analysis_type: str = 'rel_ab', **kwargs): """ Args: - analysis_type: output type of Metaphlan3 (see ``-t`` option of metaphlan3) + analysis_type: output type of Metaphlan3 (see ``-t`` option of metaphlan3) + { 'rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts', + 'marker_pres_table', 'clade_specific_strain_tracker' } """ - self.analysis_type = analysis_type + self.analysis_type = self._valid_analysis_type(analysis_type) super().__init__(*args, **kwargs) + def _valid_analysis_type(self, analysis_type): + choices = [ + "rel_ab", "rel_ab_w_read_stats", "reads_map", "clade_profiles", + "marker_ab_table", "marker_counts", "marker_pres_table", "clade_specific_strain_tracker" + ] + if analysis_type not in choices: + logger.warning("analysis_type='%s' not valid, set to default ('rel_ab').", analysis_type) + analysis_type = "rel_ab" + return analysis_type + def rows_differences(self, dataframe1, dataframe2) -> DataFrame: rows_diff = dataframe1 - dataframe2 rows_diff[rows_diff.isnull()] = dataframe1 @@ -88,17 +104,30 @@ class Metaphlan3Parser(BaseMetaphlanParser): taxa_column = 'clade_name' NCBI_tax_column = 'NCBI_tax_id' - def __init__(self, *args, analysis_type: str = 'rel_ab', **kwargs): + def __init__(self, *args, analysis_type: str = 'rel_ab', keep_NCBI_tax_col: bool = False, **kwargs): """ Args: analysis_type: output type of Metaphlan3 (see ``-t`` option of metaphlan3) + { 'rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts', + 'marker_pres_table', 'clade_specific_strain_tracker' } + keep_NCBI_tax_col: set to True if you want the NCBI tax column in the returned dataframe. """ + self.keep_NCBI_tax_col = keep_NCBI_tax_col super().__init__(*args, analysis_type=analysis_type, parsing_options={'skiprows': 1}, **kwargs) def _load_data(self) -> DataFrame: df = super()._load_data() - df = df.drop(self.NCBI_tax_column, axis=1) + + if self.keep_NCBI_tax_col: + tmp = df[[self.NCBI_tax_column, self.taxa_column]] + + df = df.drop(self.NCBI_tax_column, axis=1) # NCBI_tax_column needs to be dropped because sum df = self.remove_duplicates(df) + + if self.keep_NCBI_tax_col: + tmp[self.NCBI_tax_column] = tmp[self.NCBI_tax_column].map(lambda x: x.split("|")[-1]) + df = df.merge(tmp) + df = self.split_taxa_fill_none(df, sep="|") df = df.set_index(self.taxonomical_names[:self.rank_level]) return df diff --git a/moonstone/plot/counts.py b/moonstone/plot/counts.py index ac0d67d..cd0f4ac 100644 --- a/moonstone/plot/counts.py +++ b/moonstone/plot/counts.py @@ -747,7 +747,7 @@ def plot_sample_composition_most_abundant_taxa( # Make graph graph = MatrixBarGraph(data_df) # Plotting options - title = f"{taxa_level.capitalize()} composition for the top {taxa_number} most abundant species across samples" + title = f"{taxa_level.capitalize()} composition for the top {taxa_number} most abundant {taxa_level} across samples" if prevalence_threshold is not None: title += f" (present in at least {prevalence_threshold}% of samples)" diff --git a/tests/analysis/diversity/test_beta.py b/tests/analysis/diversity/test_beta.py index 89c25a0..93eb349 100644 --- a/tests/analysis/diversity/test_beta.py +++ b/tests/analysis/diversity/test_beta.py @@ -82,7 +82,7 @@ def test_run_statistical_test_groups_with_NaN(self): 'samples14': [9.35, 'A'], 'samples15': [7.89, 'A'], 'samples16': [4.65, 'C'], - 'samples17': [8.90, 'D'], + 'samples17': [8.90, 'D'], # only 1 sample from group D < 5 required to do ttest-independence 'samples18': [2.33, 'C'], 'samples19': [1.34, 'B'], 'samples20': [6.87, 'C'] diff --git a/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py b/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py index 6dd53dc..df51815 100644 --- a/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py +++ b/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py @@ -9,8 +9,8 @@ class TestMetaphlan2Parser(TestCase): def setUp(self): - input_path = os.path.join(os.path.dirname(__file__), 'input.tsv') - self.meta2parser = Metaphlan3Parser(input_path, analysis_type='marker_counts') + self.input_path = os.path.join(os.path.dirname(__file__), 'input.tsv') + self.meta2parser = Metaphlan3Parser(self.input_path, analysis_type='marker_counts') def test_to_dataframe(self): """ @@ -33,3 +33,27 @@ def test_to_dataframe(self): ) expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']) pd.testing.assert_frame_equal(self.meta2parser.dataframe, expected_df, check_like=True) + + def test_to_dataframe_keep_NCBI_tax_col(self): + """ + Test based on input.tsv file + """ + meta2parser = Metaphlan3Parser(self.input_path, analysis_type='rel_ab', keep_NCBI_tax_col=True) + expected_df = pd.DataFrame( + [ + ['Bacteria', 'Actinobacteria', 'Actinobacteria', 'Actinomycetales', 'Actinomycetaceae', 'Actinobaculum', + 'Actinobaculum_massiliense', 1.0, 2.0, '461393'], + ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus', + 'Lactobacillus (genus)', 3.2, 8.0, '1632'], + ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus', + 'Streptococcus (genus)', 1.3, 0.4, '1301'], + ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus', + 'Streptococcus_thermophilus', 1.7, 0.7, '1308'], + ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus', + 'Streptococcus_salivarius', 3.3, 1.2, '1304'] + ], + columns=['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'SAMPLE_1', 'SAMPLE_2', 'NCBI_tax_id'] + ) + expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']) + observed_df = meta2parser.dataframe + pd.testing.assert_frame_equal(observed_df, expected_df, check_like=True) \ No newline at end of file diff --git a/tests/parsers/counts/taxonomy/test_base_metaphlan.py b/tests/parsers/counts/taxonomy/test_base_metaphlan.py index 3062afd..9ffe312 100644 --- a/tests/parsers/counts/taxonomy/test_base_metaphlan.py +++ b/tests/parsers/counts/taxonomy/test_base_metaphlan.py @@ -266,3 +266,11 @@ def test_remove_duplicates_rel_ab_addition_error_margin(self): observed_df = self.base_metaphlan_parser.remove_duplicates(tested_df) pd.testing.assert_frame_equal(observed_df, expected_df, check_like=True) + + def test_valid_analysis_type(self): + with self.assertLogs('moonstone.parsers.counts.taxonomy.metaphlan', level='WARNING') as log: + tested_object_instance = BaseMetaphlanParser("file", analysis_type="INVALID ANALYSIS TYPE") + self.assertEqual(len(log.output), 1) + self.assertIn("WARNING:moonstone.parsers.counts.taxonomy.metaphlan:analysis_type='INVALID ANALYSIS TYPE' not valid, \ +set to default ('rel_ab').", log.output) + self.assertEqual(tested_object_instance.analysis_type, 'rel_ab') \ No newline at end of file From 7c69c352d133c7a60ba88c2d2b026ea96f15ace5 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Mon, 3 Apr 2023 15:19:37 +0200 Subject: [PATCH 04/17] flake8 --- moonstone/parsers/counts/taxonomy/metaphlan.py | 6 +++--- moonstone/plot/counts.py | 3 ++- tests/parsers/counts/taxonomy/kraken2/test_kraken2.py | 2 +- tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py | 5 +++-- tests/parsers/counts/taxonomy/test_base_metaphlan.py | 2 +- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/moonstone/parsers/counts/taxonomy/metaphlan.py b/moonstone/parsers/counts/taxonomy/metaphlan.py index 6657c7e..1b63fda 100644 --- a/moonstone/parsers/counts/taxonomy/metaphlan.py +++ b/moonstone/parsers/counts/taxonomy/metaphlan.py @@ -12,8 +12,8 @@ class BaseMetaphlanParser(BaseTaxonomyCountsParser): def __init__(self, *args, analysis_type: str = 'rel_ab', **kwargs): """ Args: - analysis_type: output type of Metaphlan3 (see ``-t`` option of metaphlan3) - { 'rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts', + analysis_type: output type of Metaphlan3 (see ``-t`` option of metaphlan3) + { 'rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts', 'marker_pres_table', 'clade_specific_strain_tracker' } """ self.analysis_type = self._valid_analysis_type(analysis_type) @@ -108,7 +108,7 @@ def __init__(self, *args, analysis_type: str = 'rel_ab', keep_NCBI_tax_col: bool """ Args: analysis_type: output type of Metaphlan3 (see ``-t`` option of metaphlan3) - { 'rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts', + { 'rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts', 'marker_pres_table', 'clade_specific_strain_tracker' } keep_NCBI_tax_col: set to True if you want the NCBI tax column in the returned dataframe. """ diff --git a/moonstone/plot/counts.py b/moonstone/plot/counts.py index cd0f4ac..d028dd9 100644 --- a/moonstone/plot/counts.py +++ b/moonstone/plot/counts.py @@ -747,7 +747,8 @@ def plot_sample_composition_most_abundant_taxa( # Make graph graph = MatrixBarGraph(data_df) # Plotting options - title = f"{taxa_level.capitalize()} composition for the top {taxa_number} most abundant {taxa_level} across samples" + title = f"{taxa_level.capitalize()} composition for the top {taxa_number} most abundant {taxa_level} across \ +samples" if prevalence_threshold is not None: title += f" (present in at least {prevalence_threshold}% of samples)" diff --git a/tests/parsers/counts/taxonomy/kraken2/test_kraken2.py b/tests/parsers/counts/taxonomy/kraken2/test_kraken2.py index acf1acb..52c8aeb 100644 --- a/tests/parsers/counts/taxonomy/kraken2/test_kraken2.py +++ b/tests/parsers/counts/taxonomy/kraken2/test_kraken2.py @@ -88,4 +88,4 @@ def test_to_dataframe_ods(self): ] ) expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']) - pd.testing.assert_frame_equal(sunbeamkraken2parser.dataframe, expected_df) \ No newline at end of file + pd.testing.assert_frame_equal(sunbeamkraken2parser.dataframe, expected_df) diff --git a/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py b/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py index df51815..000160a 100644 --- a/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py +++ b/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py @@ -52,8 +52,9 @@ def test_to_dataframe_keep_NCBI_tax_col(self): ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus', 'Streptococcus_salivarius', 3.3, 1.2, '1304'] ], - columns=['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'SAMPLE_1', 'SAMPLE_2', 'NCBI_tax_id'] + columns=['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'SAMPLE_1', 'SAMPLE_2', + 'NCBI_tax_id'] ) expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']) observed_df = meta2parser.dataframe - pd.testing.assert_frame_equal(observed_df, expected_df, check_like=True) \ No newline at end of file + pd.testing.assert_frame_equal(observed_df, expected_df, check_like=True) diff --git a/tests/parsers/counts/taxonomy/test_base_metaphlan.py b/tests/parsers/counts/taxonomy/test_base_metaphlan.py index 9ffe312..40f58f8 100644 --- a/tests/parsers/counts/taxonomy/test_base_metaphlan.py +++ b/tests/parsers/counts/taxonomy/test_base_metaphlan.py @@ -273,4 +273,4 @@ def test_valid_analysis_type(self): self.assertEqual(len(log.output), 1) self.assertIn("WARNING:moonstone.parsers.counts.taxonomy.metaphlan:analysis_type='INVALID ANALYSIS TYPE' not valid, \ set to default ('rel_ab').", log.output) - self.assertEqual(tested_object_instance.analysis_type, 'rel_ab') \ No newline at end of file + self.assertEqual(tested_object_instance.analysis_type, 'rel_ab') From 2d2d29c8279f0e9ded663b0efe57189a42e189d7 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Mon, 3 Apr 2023 18:42:23 +0200 Subject: [PATCH 05/17] commenting code in metaphlan parser --- .../parsers/counts/taxonomy/metaphlan.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/moonstone/parsers/counts/taxonomy/metaphlan.py b/moonstone/parsers/counts/taxonomy/metaphlan.py index 1b63fda..2e6b21e 100644 --- a/moonstone/parsers/counts/taxonomy/metaphlan.py +++ b/moonstone/parsers/counts/taxonomy/metaphlan.py @@ -33,7 +33,9 @@ def rows_differences(self, dataframe1, dataframe2) -> DataFrame: rows_diff = dataframe1 - dataframe2 rows_diff[rows_diff.isnull()] = dataframe1 if self.analysis_type == 'rel_ab': - rows_diff[rows_diff < 0.0001] = 0 + rows_diff[rows_diff < 0.0001] = 0 # if difference between sum of organism of rank r (ex: sum of species of genus X) + # and value of rank r+1 (ex:genus X) is so small, + # we assume that it's due to python addition approximation with decimal else: rows_diff[rows_diff < 0] = 0 rows_diff = rows_diff.loc[rows_diff.sum(axis=1)[rows_diff.sum(axis=1) != 0].index] @@ -49,6 +51,21 @@ def compare_difference_between_two_levels(self, whole_df, df_at_lower_level, ran return self.rows_differences(df_rank, df_rank_computed) def remove_duplicates(self, df) -> DataFrame: + """ + Metaphlan3 results are by level therefore we need to remove the duplicated informations + Example: + We have: + ...|g_GenusA 50.0 + ...|g_GenusA|s_Species1 30.0 + ...|g_GenusB 50.0 + ...|g_GenusB|s_Species2 50.0 + Sum = 180.0 =/= 100.0 (while it's relative abundance -> but same problem with other analysis type) + We want: + ...|g_GenusA|s_GenusA (genus) 20.0 # unspecified species + ...|g_GenusA|s_Species1 30.0 + ...|g_GenusB|s_Species2 50.0 + Sum = 100.0 + """ df = df.set_index(self.taxa_column) # dataframe at rank level From f6c17a55111fed294d1111e39e723fadf80d1f49 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Fri, 12 May 2023 18:28:50 +0200 Subject: [PATCH 06/17] allow to ask for less taxonomical names --- .../parsers/counts/taxonomy/metaphlan.py | 10 +++++++- .../counts/taxonomy/metaphlan3/input.tsv | 4 ++-- .../taxonomy/metaphlan3/test_metaphlan3.py | 24 ++++++++++++++++++- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/moonstone/parsers/counts/taxonomy/metaphlan.py b/moonstone/parsers/counts/taxonomy/metaphlan.py index 2e6b21e..d2ec7ec 100644 --- a/moonstone/parsers/counts/taxonomy/metaphlan.py +++ b/moonstone/parsers/counts/taxonomy/metaphlan.py @@ -33,7 +33,8 @@ def rows_differences(self, dataframe1, dataframe2) -> DataFrame: rows_diff = dataframe1 - dataframe2 rows_diff[rows_diff.isnull()] = dataframe1 if self.analysis_type == 'rel_ab': - rows_diff[rows_diff < 0.0001] = 0 # if difference between sum of organism of rank r (ex: sum of species of genus X) + rows_diff[rows_diff < 0.0001] = 0 + # if difference between sum of organism of rank r (ex: sum of species of genus X) # and value of rank r+1 (ex:genus X) is so small, # we assume that it's due to python addition approximation with decimal else: @@ -135,6 +136,13 @@ def __init__(self, *args, analysis_type: str = 'rel_ab', keep_NCBI_tax_col: bool def _load_data(self) -> DataFrame: df = super()._load_data() + # if number of taxonomical_names is inferior to the default, + if len(self.taxonomical_names) < len(BaseTaxonomyCountsParser.taxonomical_names): + # we need to restrict the rows considered to only the rows that recount taxonomical level inside the range + # wanted. + # Or error "ValueError: Error : expecting a integer inferior or equal to the number of taxonomical_names." + # will be raised + df = df[df["NCBI_tax_id"].map(lambda x: len(x.split("|"))) <= len(self.taxonomical_names)] if self.keep_NCBI_tax_col: tmp = df[[self.NCBI_tax_column, self.taxa_column]] diff --git a/tests/parsers/counts/taxonomy/metaphlan3/input.tsv b/tests/parsers/counts/taxonomy/metaphlan3/input.tsv index d1ffdab..e7995ad 100644 --- a/tests/parsers/counts/taxonomy/metaphlan3/input.tsv +++ b/tests/parsers/counts/taxonomy/metaphlan3/input.tsv @@ -8,8 +8,8 @@ k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomyce k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinobaculum 2|201174|1760|2037|2049|76833 1.0 2.0 k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinobaculum|s__Actinobaculum_massiliense 2|201174|1760|2037|2049|1654|461393 1.0 2.0 k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales 2|1239|91061|186826 9.5 10.3 -k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae 2|1239|91061|186826|33958|1578 3.2 8.0 -k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae|g__Lactobacillus 2|1239|91061|186826|33958|1578|1632 3.2 8.0 +k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae 2|1239|91061|186826|33958 3.2 8.0 +k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae|g__Lactobacillus 2|1239|91061|186826|33958|1578 3.2 8.0 k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae 2|1239|91061|186826|1300 6.3 2.3 k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus 2|1239|91061|186826|1300|1301 6.3 2.3 k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus|s__Streptococcus_thermophilus 2|1239|91061|186826|1300|1301|1308 1.7 0.7 diff --git a/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py b/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py index 000160a..029e320 100644 --- a/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py +++ b/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py @@ -44,7 +44,7 @@ def test_to_dataframe_keep_NCBI_tax_col(self): ['Bacteria', 'Actinobacteria', 'Actinobacteria', 'Actinomycetales', 'Actinomycetaceae', 'Actinobaculum', 'Actinobaculum_massiliense', 1.0, 2.0, '461393'], ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus', - 'Lactobacillus (genus)', 3.2, 8.0, '1632'], + 'Lactobacillus (genus)', 3.2, 8.0, '1578'], ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus', 'Streptococcus (genus)', 1.3, 0.4, '1301'], ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus', @@ -58,3 +58,25 @@ def test_to_dataframe_keep_NCBI_tax_col(self): expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']) observed_df = meta2parser.dataframe pd.testing.assert_frame_equal(observed_df, expected_df, check_like=True) + + def test_to_dataframe_less_taxonomical_names(self): + """ + Test based on input.tsv file + """ + meta2parser = Metaphlan3Parser(self.input_path, analysis_type='rel_ab', keep_NCBI_tax_col=True) + meta2parser.taxonomical_names = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus'] + expected_df = pd.DataFrame( + [ + ['Bacteria', 'Actinobacteria', 'Actinobacteria', 'Actinomycetales', 'Actinomycetaceae', 'Actinobaculum', + 1.0, 2.0, '76833'], + ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus', + 3.2, 8.0, '1578'], + ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus', + 6.3, 2.3, '1301'], + ], + columns=['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'SAMPLE_1', 'SAMPLE_2', + 'NCBI_tax_id'] + ) + expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus']) + observed_df = meta2parser.dataframe + pd.testing.assert_frame_equal(observed_df, expected_df, check_like=True) From 95b2acfd42226aab9c370db6a5ff825be65aaef0 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Wed, 7 Jun 2023 15:01:27 +0200 Subject: [PATCH 07/17] 1st try debugging numpy/scikit-bio error --- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 4e52bcb..7d24e68 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -25,6 +25,7 @@ jobs: python -m pip install --upgrade pip pip install flake8 pytest pip install numpy==1.18.1 + pip install scikit-bio==0.5.6 pip install . pip install odfpy # optional dependencies pip install openpyxl # idem From 058b8e42327bf6b56a16b4b242c9d11cc2cb7b97 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Wed, 7 Jun 2023 15:32:02 +0200 Subject: [PATCH 08/17] 2nd try debugging numpy/scikit-bio error --- .github/workflows/python-package.yml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 7d24e68..9966157 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -22,14 +22,13 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install flake8 pytest - pip install numpy==1.18.1 - pip install scikit-bio==0.5.6 - pip install . - pip install odfpy # optional dependencies - pip install openpyxl # idem - pip install xlrd # idem + python -m pip3 install --upgrade pip3 + pip3 install flake8 pytest + pip3 install numpy==1.18.1 + pip3 install . + pip3 install odfpy # optional dependencies + pip3 install openpyxl # idem + pip3 install xlrd # idem - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names From 3c3193bb426673c985346a44aa6c1a7e50a63ca8 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Tue, 10 Oct 2023 17:11:09 +0200 Subject: [PATCH 09/17] updating pd np scikit-bio --- .github/workflows/coverage.yml | 4 +- .github/workflows/python-package.yml | 4 +- .github/workflows/python-publish.yml | 2 +- moonstone/analysis/diversity/base.py | 22 ++-- moonstone/analysis/statistical_test.py | 9 ++ moonstone/parsers/base.py | 2 + .../parsers/counts/taxonomy/metaphlan.py | 16 +-- moonstone/plot/counts.py | 3 +- moonstone/plot/graphs/base.py | 2 +- moonstone/utils/df_merge.py | 14 ++- moonstone/utils/df_reindex.py | 24 +++- moonstone/utils/taxonomy.py | 2 +- requirements.txt | 6 +- setup.py | 6 +- tests/analysis/diversity/test_beta.py | 14 +-- .../counts/taxonomy/test_base_metaphlan.py | 4 +- tests/plot/test_counts.py | 2 +- tests/utils/pandas/test_series.py | 3 +- tests/utils/test_df_merge.py | 41 ++++++- tests/utils/test_df_reindex.py | 107 +++++++++++++++++- 20 files changed, 221 insertions(+), 66 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 68b83ed..2afda4d 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7] + python-version: [3.9] steps: - uses: actions/checkout@v2 @@ -21,7 +21,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pytest pytest-cov - pip install numpy==1.18.1 + pip install numpy==1.24.3 pip install . pip install odfpy # optional dependencies pip install openpyxl # idem diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 9966157..61b30ae 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7", "3.8"] + python-version: ["3.8", "3.9"] steps: - uses: actions/checkout@v2 @@ -24,7 +24,7 @@ jobs: run: | python -m pip3 install --upgrade pip3 pip3 install flake8 pytest - pip3 install numpy==1.18.1 + pip3 install numpy==1.24.3 pip3 install . pip3 install odfpy # optional dependencies pip3 install openpyxl # idem diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index d4ba985..92ea8e9 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.7' + python-version: '3.9' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/moonstone/analysis/diversity/base.py b/moonstone/analysis/diversity/base.py index 3243194..97bfa1c 100644 --- a/moonstone/analysis/diversity/base.py +++ b/moonstone/analysis/diversity/base.py @@ -194,8 +194,8 @@ def _run_statistical_test_groups( corrected_pval.index = pval.dropna().index # postulate that the order hasn't changed if pval[pval.isnull()].size > 0: - corrected_pval = corrected_pval.append(pval[pval.isnull()]) - + # corrected_pval = corrected_pval.append(pval[pval.isnull()]) + corrected_pval = pd.concat([corrected_pval, pval[pval.isnull()]]) # remodelling of p-values output corrected_pval = self._structure_remodelling(corrected_pval, structure=structure_pval, sym=sym) return corrected_pval @@ -248,10 +248,13 @@ def _compute_pval_inside_subgroups( f"Less than 2 samples in dataframe group {g} in data. P-val can't be computed." ) else: - pval = pval.append(self._run_statistical_test_groups( - df_gp, final_group_col, stats_test, - correction_method, structure_pval, sym - )) + pval = pd.concat([ + pval, + self._run_statistical_test_groups( + df_gp, final_group_col, stats_test, + correction_method, structure_pval, sym + ) + ]) pval.index = pd.MultiIndex.from_tuples(pval.index, names=('Group1', 'Group2')) return pval @@ -317,12 +320,13 @@ def analyse_groups( df, group_col, final_group_col, stats_test, correction_method, structure_pval, sym ) if pval_to_compute == "same group_col or group_col2 values": - pval = pval.append( + pval = pd.concat([ + pval, self._compute_pval_inside_subgroups( df, group_col2, final_group_col, stats_test, correction_method, structure_pval, sym ) - ) + ]) else: df = self._get_grouped_df(filtered_metadata_df[group_col]) @@ -359,7 +363,7 @@ def analyse_groups( # 'data' different from 'diversity indexes' in the fact that it has been filtered on metadata, meaning that # samples without metadata for group_col (or group_col2) have been dropped - return{**{'data': df}, **self.report_data['analyse_groups']} + return {**{'data': df}, **self.report_data['analyse_groups']} def generate_report_data(self) -> dict: """ diff --git a/moonstone/analysis/statistical_test.py b/moonstone/analysis/statistical_test.py index 994b436..c1b2ffb 100644 --- a/moonstone/analysis/statistical_test.py +++ b/moonstone/analysis/statistical_test.py @@ -14,6 +14,15 @@ def _preprocess_groups_comparison( series: pd.Series, group_series: pd.Series, stat_test: str ): + # If samples in group_series/metadata but not in series/count_dataframe + # then we need to remove them from the group_series/metadata + # to not get an error like "None of [Index(['sample7'], dtype='object')] are in the [index]" + group_series_index_to_keep = group_series.index.intersection(series.index) + if len(group_series_index_to_keep) != len(group_series.index): + logger.info( + "Some index values in group_series aren't found in the series. Dropping those rows." + ) + group_series = group_series.loc[group_series_index_to_keep] groups = list(group_series.unique()) groups.sort() diff --git a/moonstone/parsers/base.py b/moonstone/parsers/base.py index c270bb3..323d111 100644 --- a/moonstone/parsers/base.py +++ b/moonstone/parsers/base.py @@ -52,6 +52,8 @@ def _load_data(self) -> pd.DataFrame: "xlsb": "pyxlsb" # Binary Excel files } if ext in ext_engine.keys(): + if self.header == "infer": + self.header = 0 # "infer" not accepted with read_excel anymore return pd.read_excel( self.file_path, header=self.header, **self.parsing_options, engine=ext_engine[ext] diff --git a/moonstone/parsers/counts/taxonomy/metaphlan.py b/moonstone/parsers/counts/taxonomy/metaphlan.py index d2ec7ec..da13136 100644 --- a/moonstone/parsers/counts/taxonomy/metaphlan.py +++ b/moonstone/parsers/counts/taxonomy/metaphlan.py @@ -1,6 +1,6 @@ import logging -from pandas import DataFrame +import pandas as pd from moonstone.parsers.counts.taxonomy.base import BaseTaxonomyCountsParser @@ -29,7 +29,7 @@ def _valid_analysis_type(self, analysis_type): analysis_type = "rel_ab" return analysis_type - def rows_differences(self, dataframe1, dataframe2) -> DataFrame: + def rows_differences(self, dataframe1, dataframe2) -> pd.DataFrame: rows_diff = dataframe1 - dataframe2 rows_diff[rows_diff.isnull()] = dataframe1 if self.analysis_type == 'rel_ab': @@ -42,7 +42,7 @@ def rows_differences(self, dataframe1, dataframe2) -> DataFrame: rows_diff = rows_diff.loc[rows_diff.sum(axis=1)[rows_diff.sum(axis=1) != 0].index] return rows_diff - def compare_difference_between_two_levels(self, whole_df, df_at_lower_level, rank) -> DataFrame: + def compare_difference_between_two_levels(self, whole_df, df_at_lower_level, rank) -> pd.DataFrame: df_rank = whole_df[whole_df.index.map(lambda x: len(x.split('|'))) == rank] # transformation lower_level to rank (level) @@ -51,7 +51,7 @@ def compare_difference_between_two_levels(self, whole_df, df_at_lower_level, ran df_rank_computed = df_rank_computed.groupby(df_rank_computed.index).sum() # grouping by rank (level) return self.rows_differences(df_rank, df_rank_computed) - def remove_duplicates(self, df) -> DataFrame: + def remove_duplicates(self, df) -> pd.DataFrame: """ Metaphlan3 results are by level therefore we need to remove the duplicated informations Example: @@ -90,8 +90,8 @@ def remove_duplicates(self, df) -> DataFrame: rank -= 1 rows_diff = self.compare_difference_between_two_levels(df, new_df, rank) if rows_diff.size != 0: - new_df = new_df.append(rows_diff) # add missing rows to the dataframe of the lower level - + # new_df = new_df.append(rows_diff) # add missing rows to the dataframe of the lower level + new_df = pd.concat([new_df, rows_diff]) # add missing rows to the dataframe of the lower level # verification that everything is defined up to the lower_level samples_with_incomp_lowerlevel = new_df.sum()[new_df.sum() < total] @@ -106,7 +106,7 @@ class Metaphlan2Parser(BaseMetaphlanParser): taxa_column = 'ID' - def _load_data(self) -> DataFrame: + def _load_data(self) -> pd.DataFrame: df = super()._load_data() df = self.remove_duplicates(df) df = self.split_taxa_fill_none(df, sep="|") @@ -133,7 +133,7 @@ def __init__(self, *args, analysis_type: str = 'rel_ab', keep_NCBI_tax_col: bool self.keep_NCBI_tax_col = keep_NCBI_tax_col super().__init__(*args, analysis_type=analysis_type, parsing_options={'skiprows': 1}, **kwargs) - def _load_data(self) -> DataFrame: + def _load_data(self) -> pd.DataFrame: df = super()._load_data() # if number of taxonomical_names is inferior to the default, diff --git a/moonstone/plot/counts.py b/moonstone/plot/counts.py index d028dd9..c31b364 100644 --- a/moonstone/plot/counts.py +++ b/moonstone/plot/counts.py @@ -456,7 +456,8 @@ def _plot_most_what_taxa_boxplot_or_violin( tmp = relab_df_taxa[i].reset_index() tmp.index = nb * [i] tmp.columns = ["species", "relative abundance"] - relab_df_taxa2 = relab_df_taxa2.append(tmp) + # relab_df_taxa2 = relab_df_taxa2.append(tmp) + relab_df_taxa2 = pd.concat([relab_df_taxa2, tmp]) relab_df_taxa2.species = relab_df_taxa2.species.apply(self._italicize_taxa_name) groups = [self._italicize_taxa_name(name) for name in groups] diff --git a/moonstone/plot/graphs/base.py b/moonstone/plot/graphs/base.py index 20cedd3..5183fdc 100644 --- a/moonstone/plot/graphs/base.py +++ b/moonstone/plot/graphs/base.py @@ -236,7 +236,7 @@ def plot_one_graph( if groups: filtered_df = self.data[self.data[group_col].isin(groups)] filtered_df[group_col] = filtered_df[group_col].astype("category") - filtered_df[group_col].cat.set_categories(groups, inplace=True) + filtered_df[group_col].cat = filtered_df[group_col].cat.set_categories(groups) filtered_df = filtered_df.sort_values([group_col]) else: filtered_df = copy.deepcopy(self.data) diff --git a/moonstone/utils/df_merge.py b/moonstone/utils/df_merge.py index dae1055..400e9c4 100644 --- a/moonstone/utils/df_merge.py +++ b/moonstone/utils/df_merge.py @@ -1,6 +1,5 @@ import logging import pandas as pd -import numpy as np logger = logging.getLogger(__name__) @@ -26,10 +25,15 @@ def merge(self): logger.info('Merge function called to merge count data and metadata.') logger.info(f'Variable {self.variable} from metadata file will be merged with counts.') - if not isinstance(self.dc.index, type(self.dm.index)): - logger.warning(f'Index types do not match: {type(self.dc.index)} and {type(self.dm.index)}.') - self.dc.set_index(np.int64(np.array(self.dc.index)), inplace=True) - logger.info(f' Indexes reset. Count Index={type(self.dc.index)}, Metadata Index={type(self.dm.index)}') + # if not isinstance(self.dc.index, type(self.dm.index)): + if self.dc.index.dtype != self.dm.index.dtype: + # logger.warning(f'Index types do not match: {type(self.dc.index)} and {type(self.dm.index)}.') + # self.dc = self.dc.set_index(np.int64(np.array(self.dc.index))) + # logger.info(f' Indexes reset. Count Index={type(self.dc.index)}, Metadata Index={type(self.dm.index)}') + logger.warning(f'Index types do not match: {self.dc.index.dtype} and {self.dm.index.dtype}.') + self.dc.index = self.dc.index.astype(str) + self.dm.index = self.dm.index.astype(str) + logger.warning('Both Count and Metadata Indexes set as string') df = pd.merge(self.dm[self.variable], self.dc, left_index=True, right_index=True) logger.info('Merge function completed. Returning merged data frame.') diff --git a/moonstone/utils/df_reindex.py b/moonstone/utils/df_reindex.py index dcdad70..225e530 100644 --- a/moonstone/utils/df_reindex.py +++ b/moonstone/utils/df_reindex.py @@ -22,12 +22,19 @@ def __init__(self, dataframe: Union[pd.Series, pd.DataFrame], self.taxonomy_df = taxonomy_dataframe self.taxa_column = taxa_column - def reindex_with_taxonomy(self, method: str = 'sum'): + def _sum_at_lowest_level(self, df): + df.index = df.index.to_flat_index() + df = df.groupby(level=0).sum() + df.index = pd.MultiIndex.from_tuples(df.index, names=self.taxonomical_names[:self._rank_level]) + return df + + def reindex_with_taxonomy(self, method: str = 'sum', na: str = 'drop'): """ reindexation on taxonomic information (if there are). :param method: how to combine genes' information of genes that have the same taxonomy. Choose 'sum' to sum the counts or 'count' to only have the number of genes with this taxonomy + :param na: {'drop' (default), 'keep', 'sum'} what to do with the genes with missing taxonomical information. NB: You can access the list of items without taxonomic information by checking the .without_info_index attributes @@ -48,16 +55,21 @@ def reindex_with_taxonomy(self, method: str = 'sum'): self.without_info_index = new_df['_merge'].loc[new_df['_merge'] == 'left_only'].index new_df = new_df.drop(['_merge'], axis=1) - new_df[self.taxa_column] = new_df[self.taxa_column].fillna(value='k__; p__; c__; o__; f__; g__; s__') + if na == 'drop': + new_df = new_df.dropna(subset=[self.taxa_column]) + elif na == 'keep': + new_df[self.taxa_column] = new_df[self.taxa_column].fillna( + 'k__; p__; c__; o__; f__; g__; s__'+new_df.index.to_series()+'_species' + ) + else: # na == 'sum' + new_df[self.taxa_column] = new_df[self.taxa_column].fillna(value='k__; p__; c__; o__; f__; g__; s__') new_df = self.split_taxa_fill_none(new_df, sep="; ", merge_genus_species=True) new_df = new_df.set_index(self.taxonomical_names[:self._rank_level]) if method == 'sum': - nb_levels = len(self.taxonomical_names[:self._rank_level]) - new_df = new_df.sum(level=list(range(nb_levels))) + new_df = self._sum_at_lowest_level(new_df) elif method == 'count': new_df[:] = np.where(new_df > 0, 1, 0) # presence/absence -> is > 0 then presence (1) else absence (0) - nb_levels = len(self.taxonomical_names[:self._rank_level]) - new_df = new_df.sum(level=list(range(nb_levels))) + new_df = self._sum_at_lowest_level(new_df) return new_df @property diff --git a/moonstone/utils/taxonomy.py b/moonstone/utils/taxonomy.py index 338862d..c24e66c 100644 --- a/moonstone/utils/taxonomy.py +++ b/moonstone/utils/taxonomy.py @@ -85,7 +85,7 @@ def remove_taxo_prefix(string): taxa_columns.columns = self.taxonomical_names[:self.rank_level] taxa_columns = taxa_columns.applymap(lambda x: remove_taxo_prefix(x)) if terms_to_remove is not None: - taxa_columns = taxa_columns.replace(terms_to_remove, np.nan) + taxa_columns = taxa_columns.replace(terms_to_remove, None) if merge_genus_species: taxa_columns = self._merge_genus_species(taxa_columns) taxa_columns = self._fill_none(taxa_columns) diff --git a/requirements.txt b/requirements.txt index 0d75f1a..8078fe1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -48,7 +48,7 @@ msgpack==1.0.0 # via cachecontrol natsort==7.0.1 # via scikit-bio -numpy==1.18.1 +numpy==1.24.3 # via # hdmedians # matplotlib @@ -59,7 +59,7 @@ numpy==1.18.1 # scikit-learn # scipy # statsmodels -pandas==1.0.1 +pandas==2.0.2 # via # moonstone (setup.py) # scikit-bio @@ -100,7 +100,7 @@ requests==2.24.0 # via cachecontrol retrying==1.3.3 # via plotly -scikit-bio==0.5.6 +scikit-bio==0.5.9 # via moonstone (setup.py) scikit-learn==0.21.3 # via diff --git a/setup.py b/setup.py index 2b5c8c2..3c69e1f 100644 --- a/setup.py +++ b/setup.py @@ -9,14 +9,14 @@ author='Kenzo-Hugo Hillion, Agnès Baud, Mariela Furstenheim, Sean Kennedy', author_email='kehillio@pasteur.fr', install_requires=[ - 'pandas==1.0.1', + 'pandas==2.0.2', 'matplotlib==3.3.0', 'plotly==5.6.0', 'statsmodels==0.11.1', 'python-slugify==4.0.1', 'pyaml==20.4.0', - 'numpy==1.18.1', - 'scikit-bio==0.5.6', + 'numpy==1.24.3', + 'scikit-bio==0.5.9', 'scikit-learn==0.21.3', 'hdmedians==0.13', 'cython==0.29.21', diff --git a/tests/analysis/diversity/test_beta.py b/tests/analysis/diversity/test_beta.py index 93eb349..9fa67b2 100644 --- a/tests/analysis/diversity/test_beta.py +++ b/tests/analysis/diversity/test_beta.py @@ -35,7 +35,8 @@ def test_compute_beta_diversity_df(self): ) pd.testing.assert_frame_equal( tested_object_instance.beta_diversity_df, expected_object, - check_less_precise=2, # Deprecated since version 1.1.0, to be changed when updating pandas + rtol=0.01 + # check_less_precise=2, # Deprecated since version 1.1.0, to be changed when updating pandas ) def test_compute_beta_diversity_series(self): @@ -54,11 +55,11 @@ def test_compute_beta_diversity_series(self): # Two ways of retrieving the series pd.testing.assert_series_equal( tested_object_instance.beta_diversity_series, expected_object, - check_less_precise=2, # Deprecated since version 1.1.0, to be changed when updating pandas + rtol=0.01 ) pd.testing.assert_series_equal( tested_object_instance.diversity_indexes, expected_object, - check_less_precise=2, # Deprecated since version 1.1.0, to be changed when updating pandas + rtol=0.01 ) def test_run_statistical_test_groups_with_NaN(self): @@ -109,7 +110,6 @@ def test_run_statistical_test_groups_with_NaN(self): pd.testing.assert_series_equal( pval, expected_object, - check_less_precise=2, # Deprecated since version 1.1.0, to be changed when updating pandas ) def test_get_grouped_df_series(self): @@ -131,7 +131,6 @@ def test_get_grouped_df_series(self): output = tested_object_instance._get_grouped_df_series(metadata_ser) pd.testing.assert_frame_equal( output, expected_object, - check_less_precise=2, # Deprecated since version 1.1.0, to be changed when updating pandas ) def test_get_grouped_df_dataframe(self): @@ -172,7 +171,6 @@ def test_get_grouped_df_dataframe(self): output = tested_object_instance._get_grouped_df_dataframe(metadata_df) pd.testing.assert_frame_equal( output, expected_object, - check_less_precise=2, # Deprecated since version 1.1.0, to be changed when updating pandas ) def test_analyse_grouped_df(self): @@ -195,7 +193,6 @@ def test_analyse_grouped_df(self): output = tested_object_instance.analyse_groups(metadata_df, 'sex', show=False, show_pval=False) pd.testing.assert_frame_equal( output['data'], expected_object, - check_less_precise=2, # Deprecated since version 1.1.0, to be changed when updating pandas ) def test_analyse_grouped_df_with_group_col2(self): @@ -238,7 +235,6 @@ def test_analyse_grouped_df_with_group_col2(self): ) pd.testing.assert_frame_equal( output["data"], expected_object, - check_less_precise=2, # Deprecated since version 1.1.0, to be changed when updating pandas ) @@ -269,7 +265,6 @@ def test_compute_beta_diversity(self): ) pd.testing.assert_frame_equal( tested_object_instance.beta_diversity_df, expected_object, - check_less_precise=2, # Deprecated since version 1.1.0, to be changed when updating pandas ) def test_compute_beta_diversity_force_computation(self): @@ -325,7 +320,6 @@ def test_compute_beta_diversity(self): ) pd.testing.assert_frame_equal( tested_object_instance.beta_diversity_df, expected_object, - check_less_precise=2, # Deprecated since version 1.1.0, to be changed when updating pandas ) def test_compute_beta_diversity_force_computation(self): diff --git a/tests/parsers/counts/taxonomy/test_base_metaphlan.py b/tests/parsers/counts/taxonomy/test_base_metaphlan.py index 40f58f8..0d485ca 100644 --- a/tests/parsers/counts/taxonomy/test_base_metaphlan.py +++ b/tests/parsers/counts/taxonomy/test_base_metaphlan.py @@ -271,6 +271,6 @@ def test_valid_analysis_type(self): with self.assertLogs('moonstone.parsers.counts.taxonomy.metaphlan', level='WARNING') as log: tested_object_instance = BaseMetaphlanParser("file", analysis_type="INVALID ANALYSIS TYPE") self.assertEqual(len(log.output), 1) - self.assertIn("WARNING:moonstone.parsers.counts.taxonomy.metaphlan:analysis_type='INVALID ANALYSIS TYPE' not valid, \ -set to default ('rel_ab').", log.output) + self.assertIn("WARNING:moonstone.parsers.counts.taxonomy.metaphlan:analysis_type='INVALID ANALYSIS TYPE' \ +not valid, set to default ('rel_ab').", log.output) self.assertEqual(tested_object_instance.analysis_type, 'rel_ab') diff --git a/tests/plot/test_counts.py b/tests/plot/test_counts.py index 0f51a9c..7d92984 100644 --- a/tests/plot/test_counts.py +++ b/tests/plot/test_counts.py @@ -741,7 +741,7 @@ def test_plot_most_prevalent_taxa_modebargraph_plotting_options(self): expected_x = [75.0, 100.0] expected_y = [ - "Streptococcus salivarius", + "Lactobacillus (genus)", # with Streptococcus_salivarius and Streptococcus (genus) all at 75% "Streptococcus thermophilus", ] diff --git a/tests/utils/pandas/test_series.py b/tests/utils/pandas/test_series.py index 924c8ba..1051a14 100644 --- a/tests/utils/pandas/test_series.py +++ b/tests/utils/pandas/test_series.py @@ -74,7 +74,7 @@ def test_build_stats_float(self): class TestSeriesBinning(TestCase): - def test_compute_homoogeneous_bins(self): + def test_compute_homogeneous_bins(self): tested_object = pd.Series( { 'gene_1': 10.5, @@ -108,6 +108,7 @@ def test_compute_binned_data(self): expected_object = pd.Series( [1, 2], index=[']0, 5]', ']5, 10]'] ) + expected_object.name = "count" tested_object_instance = SeriesBinning(series) tested_object_instance.bins_values = [0, 5, 10] tested_object = tested_object_instance.compute_binned_data() diff --git a/tests/utils/test_df_merge.py b/tests/utils/test_df_merge.py index 88d0213..4a741c4 100644 --- a/tests/utils/test_df_merge.py +++ b/tests/utils/test_df_merge.py @@ -6,8 +6,8 @@ class TestMergeDF(TestCase): - def test_merge(self): - d1 = pd.DataFrame( + def setUp(self): + self.d1 = pd.DataFrame( [ [23, 7, 44, 0, 101], [15, 4, 76, 3, 107], @@ -15,9 +15,10 @@ def test_merge(self): [31, 4, 50, 0, 99] ], columns=['item_1', 'item_2', 'item_3', 'item_4', 'item_5'], - index=['1', '2', '3', '4'] # index dtype='object' + index=[1, 2, 3, 4] # index dtype='object' ) + def test_merge(self): d2 = pd.DataFrame( [ ['M', 'Yes', 23, 'June', 170], @@ -40,5 +41,37 @@ def test_merge(self): index=[1, 2, 3, 4] # index dtype='int64' ) - merged_df = MergeDF(d1, d2, 'sex').merged_df + merged_df = MergeDF(self.d1, d2, 'sex').merged_df + pd.testing.assert_frame_equal(merged_df, df_expected) + + def test_merge_index_dont_match(self): + d2 = pd.DataFrame( + [ + ['M', 'Yes', 23, 'June', 170], + ['F', 'Yes', 33, 'Nov', 154], + ['F', 'Yes', 29, 'Jan', 161], + ['F', 'No', 27, 'Jan', 152] + ], + columns=['sex', 'pets', 'age', 'sample_month', 'height'], + index=['1', '2', '3', '4'] # index dtype='object' + ) + + df_expected = pd.DataFrame( + [ + ['M', 23, 7, 44, 0, 101], + ['F', 15, 4, 76, 3, 107], + ['F', 20, 0, 22, 0, 101], + ['F', 31, 4, 50, 0, 99] + ], + columns=['sex', 'item_1', 'item_2', 'item_3', 'item_4', 'item_5'], + index=['1', '2', '3', '4'] # index dtype='object' + ) + + with self.assertLogs('moonstone.utils.df_merge', level='WARNING') as log: + merged_df = MergeDF(self.d1, d2, 'sex').merged_df + self.assertEqual(len(log.output), 2) + self.assertIn( + "WARNING:moonstone.utils.df_merge:Index types do not match: int64 and object.", + log.output + ) pd.testing.assert_frame_equal(merged_df, df_expected) diff --git a/tests/utils/test_df_reindex.py b/tests/utils/test_df_reindex.py index cc92c6a..54cdab4 100644 --- a/tests/utils/test_df_reindex.py +++ b/tests/utils/test_df_reindex.py @@ -1,5 +1,6 @@ from unittest import TestCase +import numpy as np import pandas as pd from moonstone.utils.df_reindex import GenesToTaxonomy @@ -33,17 +34,18 @@ def test_reindex_with_taxonomy(self): 'sample_1': { ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', - 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 23, + 'Enterococcaceae', 'Enterococcus', 'Enterococcus_faecium'): 15, ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', - 'Enterococcaceae', 'Enterococcus', 'Enterococcus_faecium'): 15 + 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 23 }, 'sample_2': { ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', - 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 7, + 'Enterococcaceae', 'Enterococcus', 'Enterococcus_faecium'): 4, ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', - 'Enterococcaceae', 'Enterococcus', 'Enterococcus_faecium'): 4} + 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 7 } + } ) df_expected.index.set_names(["kingdom", "phylum", "class", "order", "family", "genus", "species"], inplace=True) @@ -51,8 +53,7 @@ def test_reindex_with_taxonomy(self): reindexed_df = reindexation_instance.reindexed_df pd.testing.assert_frame_equal(reindexed_df, df_expected) - def test_reindex_with_taxonomy_missing_infos(self): - # for now, if there aren't any taxonomic information, the gene is dropped + def test_reindex_with_taxonomy_missing_infos_dropped(self): df = pd.DataFrame( [ [23, 7], @@ -94,6 +95,100 @@ def test_reindex_with_taxonomy_missing_infos(self): pd.testing.assert_frame_equal(reindexed_df, df_expected) pd.testing.assert_index_equal(reindexation_instance.without_info_index, pd.Index(['gene_2'], dtype='object')) + def test_reindex_with_taxonomy_missing_infos_kept(self): + df = pd.DataFrame( + [ + [23, 7], + [15, 4], + [0, 36], + ], + columns=['sample_1', 'sample_2'], + index=['gene_1', 'gene_2', 'gene_4'] # index dtype='object' + ) + df_taxo = pd.DataFrame( + [ + [147802, + 'k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; \ +f__Lactobacillaceae; g__Lactobacillus; s__iners'], + [1352, + 'k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; \ +f__Enterococcaceae; g__Enterococcus; s__faecium'] + ], + columns=['tax_id', 'full_tax'], + index=['gene_1', 'gene_3'] # index dtype='object' + ) + df_expected = pd.DataFrame.from_dict( + { + 'sample_1': + { + ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', + 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 23, + (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 'gene_2_species'): 15, + (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 'gene_4_species'): 0, + }, + 'sample_2': + { + ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', + 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 7, + (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 'gene_2_species'): 4, + (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 'gene_4_species'): 36, + } + } + ) + df_expected.index.set_names(["kingdom", "phylum", "class", "order", "family", "genus", "species"], inplace=True) + + reindexation_instance = GenesToTaxonomy(df, df_taxo) + reindexed_df = reindexation_instance.reindex_with_taxonomy(na='keep') + pd.testing.assert_frame_equal(reindexed_df, df_expected) + pd.testing.assert_index_equal( + reindexation_instance.without_info_index, + pd.Index(['gene_2', 'gene_4'], dtype='object') + ) + + def test_reindex_with_taxonomy_missing_infos_summed(self): + df = pd.DataFrame( + [ + [23, 7], + [15, 4], + [0, 36], + ], + columns=['sample_1', 'sample_2'], + index=['gene_1', 'gene_2', 'gene_4'] # index dtype='object' + ) + df_taxo = pd.DataFrame( + [ + [147802, + 'k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; \ +f__Lactobacillaceae; g__Lactobacillus; s__iners'], + [1352, + 'k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; \ +f__Enterococcaceae; g__Enterococcus; s__faecium'] + ], + columns=['tax_id', 'full_tax'], + index=['gene_1', 'gene_3'] # index dtype='object' + ) + df_expected = pd.DataFrame.from_dict( + { + 'sample_1': + { + ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', + 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 23, + (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan): 15, + }, + 'sample_2': + { + ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', + 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 7, + (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan): 40, + } + } + ) + df_expected.index.set_names(["kingdom", "phylum", "class", "order", "family", "genus", "species"], inplace=True) + + reindexation_instance = GenesToTaxonomy(df, df_taxo) + reindexed_df = reindexation_instance.reindex_with_taxonomy(na='sum') + pd.testing.assert_frame_equal(reindexed_df, df_expected) + def test_reindex_with_taxonomy_summing(self): df = pd.DataFrame( [ From 7efac41ddc811498785dc2d1750489add68e7102 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Tue, 10 Oct 2023 17:21:50 +0200 Subject: [PATCH 10/17] statsmodels --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8078fe1..6b08cec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -118,7 +118,7 @@ six==1.15.0 # plotly # python-dateutil # retrying -statsmodels==0.11.1 +statsmodels==0.13.0 # via moonstone (setup.py) text-unidecode==1.3 # via python-slugify From aaaaaa92e767c199c2cd67e180decb34ab895174 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Tue, 10 Oct 2023 17:38:06 +0200 Subject: [PATCH 11/17] statsmodels==0.13.0 in setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3c69e1f..855e579 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ 'pandas==2.0.2', 'matplotlib==3.3.0', 'plotly==5.6.0', - 'statsmodels==0.11.1', + 'statsmodels==0.13.0', 'python-slugify==4.0.1', 'pyaml==20.4.0', 'numpy==1.24.3', From 1709821281f6b698dae8e134d228dca00fb1c160 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Tue, 10 Oct 2023 17:47:00 +0200 Subject: [PATCH 12/17] update scikit-learn --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6b08cec..1e0cb93 100644 --- a/requirements.txt +++ b/requirements.txt @@ -102,7 +102,7 @@ retrying==1.3.3 # via plotly scikit-bio==0.5.9 # via moonstone (setup.py) -scikit-learn==0.21.3 +scikit-learn==1.3.1 # via # moonstone (setup.py) # scikit-bio diff --git a/setup.py b/setup.py index 855e579..c886e2b 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ 'pyaml==20.4.0', 'numpy==1.24.3', 'scikit-bio==0.5.9', - 'scikit-learn==0.21.3', + 'scikit-learn==1.3.1', 'hdmedians==0.13', 'cython==0.29.21', 'scipy==1.5.2' From 95c27041f9ce5b29fc2f31e0da921d77bc1ef2d0 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Tue, 10 Oct 2023 17:52:10 +0200 Subject: [PATCH 13/17] hdmedians + pip3 into pip --- .github/workflows/python-package.yml | 14 +++++++------- requirements.txt | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 61b30ae..7e6ffe0 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -22,13 +22,13 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip3 install --upgrade pip3 - pip3 install flake8 pytest - pip3 install numpy==1.24.3 - pip3 install . - pip3 install odfpy # optional dependencies - pip3 install openpyxl # idem - pip3 install xlrd # idem + python -m pip install --upgrade pip + pip install flake8 pytest + pip install numpy==1.24.3 + pip install . + pip install odfpy # optional dependencies + pip install openpyxl # idem + pip install xlrd # idem - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names diff --git a/requirements.txt b/requirements.txt index 1e0cb93..aae76f2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ decorator==4.4.2 # via # ipython # scikit-bio -hdmedians==0.13 +hdmedians==0.14.2 # via # moonstone (setup.py) # scikit-bio From e69c4a3d0702581b953cf916ccda100149cd233f Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Wed, 11 Oct 2023 10:56:09 +0200 Subject: [PATCH 14/17] upgrade setuptools and wheel --- .github/workflows/coverage.yml | 2 +- .github/workflows/python-package.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 2afda4d..1c3212b 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -19,7 +19,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip setuptools wheel pip install pytest pytest-cov pip install numpy==1.24.3 pip install . diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 7e6ffe0..3a5546a 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -22,7 +22,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip setuptools wheel pip install flake8 pytest pip install numpy==1.24.3 pip install . From 576c1c2f30cca3258f128871607a95f3f73a83ad Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Wed, 11 Oct 2023 11:02:48 +0200 Subject: [PATCH 15/17] hdmedians in setup corrected --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c886e2b..39b2cb1 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ 'numpy==1.24.3', 'scikit-bio==0.5.9', 'scikit-learn==1.3.1', - 'hdmedians==0.13', + 'hdmedians==0.14.2', 'cython==0.29.21', 'scipy==1.5.2' ], From ef497055681e8c1484263402ff30cdc71585fe8a Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Thu, 12 Oct 2023 13:44:05 +0200 Subject: [PATCH 16/17] update plotly + tests adapted to new version np --- moonstone/analysis/differential_analysis.py | 10 +++++----- requirements.txt | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/moonstone/analysis/differential_analysis.py b/moonstone/analysis/differential_analysis.py index c78eb35..9387200 100644 --- a/moonstone/analysis/differential_analysis.py +++ b/moonstone/analysis/differential_analysis.py @@ -1,7 +1,6 @@ import logging import pandas as pd -import numpy as np import scipy.stats as st from statsmodels.stats.multitest import multipletests @@ -53,8 +52,8 @@ def test_dichotomic_features(self, feature, test_to_use): cat1 = self.full_table[self.full_table[feature] == self.full_table[feature][0]] cat2 = self.full_table[self.full_table[feature] != self.full_table[feature][0]] for family in range(self.number_columns_to_skip, self.full_table.shape[1]): - test = self.tests_functions_used[test_to_use](cat1[self.full_table.columns[family]], - cat2[self.full_table.columns[family]]) + test = self.tests_functions_used[test_to_use](cat1[self.full_table.columns[family]].astype(float), + cat2[self.full_table.columns[family]].astype(float)) features.append(feature) taxons.append(self.full_table.columns[family]) static_value.append(round(test[0], 6)) @@ -79,7 +78,8 @@ def test_multiple_features(self, feature, test_to_use): list_ofgroups = [] for variable in variable_dic: list_ofgroups.append(variable_dic[variable][self.full_table.columns[family]]) - test = self.tests_functions_used[test_to_use](*np.asarray(list_ofgroups)) + #test = self.tests_functions_used[test_to_use](*np.asarray(list_ofgroups)) + test = self.tests_functions_used[test_to_use](*list_ofgroups) # works for kruskal and one way anova features.append(feature) taxons.append(self.full_table.columns[family]) static_values.append(round(test[0], 6)) @@ -114,5 +114,5 @@ def differential_analysis_by_feature(self, features, type_of_features, test_to_u for feature in features: test_result = getattr(self, f"test_{type_of_features}", self.test_default)(feature, test_to_use) test_result['corrected_p-value'] = self.corrected_p_values(test_result['p-value'], correction_method_used) - final_table = final_table.append(test_result) + final_table = pd.concat([final_table, test_result]) return final_table diff --git a/requirements.txt b/requirements.txt index aae76f2..5708b61 100644 --- a/requirements.txt +++ b/requirements.txt @@ -74,7 +74,7 @@ pickleshare==0.7.5 # via ipython pillow==7.2.0 # via matplotlib -plotly==5.6.0 +plotly==5.17.0 # via moonstone (setup.py) prompt-toolkit==3.0.7 # via ipython From c513aaa731a73f541bee423cd313649458acb927 Mon Sep 17 00:00:00 2001 From: Agnes BAUD Date: Thu, 12 Oct 2023 14:08:17 +0200 Subject: [PATCH 17/17] updating scipy (everywhere) and plotly (in setup) --- requirements.txt | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5708b61..9071174 100644 --- a/requirements.txt +++ b/requirements.txt @@ -106,7 +106,7 @@ scikit-learn==1.3.1 # via # moonstone (setup.py) # scikit-bio -scipy==1.5.2 +scipy==1.9.0 # via # scikit-bio # scikit-learn diff --git a/setup.py b/setup.py index 39b2cb1..13ffa8b 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ install_requires=[ 'pandas==2.0.2', 'matplotlib==3.3.0', - 'plotly==5.6.0', + 'plotly==5.17.0', 'statsmodels==0.13.0', 'python-slugify==4.0.1', 'pyaml==20.4.0', @@ -20,7 +20,7 @@ 'scikit-learn==1.3.1', 'hdmedians==0.14.2', 'cython==0.29.21', - 'scipy==1.5.2' + 'scipy==1.9.0' ], packages=find_packages(), entry_points={'console_scripts': ['moonstone=moonstone.main:run']},