From 608af996a929dc5cdade5f249ce644eb9571e7f7 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Mon, 20 Feb 2023 15:06:02 +0100
Subject: [PATCH 01/17] fix name property error by casting group name to str

---
 moonstone/plot/graphs/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/moonstone/plot/graphs/base.py b/moonstone/plot/graphs/base.py
index 511a9f1..20cedd3 100644
--- a/moonstone/plot/graphs/base.py
+++ b/moonstone/plot/graphs/base.py
@@ -247,7 +247,7 @@ def plot_one_graph(
                     fig,
                     filtered_df2[group_col],
                     filtered_df2[data_col],
-                    names[group],
+                    str(names[group]),
                     filtered_df.index,
                     self._get_group_color(group, colors),
                     orientation,
@@ -265,7 +265,7 @@ def plot_one_graph(
                     fig,
                     filtered_df[group_col],
                     filtered_df[data_col],
-                    names[group],
+                    str(names[group]),
                     filtered_df.index,
                     self._get_group_color(group, colors),
                     orientation,

From 3c8b8dc87e2dfc77d665c7d878650dd68e369609 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Tue, 21 Feb 2023 14:26:56 +0100
Subject: [PATCH 02/17] fix unexpected 'sep' arg in read_excel, set empty Series dtype

---
 moonstone/analysis/diversity/base.py | 2 +-
 moonstone/parsers/base.py            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/moonstone/analysis/diversity/base.py b/moonstone/analysis/diversity/base.py
index 62f191a..3243194 100644
--- a/moonstone/analysis/diversity/base.py
+++ b/moonstone/analysis/diversity/base.py
@@ -240,7 +240,7 @@ def _compute_pval_inside_subgroups(
         self, diversity_index_dataframe: pd.DataFrame, group_col: str, final_group_col: str,
         stats_test: str, correction_method: str, structure_pval: str, sym: bool
     ):
-        pval = pd.Series([])
+        pval = pd.Series([], dtype='float64')
         for g in diversity_index_dataframe[group_col].dropna().unique():
             df_gp = diversity_index_dataframe[diversity_index_dataframe[group_col] == g]
             if df_gp.shape[0] < 2:
diff --git a/moonstone/parsers/base.py b/moonstone/parsers/base.py
index d6487c8..c270bb3 100644
--- a/moonstone/parsers/base.py
+++ b/moonstone/parsers/base.py
@@ -53,7 +53,7 @@ def _load_data(self) -> pd.DataFrame:
             }
         if ext in ext_engine.keys():
             return pd.read_excel(
-                self.file_path, sep=self.sep, header=self.header, **self.parsing_options,
+                self.file_path, header=self.header, **self.parsing_options,
                 engine=ext_engine[ext]
             )
         return pd.read_csv(

From 6bf11d7b7fe6d340a45aae67d6dcaab0f0641aeb Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Mon, 3 Apr 2023 15:12:47 +0200
Subject: [PATCH 03/17] metaphlan3 parser: optionally keep NCBI_tax_id, validate analysis_type

---
 .../parsers/counts/taxonomy/metaphlan.py      | 37 +++++++++++++++++--
 moonstone/plot/counts.py                      |  2 +-
 tests/analysis/diversity/test_beta.py         |  2 +-
 .../taxonomy/metaphlan3/test_metaphlan3.py    | 28 +++++++++++++-
 .../counts/taxonomy/test_base_metaphlan.py    |  8 ++++
 5 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/moonstone/parsers/counts/taxonomy/metaphlan.py b/moonstone/parsers/counts/taxonomy/metaphlan.py
index 60039da..6657c7e 100644
--- a/moonstone/parsers/counts/taxonomy/metaphlan.py
+++ b/moonstone/parsers/counts/taxonomy/metaphlan.py
@@ -1,18 +1,34 @@
+import logging
+
 from pandas import DataFrame
 
 from moonstone.parsers.counts.taxonomy.base import BaseTaxonomyCountsParser
 
+logger = logging.getLogger(__name__)
+
 
 class BaseMetaphlanParser(BaseTaxonomyCountsParser):
 
     def __init__(self, *args, analysis_type: str = 'rel_ab', **kwargs):
         """
         Args:
-            analysis_type: output type of Metaphlan3 (see ``-t`` option of metaphlan3)
+            analysis_type: output type of Metaphlan3 (see ``-t`` option of metaphlan3) 
+              { 'rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts', 
+              'marker_pres_table', 'clade_specific_strain_tracker' }
         """
-        self.analysis_type = analysis_type
+        self.analysis_type = self._valid_analysis_type(analysis_type)
         super().__init__(*args, **kwargs)
 
+    def _valid_analysis_type(self, analysis_type):
+        choices = [
+            "rel_ab", "rel_ab_w_read_stats", "reads_map", "clade_profiles",
+            "marker_ab_table", "marker_counts", "marker_pres_table", "clade_specific_strain_tracker"
+        ]
+        if analysis_type not in choices:
+            logger.warning("analysis_type='%s' not valid, set to default ('rel_ab').", analysis_type)
+            analysis_type = "rel_ab"
+        return analysis_type
+
     def rows_differences(self, dataframe1, dataframe2) -> DataFrame:
         rows_diff = dataframe1 - dataframe2
         rows_diff[rows_diff.isnull()] = dataframe1
@@ -88,17 +104,30 @@ class Metaphlan3Parser(BaseMetaphlanParser):
     taxa_column = 'clade_name'
     NCBI_tax_column = 'NCBI_tax_id'
 
-    def __init__(self, *args, analysis_type: str = 'rel_ab', **kwargs):
+    def __init__(self, *args, analysis_type: str = 'rel_ab', keep_NCBI_tax_col: bool = False, **kwargs):
         """
         Args:
             analysis_type: output type of Metaphlan3 (see ``-t`` option of metaphlan3)
+              { 'rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts', 
+              'marker_pres_table', 'clade_specific_strain_tracker' }
+            keep_NCBI_tax_col: set to True if you want the NCBI tax column in the returned dataframe.
         """
+        self.keep_NCBI_tax_col = keep_NCBI_tax_col
         super().__init__(*args, analysis_type=analysis_type, parsing_options={'skiprows': 1}, **kwargs)
 
     def _load_data(self) -> DataFrame:
         df = super()._load_data()
-        df = df.drop(self.NCBI_tax_column, axis=1)
+
+        if self.keep_NCBI_tax_col:
+            tmp = df[[self.NCBI_tax_column, self.taxa_column]]
+
+        df = df.drop(self.NCBI_tax_column, axis=1)  # NCBI_tax_column needs to be dropped because sum
         df = self.remove_duplicates(df)
+
+        if self.keep_NCBI_tax_col:
+            tmp[self.NCBI_tax_column] = tmp[self.NCBI_tax_column].map(lambda x: x.split("|")[-1])
+            df = df.merge(tmp)
+
         df = self.split_taxa_fill_none(df, sep="|")
         df = df.set_index(self.taxonomical_names[:self.rank_level])
         return df
diff --git a/moonstone/plot/counts.py b/moonstone/plot/counts.py
index ac0d67d..cd0f4ac 100644
--- a/moonstone/plot/counts.py
+++ b/moonstone/plot/counts.py
@@ -747,7 +747,7 @@ def plot_sample_composition_most_abundant_taxa(
         # Make graph
         graph = MatrixBarGraph(data_df)
         # Plotting options
-        title = f"{taxa_level.capitalize()} composition for the top {taxa_number} most abundant species across samples"
+        title = f"{taxa_level.capitalize()} composition for the top {taxa_number} most abundant {taxa_level} across samples"
         if prevalence_threshold is not None:
             title += f" (present in at least {prevalence_threshold}% of samples)"
 
diff --git a/tests/analysis/diversity/test_beta.py b/tests/analysis/diversity/test_beta.py
index 89c25a0..93eb349 100644
--- a/tests/analysis/diversity/test_beta.py
+++ b/tests/analysis/diversity/test_beta.py
@@ -82,7 +82,7 @@ def test_run_statistical_test_groups_with_NaN(self):
                 'samples14': [9.35, 'A'],
                 'samples15': [7.89, 'A'],
                 'samples16': [4.65, 'C'],
-                'samples17': [8.90, 'D'],
+                'samples17': [8.90, 'D'],  # only 1 sample from group D < 5 required to do ttest-independence
                 'samples18': [2.33, 'C'],
                 'samples19': [1.34, 'B'],
                 'samples20': [6.87, 'C']
diff --git a/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py b/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py
index 6dd53dc..df51815 100644
--- a/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py
+++ b/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py
@@ -9,8 +9,8 @@
 class TestMetaphlan2Parser(TestCase):
 
     def setUp(self):
-        input_path = os.path.join(os.path.dirname(__file__), 'input.tsv')
-        self.meta2parser = Metaphlan3Parser(input_path, analysis_type='marker_counts')
+        self.input_path = os.path.join(os.path.dirname(__file__), 'input.tsv')
+        self.meta2parser = Metaphlan3Parser(self.input_path, analysis_type='marker_counts')
 
     def test_to_dataframe(self):
         """
@@ -33,3 +33,27 @@ def test_to_dataframe(self):
         )
         expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
         pd.testing.assert_frame_equal(self.meta2parser.dataframe, expected_df, check_like=True)
+
+    def test_to_dataframe_keep_NCBI_tax_col(self):
+        """
+        Test based on input.tsv file
+        """
+        meta2parser = Metaphlan3Parser(self.input_path, analysis_type='rel_ab', keep_NCBI_tax_col=True)
+        expected_df = pd.DataFrame(
+            [
+                ['Bacteria', 'Actinobacteria', 'Actinobacteria', 'Actinomycetales', 'Actinomycetaceae', 'Actinobaculum',
+                 'Actinobaculum_massiliense', 1.0, 2.0, '461393'],
+                ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus',
+                 'Lactobacillus (genus)', 3.2, 8.0, '1632'],
+                ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus',
+                 'Streptococcus (genus)', 1.3, 0.4, '1301'],
+                ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus',
+                 'Streptococcus_thermophilus', 1.7, 0.7, '1308'],
+                ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus',
+                 'Streptococcus_salivarius', 3.3, 1.2, '1304']
+            ],
+            columns=['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'SAMPLE_1', 'SAMPLE_2', 'NCBI_tax_id']
+        )
+        expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
+        observed_df = meta2parser.dataframe
+        pd.testing.assert_frame_equal(observed_df, expected_df, check_like=True)
\ No newline at end of file
diff --git a/tests/parsers/counts/taxonomy/test_base_metaphlan.py b/tests/parsers/counts/taxonomy/test_base_metaphlan.py
index 3062afd..9ffe312 100644
--- a/tests/parsers/counts/taxonomy/test_base_metaphlan.py
+++ b/tests/parsers/counts/taxonomy/test_base_metaphlan.py
@@ -266,3 +266,11 @@ def test_remove_duplicates_rel_ab_addition_error_margin(self):
 
         observed_df = self.base_metaphlan_parser.remove_duplicates(tested_df)
         pd.testing.assert_frame_equal(observed_df, expected_df, check_like=True)
+
+    def test_valid_analysis_type(self):
+        with self.assertLogs('moonstone.parsers.counts.taxonomy.metaphlan', level='WARNING') as log:
+            tested_object_instance = BaseMetaphlanParser("file", analysis_type="INVALID ANALYSIS TYPE")
+            self.assertEqual(len(log.output), 1)
+            self.assertIn("WARNING:moonstone.parsers.counts.taxonomy.metaphlan:analysis_type='INVALID ANALYSIS TYPE' not valid, \
+set to default ('rel_ab').", log.output)
+            self.assertEqual(tested_object_instance.analysis_type, 'rel_ab')
\ No newline at end of file

From 7c69c352d133c7a60ba88c2d2b026ea96f15ace5 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Mon, 3 Apr 2023 15:19:37 +0200
Subject: [PATCH 04/17] flake8

---
 moonstone/parsers/counts/taxonomy/metaphlan.py              | 6 +++---
 moonstone/plot/counts.py                                    | 3 ++-
 tests/parsers/counts/taxonomy/kraken2/test_kraken2.py       | 2 +-
 tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py | 5 +++--
 tests/parsers/counts/taxonomy/test_base_metaphlan.py        | 2 +-
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/moonstone/parsers/counts/taxonomy/metaphlan.py b/moonstone/parsers/counts/taxonomy/metaphlan.py
index 6657c7e..1b63fda 100644
--- a/moonstone/parsers/counts/taxonomy/metaphlan.py
+++ b/moonstone/parsers/counts/taxonomy/metaphlan.py
@@ -12,8 +12,8 @@ class BaseMetaphlanParser(BaseTaxonomyCountsParser):
     def __init__(self, *args, analysis_type: str = 'rel_ab', **kwargs):
         """
         Args:
-            analysis_type: output type of Metaphlan3 (see ``-t`` option of metaphlan3) 
-              { 'rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts', 
+            analysis_type: output type of Metaphlan3 (see ``-t`` option of metaphlan3)
+              { 'rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts',
               'marker_pres_table', 'clade_specific_strain_tracker' }
         """
         self.analysis_type = self._valid_analysis_type(analysis_type)
@@ -108,7 +108,7 @@ def __init__(self, *args, analysis_type: str = 'rel_ab', keep_NCBI_tax_col: bool
         """
         Args:
             analysis_type: output type of Metaphlan3 (see ``-t`` option of metaphlan3)
-              { 'rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts', 
+              { 'rel_ab', 'rel_ab_w_read_stats', 'reads_map', 'clade_profiles', 'marker_ab_table', 'marker_counts',
               'marker_pres_table', 'clade_specific_strain_tracker' }
             keep_NCBI_tax_col: set to True if you want the NCBI tax column in the returned dataframe.
         """
diff --git a/moonstone/plot/counts.py b/moonstone/plot/counts.py
index cd0f4ac..d028dd9 100644
--- a/moonstone/plot/counts.py
+++ b/moonstone/plot/counts.py
@@ -747,7 +747,8 @@ def plot_sample_composition_most_abundant_taxa(
         # Make graph
         graph = MatrixBarGraph(data_df)
         # Plotting options
-        title = f"{taxa_level.capitalize()} composition for the top {taxa_number} most abundant {taxa_level} across samples"
+        title = f"{taxa_level.capitalize()} composition for the top {taxa_number} most abundant {taxa_level} across \
+samples"
         if prevalence_threshold is not None:
             title += f" (present in at least {prevalence_threshold}% of samples)"
 
diff --git a/tests/parsers/counts/taxonomy/kraken2/test_kraken2.py b/tests/parsers/counts/taxonomy/kraken2/test_kraken2.py
index acf1acb..52c8aeb 100644
--- a/tests/parsers/counts/taxonomy/kraken2/test_kraken2.py
+++ b/tests/parsers/counts/taxonomy/kraken2/test_kraken2.py
@@ -88,4 +88,4 @@ def test_to_dataframe_ods(self):
             ]
         )
         expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
-        pd.testing.assert_frame_equal(sunbeamkraken2parser.dataframe, expected_df)
\ No newline at end of file
+        pd.testing.assert_frame_equal(sunbeamkraken2parser.dataframe, expected_df)
diff --git a/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py b/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py
index df51815..000160a 100644
--- a/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py
+++ b/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py
@@ -52,8 +52,9 @@ def test_to_dataframe_keep_NCBI_tax_col(self):
                 ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus',
                  'Streptococcus_salivarius', 3.3, 1.2, '1304']
             ],
-            columns=['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'SAMPLE_1', 'SAMPLE_2', 'NCBI_tax_id']
+            columns=['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'SAMPLE_1', 'SAMPLE_2',
+                     'NCBI_tax_id']
         )
         expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
         observed_df = meta2parser.dataframe
-        pd.testing.assert_frame_equal(observed_df, expected_df, check_like=True)
\ No newline at end of file
+        pd.testing.assert_frame_equal(observed_df, expected_df, check_like=True)
diff --git a/tests/parsers/counts/taxonomy/test_base_metaphlan.py b/tests/parsers/counts/taxonomy/test_base_metaphlan.py
index 9ffe312..40f58f8 100644
--- a/tests/parsers/counts/taxonomy/test_base_metaphlan.py
+++ b/tests/parsers/counts/taxonomy/test_base_metaphlan.py
@@ -273,4 +273,4 @@ def test_valid_analysis_type(self):
             self.assertEqual(len(log.output), 1)
             self.assertIn("WARNING:moonstone.parsers.counts.taxonomy.metaphlan:analysis_type='INVALID ANALYSIS TYPE' not valid, \
 set to default ('rel_ab').", log.output)
-            self.assertEqual(tested_object_instance.analysis_type, 'rel_ab')
\ No newline at end of file
+            self.assertEqual(tested_object_instance.analysis_type, 'rel_ab')

From 2d2d29c8279f0e9ded663b0efe57189a42e189d7 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Mon, 3 Apr 2023 18:42:23 +0200
Subject: [PATCH 05/17] commenting code in metaphlan parser

---
 .../parsers/counts/taxonomy/metaphlan.py      | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/moonstone/parsers/counts/taxonomy/metaphlan.py b/moonstone/parsers/counts/taxonomy/metaphlan.py
index 1b63fda..2e6b21e 100644
--- a/moonstone/parsers/counts/taxonomy/metaphlan.py
+++ b/moonstone/parsers/counts/taxonomy/metaphlan.py
@@ -33,7 +33,9 @@ def rows_differences(self, dataframe1, dataframe2) -> DataFrame:
         rows_diff = dataframe1 - dataframe2
         rows_diff[rows_diff.isnull()] = dataframe1
         if self.analysis_type == 'rel_ab':
-            rows_diff[rows_diff < 0.0001] = 0
+            rows_diff[rows_diff < 0.0001] = 0   # if difference between sum of organism of rank r (ex: sum of species of genus X)
+            # and value of rank r+1 (ex:genus X) is so small,
+            # we assume that it's due to python addition approximation with decimal
         else:
             rows_diff[rows_diff < 0] = 0
         rows_diff = rows_diff.loc[rows_diff.sum(axis=1)[rows_diff.sum(axis=1) != 0].index]
@@ -49,6 +51,21 @@ def compare_difference_between_two_levels(self, whole_df, df_at_lower_level, ran
         return self.rows_differences(df_rank, df_rank_computed)
 
     def remove_duplicates(self, df) -> DataFrame:
+        """
+        Metaphlan3 results are by level therefore we need to remove the duplicated informations
+        Example:
+        We have:
+            ...|g_GenusA    50.0
+            ...|g_GenusA|s_Species1 30.0
+            ...|g_GenusB    50.0
+            ...|g_GenusB|s_Species2 50.0
+            Sum = 180.0 =/= 100.0 (while it's relative abundance -> but same problem with other analysis type)
+        We want:
+            ...|g_GenusA|s_GenusA (genus)   20.0    # unspecified species
+            ...|g_GenusA|s_Species1 30.0
+            ...|g_GenusB|s_Species2 50.0
+            Sum = 100.0
+        """
         df = df.set_index(self.taxa_column)
 
         # dataframe at rank level

From f6c17a55111fed294d1111e39e723fadf80d1f49 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Fri, 12 May 2023 18:28:50 +0200
Subject: [PATCH 06/17] allow requesting fewer taxonomical names

---
 .../parsers/counts/taxonomy/metaphlan.py      | 10 +++++++-
 .../counts/taxonomy/metaphlan3/input.tsv      |  4 ++--
 .../taxonomy/metaphlan3/test_metaphlan3.py    | 24 ++++++++++++++++++-
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/moonstone/parsers/counts/taxonomy/metaphlan.py b/moonstone/parsers/counts/taxonomy/metaphlan.py
index 2e6b21e..d2ec7ec 100644
--- a/moonstone/parsers/counts/taxonomy/metaphlan.py
+++ b/moonstone/parsers/counts/taxonomy/metaphlan.py
@@ -33,7 +33,8 @@ def rows_differences(self, dataframe1, dataframe2) -> DataFrame:
         rows_diff = dataframe1 - dataframe2
         rows_diff[rows_diff.isnull()] = dataframe1
         if self.analysis_type == 'rel_ab':
-            rows_diff[rows_diff < 0.0001] = 0   # if difference between sum of organism of rank r (ex: sum of species of genus X)
+            rows_diff[rows_diff < 0.0001] = 0
+            # if difference between sum of organism of rank r (ex: sum of species of genus X)
             # and value of rank r+1 (ex:genus X) is so small,
             # we assume that it's due to python addition approximation with decimal
         else:
@@ -135,6 +136,13 @@ def __init__(self, *args, analysis_type: str = 'rel_ab', keep_NCBI_tax_col: bool
     def _load_data(self) -> DataFrame:
         df = super()._load_data()
 
+        # if number of taxonomical_names is inferior to the default,
+        if len(self.taxonomical_names) < len(BaseTaxonomyCountsParser.taxonomical_names):
+            # we need to restrict the rows considered to only the rows that recount taxonomical level inside the range
+            # wanted.
+            # Or error "ValueError: Error : expecting a integer inferior or equal to the number of taxonomical_names."
+            # will be raised
+            df = df[df["NCBI_tax_id"].map(lambda x: len(x.split("|"))) <= len(self.taxonomical_names)]
         if self.keep_NCBI_tax_col:
             tmp = df[[self.NCBI_tax_column, self.taxa_column]]
 
diff --git a/tests/parsers/counts/taxonomy/metaphlan3/input.tsv b/tests/parsers/counts/taxonomy/metaphlan3/input.tsv
index d1ffdab..e7995ad 100644
--- a/tests/parsers/counts/taxonomy/metaphlan3/input.tsv
+++ b/tests/parsers/counts/taxonomy/metaphlan3/input.tsv
@@ -8,8 +8,8 @@ k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomyce
 k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinobaculum	2|201174|1760|2037|2049|76833	1.0	2.0
 k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Actinomycetaceae|g__Actinobaculum|s__Actinobaculum_massiliense	2|201174|1760|2037|2049|1654|461393	1.0	2.0
 k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales	2|1239|91061|186826	9.5	10.3
-k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae	2|1239|91061|186826|33958|1578	3.2	8.0
-k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae|g__Lactobacillus	2|1239|91061|186826|33958|1578|1632	3.2	8.0
+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae	2|1239|91061|186826|33958	3.2	8.0
+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Lactobacillaceae|g__Lactobacillus	2|1239|91061|186826|33958|1578	3.2	8.0
 k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae	2|1239|91061|186826|1300	6.3	2.3
 k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus	2|1239|91061|186826|1300|1301	6.3	2.3
 k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus|s__Streptococcus_thermophilus	2|1239|91061|186826|1300|1301|1308	1.7	0.7
diff --git a/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py b/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py
index 000160a..029e320 100644
--- a/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py
+++ b/tests/parsers/counts/taxonomy/metaphlan3/test_metaphlan3.py
@@ -44,7 +44,7 @@ def test_to_dataframe_keep_NCBI_tax_col(self):
                 ['Bacteria', 'Actinobacteria', 'Actinobacteria', 'Actinomycetales', 'Actinomycetaceae', 'Actinobaculum',
                  'Actinobaculum_massiliense', 1.0, 2.0, '461393'],
                 ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus',
-                 'Lactobacillus (genus)', 3.2, 8.0, '1632'],
+                 'Lactobacillus (genus)', 3.2, 8.0, '1578'],
                 ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus',
                  'Streptococcus (genus)', 1.3, 0.4, '1301'],
                 ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus',
@@ -58,3 +58,25 @@ def test_to_dataframe_keep_NCBI_tax_col(self):
         expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
         observed_df = meta2parser.dataframe
         pd.testing.assert_frame_equal(observed_df, expected_df, check_like=True)
+
+    def test_to_dataframe_less_taxonomical_names(self):
+        """
+        Test based on input.tsv file
+        """
+        meta2parser = Metaphlan3Parser(self.input_path, analysis_type='rel_ab', keep_NCBI_tax_col=True)
+        meta2parser.taxonomical_names = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus']
+        expected_df = pd.DataFrame(
+            [
+                ['Bacteria', 'Actinobacteria', 'Actinobacteria', 'Actinomycetales', 'Actinomycetaceae', 'Actinobaculum',
+                 1.0, 2.0, '76833'],
+                ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus',
+                 3.2, 8.0, '1578'],
+                ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Streptococcaceae', 'Streptococcus',
+                 6.3, 2.3, '1301'],
+            ],
+            columns=['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'SAMPLE_1', 'SAMPLE_2',
+                     'NCBI_tax_id']
+        )
+        expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus'])
+        observed_df = meta2parser.dataframe
+        pd.testing.assert_frame_equal(observed_df, expected_df, check_like=True)

From 95b2acfd42226aab9c370db6a5ff825be65aaef0 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Wed, 7 Jun 2023 15:01:27 +0200
Subject: [PATCH 07/17] 1st try debugging numpy/scikit-bio error

---
 .github/workflows/python-package.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 4e52bcb..7d24e68 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -25,6 +25,7 @@ jobs:
         python -m pip install --upgrade pip
         pip install flake8 pytest
         pip install numpy==1.18.1
+        pip install scikit-bio==0.5.6
         pip install .
         pip install odfpy     # optional dependencies
         pip install openpyxl  # idem

From 058b8e42327bf6b56a16b4b242c9d11cc2cb7b97 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Wed, 7 Jun 2023 15:32:02 +0200
Subject: [PATCH 08/17] 2nd try debugging numpy/scikit-bio error

---
 .github/workflows/python-package.yml | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 7d24e68..9966157 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -22,14 +22,13 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
-        pip install flake8 pytest
-        pip install numpy==1.18.1
-        pip install scikit-bio==0.5.6
-        pip install .
-        pip install odfpy     # optional dependencies
-        pip install openpyxl  # idem
-        pip install xlrd      # idem
+        python -m pip3 install --upgrade pip3
+        pip3 install flake8 pytest
+        pip3 install numpy==1.18.1
+        pip3 install .
+        pip3 install odfpy     # optional dependencies
+        pip3 install openpyxl  # idem
+        pip3 install xlrd      # idem
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names

From 3c3193bb426673c985346a44aa6c1a7e50a63ca8 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Tue, 10 Oct 2023 17:11:09 +0200
Subject: [PATCH 09/17] update pandas, numpy and scikit-bio

---
 .github/workflows/coverage.yml                |   4 +-
 .github/workflows/python-package.yml          |   4 +-
 .github/workflows/python-publish.yml          |   2 +-
 moonstone/analysis/diversity/base.py          |  22 ++--
 moonstone/analysis/statistical_test.py        |   9 ++
 moonstone/parsers/base.py                     |   2 +
 .../parsers/counts/taxonomy/metaphlan.py      |  16 +--
 moonstone/plot/counts.py                      |   3 +-
 moonstone/plot/graphs/base.py                 |   2 +-
 moonstone/utils/df_merge.py                   |  14 ++-
 moonstone/utils/df_reindex.py                 |  24 +++-
 moonstone/utils/taxonomy.py                   |   2 +-
 requirements.txt                              |   6 +-
 setup.py                                      |   6 +-
 tests/analysis/diversity/test_beta.py         |  14 +--
 .../counts/taxonomy/test_base_metaphlan.py    |   4 +-
 tests/plot/test_counts.py                     |   2 +-
 tests/utils/pandas/test_series.py             |   3 +-
 tests/utils/test_df_merge.py                  |  41 ++++++-
 tests/utils/test_df_reindex.py                | 107 +++++++++++++++++-
 20 files changed, 221 insertions(+), 66 deletions(-)

diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index 68b83ed..2afda4d 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.7]
+        python-version: [3.9]
 
     steps:
     - uses: actions/checkout@v2
@@ -21,7 +21,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pytest pytest-cov
-        pip install numpy==1.18.1
+        pip install numpy==1.24.3
         pip install .
         pip install odfpy     # optional dependencies
         pip install openpyxl  # idem
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 9966157..61b30ae 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7", "3.8"]
+        python-version: ["3.8", "3.9"]
 
     steps:
     - uses: actions/checkout@v2
@@ -24,7 +24,7 @@ jobs:
       run: |
         python -m pip3 install --upgrade pip3
         pip3 install flake8 pytest
-        pip3 install numpy==1.18.1
+        pip3 install numpy==1.24.3
         pip3 install .
         pip3 install odfpy     # optional dependencies
         pip3 install openpyxl  # idem
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index d4ba985..92ea8e9 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -17,7 +17,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v2
       with:
-        python-version: '3.7'
+        python-version: '3.9'
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/moonstone/analysis/diversity/base.py b/moonstone/analysis/diversity/base.py
index 3243194..97bfa1c 100644
--- a/moonstone/analysis/diversity/base.py
+++ b/moonstone/analysis/diversity/base.py
@@ -194,8 +194,8 @@ def _run_statistical_test_groups(
 
             corrected_pval.index = pval.dropna().index   # postulate that the order hasn't changed
             if pval[pval.isnull()].size > 0:
-                corrected_pval = corrected_pval.append(pval[pval.isnull()])
-
+                # corrected_pval = corrected_pval.append(pval[pval.isnull()])
+                corrected_pval = pd.concat([corrected_pval, pval[pval.isnull()]])
             # remodelling of p-values output
             corrected_pval = self._structure_remodelling(corrected_pval, structure=structure_pval, sym=sym)
             return corrected_pval
@@ -248,10 +248,13 @@ def _compute_pval_inside_subgroups(
                     f"Less than 2 samples in dataframe group {g} in data. P-val can't be computed."
                 )
             else:
-                pval = pval.append(self._run_statistical_test_groups(
-                    df_gp, final_group_col, stats_test,
-                    correction_method, structure_pval, sym
-                ))
+                pval = pd.concat([
+                    pval,
+                    self._run_statistical_test_groups(
+                        df_gp, final_group_col, stats_test,
+                        correction_method, structure_pval, sym
+                    )
+                ])
         pval.index = pd.MultiIndex.from_tuples(pval.index, names=('Group1', 'Group2'))
         return pval
 
@@ -317,12 +320,13 @@ def analyse_groups(
                     df, group_col, final_group_col, stats_test, correction_method, structure_pval, sym
                 )
                 if pval_to_compute == "same group_col or group_col2 values":
-                    pval = pval.append(
+                    pval = pd.concat([
+                        pval,
                         self._compute_pval_inside_subgroups(
                             df, group_col2, final_group_col,
                             stats_test, correction_method, structure_pval, sym
                         )
-                    )
+                    ])
 
         else:
             df = self._get_grouped_df(filtered_metadata_df[group_col])
@@ -359,7 +363,7 @@ def analyse_groups(
 
         # 'data' different from 'diversity indexes' in the fact that it has been filtered on metadata, meaning that
         # samples without metadata for group_col (or group_col2) have been dropped
-        return{**{'data': df}, **self.report_data['analyse_groups']}
+        return {**{'data': df}, **self.report_data['analyse_groups']}
 
     def generate_report_data(self) -> dict:
         """
diff --git a/moonstone/analysis/statistical_test.py b/moonstone/analysis/statistical_test.py
index 994b436..c1b2ffb 100644
--- a/moonstone/analysis/statistical_test.py
+++ b/moonstone/analysis/statistical_test.py
@@ -14,6 +14,15 @@
 def _preprocess_groups_comparison(
     series: pd.Series, group_series: pd.Series, stat_test: str
 ):
+    # If some samples are present in group_series (metadata) but absent from series (count dataframe),
+    # drop them from group_series; otherwise indexing fails with an error such as
+    # "None of [Index(['sample7'], dtype='object')] are in the [index]".
+    group_series_index_to_keep = group_series.index.intersection(series.index)
+    if len(group_series_index_to_keep) != len(group_series.index):
+        logger.info(
+            "Some index values in group_series aren't found in the series. Dropping those rows."
+        )
+        group_series = group_series.loc[group_series_index_to_keep]
     groups = list(group_series.unique())
     groups.sort()
 
diff --git a/moonstone/parsers/base.py b/moonstone/parsers/base.py
index c270bb3..323d111 100644
--- a/moonstone/parsers/base.py
+++ b/moonstone/parsers/base.py
@@ -52,6 +52,8 @@ def _load_data(self) -> pd.DataFrame:
             "xlsb": "pyxlsb"                            # Binary Excel files
             }
         if ext in ext_engine.keys():
+            if self.header == "infer":
+                self.header = 0  # "infer" not accepted with read_excel anymore
             return pd.read_excel(
                 self.file_path, header=self.header, **self.parsing_options,
                 engine=ext_engine[ext]
diff --git a/moonstone/parsers/counts/taxonomy/metaphlan.py b/moonstone/parsers/counts/taxonomy/metaphlan.py
index d2ec7ec..da13136 100644
--- a/moonstone/parsers/counts/taxonomy/metaphlan.py
+++ b/moonstone/parsers/counts/taxonomy/metaphlan.py
@@ -1,6 +1,6 @@
 import logging
 
-from pandas import DataFrame
+import pandas as pd
 
 from moonstone.parsers.counts.taxonomy.base import BaseTaxonomyCountsParser
 
@@ -29,7 +29,7 @@ def _valid_analysis_type(self, analysis_type):
             analysis_type = "rel_ab"
         return analysis_type
 
-    def rows_differences(self, dataframe1, dataframe2) -> DataFrame:
+    def rows_differences(self, dataframe1, dataframe2) -> pd.DataFrame:
         rows_diff = dataframe1 - dataframe2
         rows_diff[rows_diff.isnull()] = dataframe1
         if self.analysis_type == 'rel_ab':
@@ -42,7 +42,7 @@ def rows_differences(self, dataframe1, dataframe2) -> DataFrame:
         rows_diff = rows_diff.loc[rows_diff.sum(axis=1)[rows_diff.sum(axis=1) != 0].index]
         return rows_diff
 
-    def compare_difference_between_two_levels(self, whole_df, df_at_lower_level, rank) -> DataFrame:
+    def compare_difference_between_two_levels(self, whole_df, df_at_lower_level, rank) -> pd.DataFrame:
         df_rank = whole_df[whole_df.index.map(lambda x: len(x.split('|'))) == rank]
 
         # transformation lower_level to rank (level)
@@ -51,7 +51,7 @@ def compare_difference_between_two_levels(self, whole_df, df_at_lower_level, ran
         df_rank_computed = df_rank_computed.groupby(df_rank_computed.index).sum()             # grouping by rank (level)
         return self.rows_differences(df_rank, df_rank_computed)
 
-    def remove_duplicates(self, df) -> DataFrame:
+    def remove_duplicates(self, df) -> pd.DataFrame:
         """
         Metaphlan3 results are by level therefore we need to remove the duplicated informations
         Example:
@@ -90,8 +90,8 @@ def remove_duplicates(self, df) -> DataFrame:
             rank -= 1
             rows_diff = self.compare_difference_between_two_levels(df, new_df, rank)
             if rows_diff.size != 0:
-                new_df = new_df.append(rows_diff)              # add missing rows to the dataframe of the lower level
-
+                # new_df = new_df.append(rows_diff)              # add missing rows to the dataframe of the lower level
+                new_df = pd.concat([new_df, rows_diff])        # add missing rows to the dataframe of the lower level
             # verification that everything is defined up to the lower_level
             samples_with_incomp_lowerlevel = new_df.sum()[new_df.sum() < total]
 
@@ -106,7 +106,7 @@ class Metaphlan2Parser(BaseMetaphlanParser):
 
     taxa_column = 'ID'
 
-    def _load_data(self) -> DataFrame:
+    def _load_data(self) -> pd.DataFrame:
         df = super()._load_data()
         df = self.remove_duplicates(df)
         df = self.split_taxa_fill_none(df, sep="|")
@@ -133,7 +133,7 @@ def __init__(self, *args, analysis_type: str = 'rel_ab', keep_NCBI_tax_col: bool
         self.keep_NCBI_tax_col = keep_NCBI_tax_col
         super().__init__(*args, analysis_type=analysis_type, parsing_options={'skiprows': 1}, **kwargs)
 
-    def _load_data(self) -> DataFrame:
+    def _load_data(self) -> pd.DataFrame:
         df = super()._load_data()
 
         # if number of taxonomical_names is inferior to the default,
diff --git a/moonstone/plot/counts.py b/moonstone/plot/counts.py
index d028dd9..c31b364 100644
--- a/moonstone/plot/counts.py
+++ b/moonstone/plot/counts.py
@@ -456,7 +456,8 @@ def _plot_most_what_taxa_boxplot_or_violin(
             tmp = relab_df_taxa[i].reset_index()
             tmp.index = nb * [i]
             tmp.columns = ["species", "relative abundance"]
-            relab_df_taxa2 = relab_df_taxa2.append(tmp)
+            # relab_df_taxa2 = relab_df_taxa2.append(tmp)
+            relab_df_taxa2 = pd.concat([relab_df_taxa2, tmp])
         relab_df_taxa2.species = relab_df_taxa2.species.apply(self._italicize_taxa_name)
         groups = [self._italicize_taxa_name(name) for name in groups]
 
diff --git a/moonstone/plot/graphs/base.py b/moonstone/plot/graphs/base.py
index 20cedd3..5183fdc 100644
--- a/moonstone/plot/graphs/base.py
+++ b/moonstone/plot/graphs/base.py
@@ -236,7 +236,7 @@ def plot_one_graph(
             if groups:
                 filtered_df = self.data[self.data[group_col].isin(groups)]
                 filtered_df[group_col] = filtered_df[group_col].astype("category")
-                filtered_df[group_col].cat.set_categories(groups, inplace=True)
+                filtered_df[group_col] = filtered_df[group_col].cat.set_categories(groups)  # no inplace in pandas 2
                 filtered_df = filtered_df.sort_values([group_col])
             else:
                 filtered_df = copy.deepcopy(self.data)
diff --git a/moonstone/utils/df_merge.py b/moonstone/utils/df_merge.py
index dae1055..400e9c4 100644
--- a/moonstone/utils/df_merge.py
+++ b/moonstone/utils/df_merge.py
@@ -1,6 +1,5 @@
 import logging
 import pandas as pd
-import numpy as np
 
 logger = logging.getLogger(__name__)
 
@@ -26,10 +25,15 @@ def merge(self):
         logger.info('Merge function called to merge count data and metadata.')
         logger.info(f'Variable {self.variable} from metadata file will be merged with counts.')
 
-        if not isinstance(self.dc.index, type(self.dm.index)):
-            logger.warning(f'Index types do not match: {type(self.dc.index)} and {type(self.dm.index)}.')
-            self.dc.set_index(np.int64(np.array(self.dc.index)), inplace=True)
-            logger.info(f' Indexes reset. Count Index={type(self.dc.index)}, Metadata Index={type(self.dm.index)}')
+        # if not isinstance(self.dc.index, type(self.dm.index)):
+        if self.dc.index.dtype != self.dm.index.dtype:
+            # logger.warning(f'Index types do not match: {type(self.dc.index)} and {type(self.dm.index)}.')
+            # self.dc = self.dc.set_index(np.int64(np.array(self.dc.index)))
+            # logger.info(f' Indexes reset. Count Index={type(self.dc.index)}, Metadata Index={type(self.dm.index)}')
+            logger.warning(f'Index types do not match: {self.dc.index.dtype} and {self.dm.index.dtype}.')
+            self.dc.index = self.dc.index.astype(str)
+            self.dm.index = self.dm.index.astype(str)
+            logger.warning('Both Count and Metadata Indexes set as string')
 
         df = pd.merge(self.dm[self.variable], self.dc, left_index=True, right_index=True)
         logger.info('Merge function completed. Returning merged data frame.')
diff --git a/moonstone/utils/df_reindex.py b/moonstone/utils/df_reindex.py
index dcdad70..225e530 100644
--- a/moonstone/utils/df_reindex.py
+++ b/moonstone/utils/df_reindex.py
@@ -22,12 +22,19 @@ def __init__(self, dataframe: Union[pd.Series, pd.DataFrame],
         self.taxonomy_df = taxonomy_dataframe
         self.taxa_column = taxa_column
 
-    def reindex_with_taxonomy(self, method: str = 'sum'):
+    def _sum_at_lowest_level(self, df):
+        df.index = df.index.to_flat_index()
+        df = df.groupby(level=0).sum()
+        df.index = pd.MultiIndex.from_tuples(df.index, names=self.taxonomical_names[:self._rank_level])
+        return df
+
+    def reindex_with_taxonomy(self, method: str = 'sum', na: str = 'drop'):
         """
         reindexation on taxonomic information (if there are).
 
         :param method: how to combine genes' information of genes that have the same taxonomy.
         Choose 'sum' to sum the counts or 'count' to only have the number of genes with this taxonomy
+        :param na: {'drop' (default), 'keep', 'sum'} what to do with the genes with missing taxonomical information.
 
         NB: You can access the list of items without taxonomic information by checking the .without_info_index
         attributes
@@ -48,16 +55,21 @@ def reindex_with_taxonomy(self, method: str = 'sum'):
         self.without_info_index = new_df['_merge'].loc[new_df['_merge'] == 'left_only'].index
 
         new_df = new_df.drop(['_merge'], axis=1)
-        new_df[self.taxa_column] = new_df[self.taxa_column].fillna(value='k__; p__; c__; o__; f__; g__; s__')
+        if na == 'drop':
+            new_df = new_df.dropna(subset=[self.taxa_column])
+        elif na == 'keep':
+            new_df[self.taxa_column] = new_df[self.taxa_column].fillna(
+                'k__; p__; c__; o__; f__; g__; s__'+new_df.index.to_series()+'_species'
+                )
+        else:  # na == 'sum'
+            new_df[self.taxa_column] = new_df[self.taxa_column].fillna(value='k__; p__; c__; o__; f__; g__; s__')
         new_df = self.split_taxa_fill_none(new_df, sep="; ", merge_genus_species=True)
         new_df = new_df.set_index(self.taxonomical_names[:self._rank_level])
         if method == 'sum':
-            nb_levels = len(self.taxonomical_names[:self._rank_level])
-            new_df = new_df.sum(level=list(range(nb_levels)))
+            new_df = self._sum_at_lowest_level(new_df)
         elif method == 'count':
             new_df[:] = np.where(new_df > 0, 1, 0)    # presence/absence -> is > 0 then presence (1) else absence (0)
-            nb_levels = len(self.taxonomical_names[:self._rank_level])
-            new_df = new_df.sum(level=list(range(nb_levels)))
+            new_df = self._sum_at_lowest_level(new_df)
         return new_df
 
     @property
diff --git a/moonstone/utils/taxonomy.py b/moonstone/utils/taxonomy.py
index 338862d..c24e66c 100644
--- a/moonstone/utils/taxonomy.py
+++ b/moonstone/utils/taxonomy.py
@@ -85,7 +85,7 @@ def remove_taxo_prefix(string):
         taxa_columns.columns = self.taxonomical_names[:self.rank_level]
         taxa_columns = taxa_columns.applymap(lambda x: remove_taxo_prefix(x))
         if terms_to_remove is not None:
-            taxa_columns = taxa_columns.replace(terms_to_remove, np.nan)
+            taxa_columns = taxa_columns.replace(terms_to_remove, None)
         if merge_genus_species:
             taxa_columns = self._merge_genus_species(taxa_columns)
         taxa_columns = self._fill_none(taxa_columns)
diff --git a/requirements.txt b/requirements.txt
index 0d75f1a..8078fe1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -48,7 +48,7 @@ msgpack==1.0.0
     # via cachecontrol
 natsort==7.0.1
     # via scikit-bio
-numpy==1.18.1
+numpy==1.24.3
     # via
     #   hdmedians
     #   matplotlib
@@ -59,7 +59,7 @@ numpy==1.18.1
     #   scikit-learn
     #   scipy
     #   statsmodels
-pandas==1.0.1
+pandas==2.0.2
     # via
     #   moonstone (setup.py)
     #   scikit-bio
@@ -100,7 +100,7 @@ requests==2.24.0
     # via cachecontrol
 retrying==1.3.3
     # via plotly
-scikit-bio==0.5.6
+scikit-bio==0.5.9
     # via moonstone (setup.py)
 scikit-learn==0.21.3
     # via
diff --git a/setup.py b/setup.py
index 2b5c8c2..3c69e1f 100644
--- a/setup.py
+++ b/setup.py
@@ -9,14 +9,14 @@
     author='Kenzo-Hugo Hillion, Agnès Baud, Mariela Furstenheim, Sean Kennedy',
     author_email='kehillio@pasteur.fr',
     install_requires=[
-        'pandas==1.0.1',
+        'pandas==2.0.2',
         'matplotlib==3.3.0',
         'plotly==5.6.0',
         'statsmodels==0.11.1',
         'python-slugify==4.0.1',
         'pyaml==20.4.0',
-        'numpy==1.18.1',
-        'scikit-bio==0.5.6',
+        'numpy==1.24.3',
+        'scikit-bio==0.5.9',
         'scikit-learn==0.21.3',
         'hdmedians==0.13',
         'cython==0.29.21',
diff --git a/tests/analysis/diversity/test_beta.py b/tests/analysis/diversity/test_beta.py
index 93eb349..9fa67b2 100644
--- a/tests/analysis/diversity/test_beta.py
+++ b/tests/analysis/diversity/test_beta.py
@@ -35,7 +35,8 @@ def test_compute_beta_diversity_df(self):
         )
         pd.testing.assert_frame_equal(
             tested_object_instance.beta_diversity_df, expected_object,
-            check_less_precise=2,  # Deprecated since version 1.1.0, to be changed when updating pandas
+            rtol=0.01
+            # check_less_precise=2,  # Deprecated since version 1.1.0, to be changed when updating pandas
         )
 
     def test_compute_beta_diversity_series(self):
@@ -54,11 +55,11 @@ def test_compute_beta_diversity_series(self):
         # Two ways of retrieving the series
         pd.testing.assert_series_equal(
             tested_object_instance.beta_diversity_series, expected_object,
-            check_less_precise=2,  # Deprecated since version 1.1.0, to be changed when updating pandas
+            rtol=0.01
         )
         pd.testing.assert_series_equal(
             tested_object_instance.diversity_indexes, expected_object,
-            check_less_precise=2,  # Deprecated since version 1.1.0, to be changed when updating pandas
+            rtol=0.01
         )
 
     def test_run_statistical_test_groups_with_NaN(self):
@@ -109,7 +110,6 @@ def test_run_statistical_test_groups_with_NaN(self):
 
         pd.testing.assert_series_equal(
             pval, expected_object,
-            check_less_precise=2,  # Deprecated since version 1.1.0, to be changed when updating pandas
         )
 
     def test_get_grouped_df_series(self):
@@ -131,7 +131,6 @@ def test_get_grouped_df_series(self):
         output = tested_object_instance._get_grouped_df_series(metadata_ser)
         pd.testing.assert_frame_equal(
             output, expected_object,
-            check_less_precise=2,  # Deprecated since version 1.1.0, to be changed when updating pandas
         )
 
     def test_get_grouped_df_dataframe(self):
@@ -172,7 +171,6 @@ def test_get_grouped_df_dataframe(self):
         output = tested_object_instance._get_grouped_df_dataframe(metadata_df)
         pd.testing.assert_frame_equal(
             output, expected_object,
-            check_less_precise=2,  # Deprecated since version 1.1.0, to be changed when updating pandas
         )
 
     def test_analyse_grouped_df(self):
@@ -195,7 +193,6 @@ def test_analyse_grouped_df(self):
         output = tested_object_instance.analyse_groups(metadata_df, 'sex', show=False, show_pval=False)
         pd.testing.assert_frame_equal(
             output['data'], expected_object,
-            check_less_precise=2,  # Deprecated since version 1.1.0, to be changed when updating pandas
         )
 
     def test_analyse_grouped_df_with_group_col2(self):
@@ -238,7 +235,6 @@ def test_analyse_grouped_df_with_group_col2(self):
             )
         pd.testing.assert_frame_equal(
             output["data"], expected_object,
-            check_less_precise=2,  # Deprecated since version 1.1.0, to be changed when updating pandas
         )
 
 
@@ -269,7 +265,6 @@ def test_compute_beta_diversity(self):
         )
         pd.testing.assert_frame_equal(
             tested_object_instance.beta_diversity_df, expected_object,
-            check_less_precise=2,  # Deprecated since version 1.1.0, to be changed when updating pandas
         )
 
     def test_compute_beta_diversity_force_computation(self):
@@ -325,7 +320,6 @@ def test_compute_beta_diversity(self):
         )
         pd.testing.assert_frame_equal(
             tested_object_instance.beta_diversity_df, expected_object,
-            check_less_precise=2,  # Deprecated since version 1.1.0, to be changed when updating pandas
         )
 
     def test_compute_beta_diversity_force_computation(self):
diff --git a/tests/parsers/counts/taxonomy/test_base_metaphlan.py b/tests/parsers/counts/taxonomy/test_base_metaphlan.py
index 40f58f8..0d485ca 100644
--- a/tests/parsers/counts/taxonomy/test_base_metaphlan.py
+++ b/tests/parsers/counts/taxonomy/test_base_metaphlan.py
@@ -271,6 +271,6 @@ def test_valid_analysis_type(self):
         with self.assertLogs('moonstone.parsers.counts.taxonomy.metaphlan', level='WARNING') as log:
             tested_object_instance = BaseMetaphlanParser("file", analysis_type="INVALID ANALYSIS TYPE")
             self.assertEqual(len(log.output), 1)
-            self.assertIn("WARNING:moonstone.parsers.counts.taxonomy.metaphlan:analysis_type='INVALID ANALYSIS TYPE' not valid, \
-set to default ('rel_ab').", log.output)
+            self.assertIn("WARNING:moonstone.parsers.counts.taxonomy.metaphlan:analysis_type='INVALID ANALYSIS TYPE' \
+not valid, set to default ('rel_ab').", log.output)
             self.assertEqual(tested_object_instance.analysis_type, 'rel_ab')
diff --git a/tests/plot/test_counts.py b/tests/plot/test_counts.py
index 0f51a9c..7d92984 100644
--- a/tests/plot/test_counts.py
+++ b/tests/plot/test_counts.py
@@ -741,7 +741,7 @@ def test_plot_most_prevalent_taxa_modebargraph_plotting_options(self):
 
         expected_x = [75.0, 100.0]
         expected_y = [
-            "<i>Streptococcus salivarius</i>",
+            "<i>Lactobacillus</i> (genus)",  # with Streptococcus_salivarius and Streptococcus (genus) all at 75%
             "<i>Streptococcus thermophilus</i>",
         ]
 
diff --git a/tests/utils/pandas/test_series.py b/tests/utils/pandas/test_series.py
index 924c8ba..1051a14 100644
--- a/tests/utils/pandas/test_series.py
+++ b/tests/utils/pandas/test_series.py
@@ -74,7 +74,7 @@ def test_build_stats_float(self):
 
 class TestSeriesBinning(TestCase):
 
-    def test_compute_homoogeneous_bins(self):
+    def test_compute_homogeneous_bins(self):
         tested_object = pd.Series(
             {
                 'gene_1': 10.5,
@@ -108,6 +108,7 @@ def test_compute_binned_data(self):
         expected_object = pd.Series(
             [1, 2], index=[']0, 5]', ']5, 10]']
         )
+        expected_object.name = "count"
         tested_object_instance = SeriesBinning(series)
         tested_object_instance.bins_values = [0, 5, 10]
         tested_object = tested_object_instance.compute_binned_data()
diff --git a/tests/utils/test_df_merge.py b/tests/utils/test_df_merge.py
index 88d0213..4a741c4 100644
--- a/tests/utils/test_df_merge.py
+++ b/tests/utils/test_df_merge.py
@@ -6,8 +6,8 @@
 
 class TestMergeDF(TestCase):
 
-    def test_merge(self):
-        d1 = pd.DataFrame(
+    def setUp(self):
+        self.d1 = pd.DataFrame(
             [
                 [23, 7, 44, 0, 101],
                 [15, 4, 76, 3, 107],
@@ -15,9 +15,10 @@ def test_merge(self):
                 [31, 4, 50, 0, 99]
             ],
             columns=['item_1', 'item_2', 'item_3', 'item_4', 'item_5'],
-            index=['1', '2', '3', '4']  # index dtype='object'
+            index=[1, 2, 3, 4]  # index dtype='int64'
         )
 
+    def test_merge(self):
         d2 = pd.DataFrame(
             [
                 ['M', 'Yes', 23, 'June', 170],
@@ -40,5 +41,37 @@ def test_merge(self):
             index=[1, 2, 3, 4]  # index dtype='int64'
         )
 
-        merged_df = MergeDF(d1, d2, 'sex').merged_df
+        merged_df = MergeDF(self.d1, d2, 'sex').merged_df
+        pd.testing.assert_frame_equal(merged_df, df_expected)
+
+    def test_merge_index_dont_match(self):
+        d2 = pd.DataFrame(
+            [
+                ['M', 'Yes', 23, 'June', 170],
+                ['F', 'Yes', 33, 'Nov', 154],
+                ['F', 'Yes', 29, 'Jan', 161],
+                ['F', 'No', 27, 'Jan', 152]
+            ],
+            columns=['sex', 'pets', 'age', 'sample_month', 'height'],
+            index=['1', '2', '3', '4']  # index dtype='object'
+        )
+
+        df_expected = pd.DataFrame(
+            [
+                ['M', 23, 7, 44, 0, 101],
+                ['F', 15, 4, 76, 3, 107],
+                ['F', 20, 0, 22, 0, 101],
+                ['F', 31, 4, 50, 0, 99]
+            ],
+            columns=['sex', 'item_1', 'item_2', 'item_3', 'item_4', 'item_5'],
+            index=['1', '2', '3', '4']  # index dtype='object'
+        )
+
+        with self.assertLogs('moonstone.utils.df_merge', level='WARNING') as log:
+            merged_df = MergeDF(self.d1, d2, 'sex').merged_df
+            self.assertEqual(len(log.output), 2)
+            self.assertIn(
+                "WARNING:moonstone.utils.df_merge:Index types do not match: int64 and object.",
+                log.output
+            )
         pd.testing.assert_frame_equal(merged_df, df_expected)
diff --git a/tests/utils/test_df_reindex.py b/tests/utils/test_df_reindex.py
index cc92c6a..54cdab4 100644
--- a/tests/utils/test_df_reindex.py
+++ b/tests/utils/test_df_reindex.py
@@ -1,5 +1,6 @@
 from unittest import TestCase
 
+import numpy as np
 import pandas as pd
 
 from moonstone.utils.df_reindex import GenesToTaxonomy
@@ -33,17 +34,18 @@ def test_reindex_with_taxonomy(self):
                 'sample_1':
                 {
                     ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales',
-                     'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 23,
+                     'Enterococcaceae', 'Enterococcus', 'Enterococcus_faecium'): 15,
                     ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales',
-                     'Enterococcaceae', 'Enterococcus', 'Enterococcus_faecium'): 15
+                     'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 23
                 },
                 'sample_2':
                 {
                     ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales',
-                     'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 7,
+                     'Enterococcaceae', 'Enterococcus', 'Enterococcus_faecium'): 4,
                     ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales',
-                     'Enterococcaceae', 'Enterococcus', 'Enterococcus_faecium'): 4}
+                     'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 7
                 }
+            }
         )
         df_expected.index.set_names(["kingdom", "phylum", "class", "order", "family", "genus", "species"], inplace=True)
 
@@ -51,8 +53,7 @@ def test_reindex_with_taxonomy(self):
         reindexed_df = reindexation_instance.reindexed_df
         pd.testing.assert_frame_equal(reindexed_df, df_expected)
 
-    def test_reindex_with_taxonomy_missing_infos(self):
-        # for now, if there aren't any taxonomic information, the gene is dropped
+    def test_reindex_with_taxonomy_missing_infos_dropped(self):
         df = pd.DataFrame(
             [
                 [23, 7],
@@ -94,6 +95,100 @@ def test_reindex_with_taxonomy_missing_infos(self):
         pd.testing.assert_frame_equal(reindexed_df, df_expected)
         pd.testing.assert_index_equal(reindexation_instance.without_info_index, pd.Index(['gene_2'], dtype='object'))
 
+    def test_reindex_with_taxonomy_missing_infos_kept(self):
+        df = pd.DataFrame(
+            [
+                [23, 7],
+                [15, 4],
+                [0, 36],
+            ],
+            columns=['sample_1', 'sample_2'],
+            index=['gene_1', 'gene_2', 'gene_4']  # index dtype='object'
+        )
+        df_taxo = pd.DataFrame(
+            [
+                [147802,
+                 'k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; \
+f__Lactobacillaceae; g__Lactobacillus; s__iners'],
+                [1352,
+                 'k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; \
+f__Enterococcaceae; g__Enterococcus; s__faecium']
+            ],
+            columns=['tax_id', 'full_tax'],
+            index=['gene_1', 'gene_3']  # index dtype='object'
+        )
+        df_expected = pd.DataFrame.from_dict(
+            {
+                'sample_1':
+                {
+                    ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales',
+                     'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 23,
+                    (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 'gene_2_species'): 15,
+                    (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 'gene_4_species'): 0,
+                },
+                'sample_2':
+                {
+                    ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales',
+                     'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 7,
+                    (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 'gene_2_species'): 4,
+                    (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 'gene_4_species'): 36,
+                }
+            }
+        )
+        df_expected.index.set_names(["kingdom", "phylum", "class", "order", "family", "genus", "species"], inplace=True)
+
+        reindexation_instance = GenesToTaxonomy(df, df_taxo)
+        reindexed_df = reindexation_instance.reindex_with_taxonomy(na='keep')
+        pd.testing.assert_frame_equal(reindexed_df, df_expected)
+        pd.testing.assert_index_equal(
+            reindexation_instance.without_info_index,
+            pd.Index(['gene_2', 'gene_4'], dtype='object')
+        )
+
+    def test_reindex_with_taxonomy_missing_infos_summed(self):
+        df = pd.DataFrame(
+            [
+                [23, 7],
+                [15, 4],
+                [0, 36],
+            ],
+            columns=['sample_1', 'sample_2'],
+            index=['gene_1', 'gene_2', 'gene_4']  # index dtype='object'
+        )
+        df_taxo = pd.DataFrame(
+            [
+                [147802,
+                 'k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; \
+f__Lactobacillaceae; g__Lactobacillus; s__iners'],
+                [1352,
+                 'k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; \
+f__Enterococcaceae; g__Enterococcus; s__faecium']
+            ],
+            columns=['tax_id', 'full_tax'],
+            index=['gene_1', 'gene_3']  # index dtype='object'
+        )
+        df_expected = pd.DataFrame.from_dict(
+            {
+                'sample_1':
+                {
+                    ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales',
+                     'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 23,
+                    (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan): 15,
+                },
+                'sample_2':
+                {
+                    ('Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales',
+                     'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_iners'): 7,
+                    (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan): 40,
+                }
+            }
+        )
+        df_expected.index.set_names(["kingdom", "phylum", "class", "order", "family", "genus", "species"], inplace=True)
+
+        reindexation_instance = GenesToTaxonomy(df, df_taxo)
+        reindexed_df = reindexation_instance.reindex_with_taxonomy(na='sum')
+        pd.testing.assert_frame_equal(reindexed_df, df_expected)
+
     def test_reindex_with_taxonomy_summing(self):
         df = pd.DataFrame(
             [

From 7efac41ddc811498785dc2d1750489add68e7102 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Tue, 10 Oct 2023 17:21:50 +0200
Subject: [PATCH 10/17] statsmodels

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 8078fe1..6b08cec 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -118,7 +118,7 @@ six==1.15.0
     #   plotly
     #   python-dateutil
     #   retrying
-statsmodels==0.11.1
+statsmodels==0.13.0
     # via moonstone (setup.py)
 text-unidecode==1.3
     # via python-slugify

From aaaaaa92e767c199c2cd67e180decb34ab895174 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Tue, 10 Oct 2023 17:38:06 +0200
Subject: [PATCH 11/17] statsmodels==0.13.0 in setup

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 3c69e1f..855e579 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
         'pandas==2.0.2',
         'matplotlib==3.3.0',
         'plotly==5.6.0',
-        'statsmodels==0.11.1',
+        'statsmodels==0.13.0',
         'python-slugify==4.0.1',
         'pyaml==20.4.0',
         'numpy==1.24.3',

From 1709821281f6b698dae8e134d228dca00fb1c160 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Tue, 10 Oct 2023 17:47:00 +0200
Subject: [PATCH 12/17] update scikit-learn

---
 requirements.txt | 2 +-
 setup.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 6b08cec..1e0cb93 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -102,7 +102,7 @@ retrying==1.3.3
     # via plotly
 scikit-bio==0.5.9
     # via moonstone (setup.py)
-scikit-learn==0.21.3
+scikit-learn==1.3.1
     # via
     #   moonstone (setup.py)
     #   scikit-bio
diff --git a/setup.py b/setup.py
index 855e579..c886e2b 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@
         'pyaml==20.4.0',
         'numpy==1.24.3',
         'scikit-bio==0.5.9',
-        'scikit-learn==0.21.3',
+        'scikit-learn==1.3.1',
         'hdmedians==0.13',
         'cython==0.29.21',
         'scipy==1.5.2'

From 95c27041f9ce5b29fc2f31e0da921d77bc1ef2d0 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Tue, 10 Oct 2023 17:52:10 +0200
Subject: [PATCH 13/17] hdmedians + pip3 into pip

---
 .github/workflows/python-package.yml | 14 +++++++-------
 requirements.txt                     |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 61b30ae..7e6ffe0 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -22,13 +22,13 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip3 install --upgrade pip3
-        pip3 install flake8 pytest
-        pip3 install numpy==1.24.3
-        pip3 install .
-        pip3 install odfpy     # optional dependencies
-        pip3 install openpyxl  # idem
-        pip3 install xlrd      # idem
+        python -m pip install --upgrade pip
+        pip install flake8 pytest
+        pip install numpy==1.24.3
+        pip install .
+        pip install odfpy     # optional dependencies
+        pip install openpyxl  # idem
+        pip install xlrd      # idem
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
diff --git a/requirements.txt b/requirements.txt
index 1e0cb93..aae76f2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,7 +22,7 @@ decorator==4.4.2
     # via
     #   ipython
     #   scikit-bio
-hdmedians==0.13
+hdmedians==0.14.2
     # via
     #   moonstone (setup.py)
     #   scikit-bio

From e69c4a3d0702581b953cf916ccda100149cd233f Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Wed, 11 Oct 2023 10:56:09 +0200
Subject: [PATCH 14/17] upgrade setuptools and wheel

---
 .github/workflows/coverage.yml       | 2 +-
 .github/workflows/python-package.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index 2afda4d..1c3212b 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -19,7 +19,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
+        python -m pip install --upgrade pip setuptools wheel
         pip install pytest pytest-cov
         pip install numpy==1.24.3
         pip install .
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 7e6ffe0..3a5546a 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -22,7 +22,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
+        python -m pip install --upgrade pip setuptools wheel
         pip install flake8 pytest
         pip install numpy==1.24.3
         pip install .

From 576c1c2f30cca3258f128871607a95f3f73a83ad Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Wed, 11 Oct 2023 11:02:48 +0200
Subject: [PATCH 15/17] hdmedians in setup corrected

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index c886e2b..39b2cb1 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
         'numpy==1.24.3',
         'scikit-bio==0.5.9',
         'scikit-learn==1.3.1',
-        'hdmedians==0.13',
+        'hdmedians==0.14.2',
         'cython==0.29.21',
         'scipy==1.5.2'
     ],

From ef497055681e8c1484263402ff30cdc71585fe8a Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Thu, 12 Oct 2023 13:44:05 +0200
Subject: [PATCH 16/17] update plotly + tests adapted to new version np

---
 moonstone/analysis/differential_analysis.py | 10 +++++-----
 requirements.txt                            |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/moonstone/analysis/differential_analysis.py b/moonstone/analysis/differential_analysis.py
index c78eb35..9387200 100644
--- a/moonstone/analysis/differential_analysis.py
+++ b/moonstone/analysis/differential_analysis.py
@@ -1,7 +1,6 @@
 import logging
 
 import pandas as pd
-import numpy as np
 import scipy.stats as st
 from statsmodels.stats.multitest import multipletests
 
@@ -53,8 +52,8 @@ def test_dichotomic_features(self, feature, test_to_use):
         cat1 = self.full_table[self.full_table[feature] == self.full_table[feature][0]]
         cat2 = self.full_table[self.full_table[feature] != self.full_table[feature][0]]
         for family in range(self.number_columns_to_skip, self.full_table.shape[1]):
-            test = self.tests_functions_used[test_to_use](cat1[self.full_table.columns[family]],
-                                                          cat2[self.full_table.columns[family]])
+            test = self.tests_functions_used[test_to_use](cat1[self.full_table.columns[family]].astype(float),
+                                                          cat2[self.full_table.columns[family]].astype(float))
             features.append(feature)
             taxons.append(self.full_table.columns[family])
             static_value.append(round(test[0], 6))
@@ -79,7 +78,8 @@ def test_multiple_features(self, feature, test_to_use):
             list_ofgroups = []
             for variable in variable_dic:
                 list_ofgroups.append(variable_dic[variable][self.full_table.columns[family]])
-            test = self.tests_functions_used[test_to_use](*np.asarray(list_ofgroups))
+            # The groups are unpacked directly rather than through np.asarray(list_ofgroups).
+            test = self.tests_functions_used[test_to_use](*list_ofgroups)  # works for kruskal and one way anova
             features.append(feature)
             taxons.append(self.full_table.columns[family])
             static_values.append(round(test[0], 6))
@@ -114,5 +114,5 @@ def differential_analysis_by_feature(self, features, type_of_features, test_to_u
         for feature in features:
             test_result = getattr(self, f"test_{type_of_features}", self.test_default)(feature, test_to_use)
             test_result['corrected_p-value'] = self.corrected_p_values(test_result['p-value'], correction_method_used)
-            final_table = final_table.append(test_result)
+            final_table = pd.concat([final_table, test_result])
         return final_table
diff --git a/requirements.txt b/requirements.txt
index aae76f2..5708b61 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -74,7 +74,7 @@ pickleshare==0.7.5
     # via ipython
 pillow==7.2.0
     # via matplotlib
-plotly==5.6.0
+plotly==5.17.0
     # via moonstone (setup.py)
 prompt-toolkit==3.0.7
     # via ipython

From c513aaa731a73f541bee423cd313649458acb927 Mon Sep 17 00:00:00 2001
From: Agnes BAUD <agnes.baud@pasteur.fr>
Date: Thu, 12 Oct 2023 14:08:17 +0200
Subject: [PATCH 17/17] updating scipy (everywhere) and plotly (in setup)

---
 requirements.txt | 2 +-
 setup.py         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 5708b61..9071174 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -106,7 +106,7 @@ scikit-learn==1.3.1
     # via
     #   moonstone (setup.py)
     #   scikit-bio
-scipy==1.5.2
+scipy==1.9.0
     # via
     #   scikit-bio
     #   scikit-learn
diff --git a/setup.py b/setup.py
index 39b2cb1..13ffa8b 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
     install_requires=[
         'pandas==2.0.2',
         'matplotlib==3.3.0',
-        'plotly==5.6.0',
+        'plotly==5.17.0',
         'statsmodels==0.13.0',
         'python-slugify==4.0.1',
         'pyaml==20.4.0',
@@ -20,7 +20,7 @@
         'scikit-learn==1.3.1',
         'hdmedians==0.14.2',
         'cython==0.29.21',
-        'scipy==1.5.2'
+        'scipy==1.9.0'
     ],
     packages=find_packages(),
     entry_points={'console_scripts': ['moonstone=moonstone.main:run']},