motleystate
diff --git a/‎.github/workflows/coverage.yml
Lines changed: 3 additions & 0 deletions b/‎.github/workflows/coverage.yml
Lines changed: 3 additions & 0 deletions
diff --git a/‎.github/workflows/python-package.yml
Lines changed: 3 additions & 0 deletions b/‎.github/workflows/python-package.yml
Lines changed: 3 additions & 0 deletions
diff --git a/‎moonstone/parsers/base.py
Lines changed: 12 additions & 0 deletions b/‎moonstone/parsers/base.py
Lines changed: 12 additions & 0 deletions
diff --git a/‎requirements-dev.txt
Lines changed: 1 addition & 0 deletions b/‎requirements-dev.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/parsers/counts/taxonomy/kraken2/sunbeam_kraken2.ods
9.68 KB b/‎tests/parsers/counts/taxonomy/kraken2/sunbeam_kraken2.ods
9.68 KB
diff --git a/‎tests/parsers/counts/taxonomy/kraken2/sunbeam_kraken2.xls
6 KB b/‎tests/parsers/counts/taxonomy/kraken2/sunbeam_kraken2.xls
6 KB
diff --git a/‎tests/parsers/counts/taxonomy/kraken2/sunbeam_kraken2.xlsx
4.95 KB b/‎tests/parsers/counts/taxonomy/kraken2/sunbeam_kraken2.xlsx
4.95 KB
diff --git a/‎tests/parsers/counts/taxonomy/kraken2/test_kraken2.py
Lines changed: 60 additions & 0 deletions b/‎tests/parsers/counts/taxonomy/kraken2/test_kraken2.py
Lines changed: 60 additions & 0 deletions
@@ -23,6 +23,9 @@ jobs:
         pip install pytest pytest-cov
         pip install numpy==1.18.1
         pip install .
+        pip install odfpy     # optional: pandas "odf" engine (.odf/.ods/.odt)
+        pip install openpyxl  # optional: pandas "openpyxl" engine (.xlsx)
+        pip install xlrd      # optional: pandas "xlrd" engine (.xls)
     - name: Generate coverage report
       run: |
         pytest --cov ./moonstone --cov-report xml --cov-report term
 
@@ -26,6 +26,9 @@ jobs:
         pip install flake8 pytest
         pip install numpy==1.18.1
         pip install .
+        pip install odfpy     # optional: pandas "odf" engine (.odf/.ods/.odt)
+        pip install openpyxl  # optional: pandas "openpyxl" engine (.xlsx)
+        pip install xlrd      # optional: pandas "xlrd" engine (.xls)
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
 
@@ -44,6 +44,18 @@ def _load_data(self) -> pd.DataFrame:
         """
         method that handles the loading and parsing of your file into a pandas dataframe.
         """
+        ext = str(self.file_path).rsplit(".", 1)[-1].lower()  # extension, matched case-insensitively
+        ext_engine = {
+            "xls": "xlrd",                              # old-style Excel files
+            "xlsx": "openpyxl",                         # newer Excel file formats
+            "odf": "odf", "ods": "odf", "odt": "odf",   # OpenDocument file formats
+            "xlsb": "pyxlsb",                           # Binary Excel files
+        }
+        if ext in ext_engine:
+            # Spreadsheet input: `sep` only applies to delimited text and is not a read_excel option.
+            return pd.read_excel(
+                self.file_path, header=self.header, engine=ext_engine[ext], **self.parsing_options
+            )
         return pd.read_csv(
             self.file_path, sep=self.sep, header=self.header, **self.parsing_options
         )
 
@@ -7,3 +7,4 @@ pip-tools
 black
 pydocstyle
 mypy
+odfpy
@@ -29,3 +29,63 @@ def test_to_dataframe(self):
         )
         expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
         pd.testing.assert_frame_equal(self.sunbeamkraken2parser.dataframe, expected_df)
+
+    def test_to_dataframe_xls(self):
+        """
+        The old-style Excel fixture (sunbeam_kraken2.xls) must parse into the expected dataframe.
+        """
+        taxonomy_levels = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']
+        fixture = os.path.join(os.path.dirname(__file__), 'sunbeam_kraken2.xls')
+        parser = SunbeamKraken2Parser(fixture)
+        rows = [
+            ['Bacteria', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 2, 5.5, 6.0],  # noqa
+            ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillales (order)', 'Lactobacillales (order)', 'Lactobacillales (order)', 186826, 4.3, 2.1],  # noqa
+            ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_jensenii', 109790, 1.0, 12.0]  # noqa
+        ]
+        columns = taxonomy_levels + [parser.new_otu_id_name, 'SAMPLE_1', 'SAMPLE_2']
+        # The seven taxonomy levels become the row index; the remaining columns stay as data.
+        expected_df = pd.DataFrame(rows, columns=columns).set_index(taxonomy_levels)
+        pd.testing.assert_frame_equal(
+            parser.dataframe,
+            expected_df,
+        )
+
+    def test_to_dataframe_xlsx(self):
+        """
+        The newer Excel fixture (sunbeam_kraken2.xlsx) must parse into the expected dataframe.
+        """
+        taxonomy_levels = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']
+        fixture = os.path.join(os.path.dirname(__file__), 'sunbeam_kraken2.xlsx')
+        parser = SunbeamKraken2Parser(fixture)
+        rows = [
+            ['Bacteria', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 2, 5.5, 6.0],  # noqa
+            ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillales (order)', 'Lactobacillales (order)', 'Lactobacillales (order)', 186826, 4.3, 2.1],  # noqa
+            ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_jensenii', 109790, 1.0, 12.0]  # noqa
+        ]
+        columns = taxonomy_levels + [parser.new_otu_id_name, 'SAMPLE_1', 'SAMPLE_2']
+        # The seven taxonomy levels become the row index; the remaining columns stay as data.
+        expected_df = pd.DataFrame(rows, columns=columns).set_index(taxonomy_levels)
+        pd.testing.assert_frame_equal(
+            parser.dataframe,
+            expected_df,
+        )
+
+    def test_to_dataframe_ods(self):
+        """
+        The OpenDocument fixture (sunbeam_kraken2.ods) must parse into the expected dataframe.
+        """
+        taxonomy_levels = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']
+        fixture = os.path.join(os.path.dirname(__file__), 'sunbeam_kraken2.ods')
+        parser = SunbeamKraken2Parser(fixture)
+        rows = [
+            ['Bacteria', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 2, 5.5, 6.0],  # noqa
+            ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillales (order)', 'Lactobacillales (order)', 'Lactobacillales (order)', 186826, 4.3, 2.1],  # noqa
+            ['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_jensenii', 109790, 1.0, 12.0]  # noqa
+        ]
+        columns = taxonomy_levels + [parser.new_otu_id_name, 'SAMPLE_1', 'SAMPLE_2']
+        # The seven taxonomy levels become the row index; the remaining columns stay as data.
+        expected_df = pd.DataFrame(rows, columns=columns).set_index(taxonomy_levels)
+        pd.testing.assert_frame_equal(
+            parser.dataframe,
+            expected_df,
+        )
-Original file line number
+Diff line change
 black
 pydocstyle
 mypy
 +odfpy