Skip to content

Commit 6f62464

Browse files
authored
Merge pull request #96 from motleystate/parse-more-format
more format available to parse
2 parents 70b5b20 + 085ec30 commit 6f62464

File tree

8 files changed

+79
-0
lines changed

8 files changed

+79
-0
lines changed

.github/workflows/coverage.yml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,9 @@ jobs:
2323
pip install pytest pytest-cov
2424
pip install numpy==1.18.1
2525
pip install .
26+
pip install odfpy # optional dependencies
27+
pip install openpyxl # optional dependency: engine for .xlsx files
28+
pip install xlrd # optional dependency: engine for old-style .xls files
2629
- name: Generate coverage report
2730
run: |
2831
pytest --cov ./moonstone --cov-report xml --cov-report term

.github/workflows/python-package.yml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,9 @@ jobs:
2626
pip install flake8 pytest
2727
pip install numpy==1.18.1
2828
pip install .
29+
pip install odfpy # optional dependencies
30+
pip install openpyxl # optional dependency: engine for .xlsx files
31+
pip install xlrd # optional dependency: engine for old-style .xls files
2932
- name: Lint with flake8
3033
run: |
3134
# stop the build if there are Python syntax errors or undefined names

moonstone/parsers/base.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,18 @@ def _load_data(self) -> pd.DataFrame:
4444
"""
4545
method that handles the loading and parsing of your file into a pandas dataframe.
4646
"""
47+
ext = self.file_path.split(".")[-1]
48+
ext_engine = {
49+
"xls": "xlrd", # old-style Excel files
50+
"xlsx": "openpyxl", # newer Excel file formats
51+
"odf": "odf", "ods": "odf", "odt": "odf", # OpenDocument file formats
52+
"xlsb": "pyxlsb" # Binary Excel files
53+
}
54+
if ext in ext_engine.keys():
55+
return pd.read_excel(
56+
self.file_path, sep=self.sep, header=self.header, **self.parsing_options,
57+
engine=ext_engine[ext]
58+
)
4759
return pd.read_csv(
4860
self.file_path, sep=self.sep, header=self.header, **self.parsing_options
4961
)

requirements-dev.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,3 +7,4 @@ pip-tools
77
black
88
pydocstyle
99
mypy
10+
odfpy
Binary file not shown.
Binary file not shown.
Binary file not shown.

tests/parsers/counts/taxonomy/kraken2/test_kraken2.py

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,3 +29,63 @@ def test_to_dataframe(self):
2929
)
3030
expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
3131
pd.testing.assert_frame_equal(self.sunbeamkraken2parser.dataframe, expected_df)
32+
33+
def test_to_dataframe_xls(self):
34+
"""
35+
Test based on input.xls (old-style Excel format)
36+
"""
37+
input_path = os.path.join(os.path.dirname(__file__), 'sunbeam_kraken2.xls')
38+
sunbeamkraken2parser = SunbeamKraken2Parser(input_path)
39+
expected_df = pd.DataFrame(
40+
[
41+
['Bacteria', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 2, 5.5, 6.0], # noqa
42+
['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillales (order)', 'Lactobacillales (order)', 'Lactobacillales (order)', 186826, 4.3, 2.1], # noqa
43+
['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_jensenii', 109790, 1.0, 12.0] # noqa
44+
],
45+
columns=[
46+
'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species',
47+
sunbeamkraken2parser.new_otu_id_name, 'SAMPLE_1', 'SAMPLE_2'
48+
]
49+
)
50+
expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
51+
pd.testing.assert_frame_equal(sunbeamkraken2parser.dataframe, expected_df)
52+
53+
def test_to_dataframe_xlsx(self):
54+
"""
55+
Test based on input.xlsx (newer Excel file formats)
56+
"""
57+
input_path = os.path.join(os.path.dirname(__file__), 'sunbeam_kraken2.xlsx')
58+
sunbeamkraken2parser = SunbeamKraken2Parser(input_path)
59+
expected_df = pd.DataFrame(
60+
[
61+
['Bacteria', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 2, 5.5, 6.0], # noqa
62+
['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillales (order)', 'Lactobacillales (order)', 'Lactobacillales (order)', 186826, 4.3, 2.1], # noqa
63+
['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_jensenii', 109790, 1.0, 12.0] # noqa
64+
],
65+
columns=[
66+
'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species',
67+
sunbeamkraken2parser.new_otu_id_name, 'SAMPLE_1', 'SAMPLE_2'
68+
]
69+
)
70+
expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
71+
pd.testing.assert_frame_equal(sunbeamkraken2parser.dataframe, expected_df)
72+
73+
def test_to_dataframe_ods(self):
74+
"""
75+
Test based on input.ods (OpenDocument file format)
76+
"""
77+
input_path = os.path.join(os.path.dirname(__file__), 'sunbeam_kraken2.ods')
78+
sunbeamkraken2parser = SunbeamKraken2Parser(input_path)
79+
expected_df = pd.DataFrame(
80+
[
81+
['Bacteria', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 'Bacteria (kingdom)', 2, 5.5, 6.0], # noqa
82+
['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillales (order)', 'Lactobacillales (order)', 'Lactobacillales (order)', 186826, 4.3, 2.1], # noqa
83+
['Bacteria', 'Firmicutes', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus', 'Lactobacillus_jensenii', 109790, 1.0, 12.0] # noqa
84+
],
85+
columns=[
86+
'kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species',
87+
sunbeamkraken2parser.new_otu_id_name, 'SAMPLE_1', 'SAMPLE_2'
88+
]
89+
)
90+
expected_df = expected_df.set_index(['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species'])
91+
pd.testing.assert_frame_equal(sunbeamkraken2parser.dataframe, expected_df)

0 commit comments

Comments
 (0)