Skip to content

Commit

Permalink
Polars tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mwiewior committed Dec 11, 2024
1 parent bbdecc7 commit 08878e2
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 20 deletions.
24 changes: 14 additions & 10 deletions tests/_expected.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path

import pandas as pd
import polars as pl

# Directory that contains this module; other test paths are built from it.
TEST_DIR = Path(__file__).parents[0]
# Sub-directory of TEST_DIR named "data".
DATA_DIR = TEST_DIR.joinpath("data")
Expand All @@ -28,15 +29,18 @@
"+--------+-----------+---------+--------+-----------+---------+",
"""

# NOTE(review): this span looks like a rendered diff in which the previous
# names (DF_OVERLAP, DF1, DF2) and their replacements (PD_DF_OVERLAP, PD_DF1,
# PD_DF2) appear side by side -- confirm which statements are live before
# editing the code itself.

# Expected overlap result, parsed from EXPECTED_OVERLAP with mdpd.from_md.
# The four position columns are cast to int64 one at a time.
DF_OVERLAP = (mdpd.from_md(EXPECTED_OVERLAP)
.astype({'pos_start_1': 'int64'})
.astype({'pos_end_1': 'int64'})
.astype({'pos_start_2': 'int64'})
.astype({'pos_end_2': 'int64'}))
# Pandas: the same expected frame, built the same way, under the PD_ prefix.
PD_DF_OVERLAP = (mdpd.from_md(EXPECTED_OVERLAP)
.astype({'pos_start_1': 'int64'})
.astype({'pos_end_1': 'int64'})
.astype({'pos_start_2': 'int64'})
.astype({'pos_end_2': 'int64'}))

# Sort each expected frame by every column and rebuild a 0..n-1 index.
DF_OVERLAP = DF_OVERLAP.sort_values(by=list(DF_OVERLAP.columns)).reset_index(drop=True)
PD_DF_OVERLAP = PD_DF_OVERLAP.sort_values(by=list(PD_DF_OVERLAP.columns)).reset_index(drop=True)
# Input frames read from the CSV files under DATA_DIR.
PD_DF1 = pd.read_csv(f"{DATA_DIR}/reads.csv")
PD_DF2 = pd.read_csv(f"{DATA_DIR}/targets.csv")



# The same two CSV files loaded under the un-prefixed names.
DF1 = pd.read_csv(f"{DATA_DIR}/reads.csv")
DF2 = pd.read_csv(f"{DATA_DIR}/targets.csv")
# Polars: each frame is constructed directly from its pandas counterpart,
# so the Polars objects carry the same data as the pandas ones above.
PL_DF_OVERLAP = pl.DataFrame(PD_DF_OVERLAP)
PL_DF1 = pl.DataFrame(PD_DF1)
PL_DF2 = pl.DataFrame(PD_DF2)
14 changes: 5 additions & 9 deletions tests/test_pandas_overlap.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,17 @@
import pandas as pd

import polars_bio as pb
from _expected import DF_OVERLAP, DF1, DF2




from _expected import PD_DF_OVERLAP, PD_DF1, PD_DF2


# Tests for pb.overlap with output_type="pandas.DataFrame".
# NOTE(review): this span looks like a rendered diff with its indentation
# stripped; statements using DF1/DF2/DF_OVERLAP and statements using
# self.result/PD_DF_OVERLAP appear side by side -- confirm which are live.
class TestOverlapPandas:
# Overlap of the two input frames, stored on the class and read by the
# tests below through self.result.
result = pb.overlap(PD_DF1, PD_DF2, output_type="pandas.DataFrame")
# The overlap is expected to contain exactly 16 rows.
def test_overlap_count(self):
assert len(pb.overlap(DF1, DF2, output_type="pandas.DataFrame")) == 16
assert len(self.result) == 16

# The overlap, sorted by every column with its index reset, must equal the
# expected frame according to pd.testing.assert_frame_equal.
def test_overlap_schema_rows(self):
result = pb.overlap(DF1, DF2, output_type="pandas.DataFrame")
result = result.sort_values(by=list(result.columns)).reset_index(drop=True)
expected = DF_OVERLAP
result = self.result.sort_values(by=list(self.result.columns)).reset_index(drop=True)
expected = PD_DF_OVERLAP
pd.testing.assert_frame_equal(result, expected)


Expand Down
20 changes: 19 additions & 1 deletion tests/test_polars_overlap.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,22 @@
import pandas as pd
import polars_bio.overlap as overlap
import polars_bio as pb
from _expected import PL_DF_OVERLAP, PL_DF1, PL_DF2


class TestOverlapPolars:
    """Check ``pb.overlap`` on Polars inputs for both Polars output types."""

    # Reference frame every row-level comparison is made against.
    expected = PL_DF_OVERLAP

    # Both results are computed when the class body executes and are then
    # shared by all tests; the lazy result is collected immediately.
    result_frame = pb.overlap(PL_DF1, PL_DF2, output_type="polars.DataFrame")
    result_lazy = pb.overlap(
        PL_DF1, PL_DF2, output_type="polars.LazyFrame"
    ).collect()

    def test_overlap_count(self):
        """Both output types yield 16 rows."""
        for frame in (self.result_frame, self.result_lazy):
            assert len(frame) == 16

    def test_overlap_schema_rows(self):
        """The DataFrame result, sorted by all columns, equals the expected frame."""
        frame = self.result_frame
        ordered = frame.sort(by=frame.columns)
        assert self.expected.equals(ordered)

    def test_overlap_schema_rows_lazy(self):
        """The collected LazyFrame result, sorted by all columns, equals the expected frame."""
        frame = self.result_lazy
        ordered = frame.sort(by=frame.columns)
        assert self.expected.equals(ordered)

0 comments on commit 08878e2

Please sign in to comment.