diff --git a/tests/_expected.py b/tests/_expected.py
index 05fa6ea..57e8cb6 100644
--- a/tests/_expected.py
+++ b/tests/_expected.py
@@ -42,6 +42,10 @@ PD_DF1 = pd.read_csv(DF_PATH1)
 PD_DF2 = pd.read_csv(DF_PATH2)
 
 
+BIO_PD_DF1 = pd.read_parquet(f"{DATA_DIR}/exons/")
+BIO_PD_DF2 = pd.read_parquet(f"{DATA_DIR}/fBrain-DS14718/")
+
+
 # Polars
 PL_DF_OVERLAP = pl.DataFrame(PD_DF_OVERLAP)
 PL_DF1 = pl.DataFrame(PD_DF1)
diff --git a/tests/test_bioframe.py b/tests/test_bioframe.py
index e69de29..1d0ad28 100644
--- a/tests/test_bioframe.py
+++ b/tests/test_bioframe.py
@@ -0,0 +1,48 @@
+"""Cross-check polars_bio.overlap against bioframe.overlap on shared fixtures."""
+
+import bioframe as bf
+import pandas as pd
+import polars_bio as pb
+
+from _expected import BIO_PD_DF1, BIO_PD_DF2
+from polars_bio import OverlapFilter
+
+
+class TestOverlapBioframe:
+    """Compare pb.overlap (strict filter) with bf.overlap (inner join)."""
+
+    @classmethod
+    def setup_class(cls):
+        """Compute both overlap frames once for the whole class.
+
+        pytest calls this hook before the first test of the class, so a
+        failure here is reported against these tests instead of aborting
+        module import/collection.
+        """
+        interval_cols = ("contig", "pos_start", "pos_end")
+        cls.result = pb.overlap(
+            BIO_PD_DF1,
+            BIO_PD_DF2,
+            output_type="pandas.DataFrame",
+            overlap_filter=OverlapFilter.Strict,
+        )
+        cls.result_bio = bf.overlap(
+            BIO_PD_DF1,
+            BIO_PD_DF2,
+            cols1=interval_cols,
+            cols2=interval_cols,
+            suffixes=("_1", "_2"),
+            how="inner",
+        )
+
+    def test_overlap_count(self):
+        """Row count matches the pinned value and bioframe's row count."""
+        assert len(self.result) == 54246
+        assert len(self.result) == len(self.result_bio)
+
+    def test_overlap_schema_rows(self):
+        """Frames are equal once rows are put in a common order."""
+        sort_cols = list(self.result.columns)
+        expected = self.result_bio.sort_values(by=sort_cols).reset_index(drop=True)
+        result = self.result.sort_values(by=sort_cols).reset_index(drop=True)
+        pd.testing.assert_frame_equal(result, expected)