From cf089ac4ff86b8790ae79ca2f6d35da233b15809 Mon Sep 17 00:00:00 2001 From: sjoshistrats Date: Sat, 16 Apr 2022 18:01:50 -0400 Subject: [PATCH 1/4] add benchmarks --- python/fastgrouper/test/test_arr.py | 25 +++---- .../test/test_grouped_benchmark.py | 66 +++++++++++++++++++ setup.py | 5 +- 3 files changed, 83 insertions(+), 13 deletions(-) create mode 100644 python/fastgrouper/test/test_grouped_benchmark.py diff --git a/python/fastgrouper/test/test_arr.py b/python/fastgrouper/test/test_arr.py index be1cd69..5c30e0d 100644 --- a/python/fastgrouper/test/test_arr.py +++ b/python/fastgrouper/test/test_arr.py @@ -1,3 +1,4 @@ +import pytest import numpy as np from numpy.testing import assert_almost_equal @@ -14,37 +15,37 @@ def foobar_op(x, y): return np.mean(x + y) def test_arr_grouped(): - + # Prepare example arr_grpr = fastgrouper.arr.Grouped(GIDS) - + # Check apply with positional args result = arr_grpr.apply(foobar_op, XVALS, YVALS) - + # Ensure returned result is a numpy array assert isinstance(result, np.ndarray) - + # Check values assert_almost_equal(result, np.array(EXPECTED_APPLY)) - + # Check values when using apply with keyword args result = arr_grpr.apply(foobar_op, XVALS, y=YVALS) assert_almost_equal(result, np.array(EXPECTED_APPLY)) - + # Check values when using apply_expand result = arr_grpr.apply_expand(foobar_op, XVALS, y=YVALS) assert_almost_equal(result, np.array(EXPECTED_APPLY_EXPAND)) - + def test_li_grouped(): - + # Prepare example li_grpr = fastgrouper.li.Grouped(GIDS) - + # Check apply with keyword args result = li_grpr.apply(foobar_op, XVALS, y=YVALS) - + # Ensure returned result is a list assert isinstance(result, list) - + # Check values - assert_almost_equal(result, EXPECTED_APPLY) \ No newline at end of file + assert_almost_equal(result, EXPECTED_APPLY) diff --git a/python/fastgrouper/test/test_grouped_benchmark.py b/python/fastgrouper/test/test_grouped_benchmark.py new file mode 100644 index 0000000..0a179f7 --- /dev/null +++ 
b/python/fastgrouper/test/test_grouped_benchmark.py @@ -0,0 +1,66 @@ +import pytest +import pandas as pd +import numpy as np +from numpy.testing import assert_almost_equal + +import fastgrouper.arr +import fastgrouper.li + +# Create asymmetrical groups +XVALS = np.linspace(500, 1200, 50000) +YVALS = np.linspace(-230, 177.3, 50000) +GIDS = np.tile(np.arange(500), 100) +GIDS[[14, 19, 230, 87]] = 4 +GIDS[[345, 1270, 63, 1287]] = 12 + +def beepbop(x, y): + return np.min(np.abs(np.sin(x) + np.sin(y))) + +def test_fastgrouper_arr_slice_apply_benchmark(benchmark): + arr_grpr = fastgrouper.arr.Grouped(GIDS) + idx = pd.Index(arr_grpr.dedup_gids, name="gids") + + def apply_fn(): + result = arr_grpr.apply(beepbop, XVALS, YVALS) + + # Sorting to make a more fair comparison against pure pandas benchmark + return pd.Series(result, index=idx).sort_index() + + benchmark(apply_fn) + +def test_pure_pandas_slice_apply_benchmark(benchmark): + df = pd.DataFrame({ + "gids": GIDS, + "xvals": XVALS, + "yvals": YVALS + }) + pdgrpd = df.groupby("gids") + + def apply_fn(r): + return beepbop(r["xvals"].values, r["yvals"].values) + + benchmark(pdgrpd.apply, apply_fn) + +def test_fastgrouper_all_steps_benchmark(benchmark): + def apply_fn(): + arr_grpr = fastgrouper.arr.Grouped(GIDS) + idx = pd.Index(arr_grpr.dedup_gids, name="gids") + result = arr_grpr.apply(beepbop, XVALS, YVALS) + + # Sorting to make a more fair comparison against pure pandas benchmark + return pd.Series(result, index=idx).sort_index() + + benchmark(apply_fn) + +def test_pure_pandas_all_steps_benchmark(benchmark): + df = pd.DataFrame({ + "gids": GIDS, + "xvals": XVALS, + "yvals": YVALS + }) + pdgrpd = df.groupby("gids") + + def apply_fn(): + return df.groupby("gids").apply(lambda r: beepbop(r["xvals"].values, r["yvals"].values)) + + benchmark(apply_fn) diff --git a/setup.py b/setup.py index 62b316d..b9f984b 100644 --- a/setup.py +++ b/setup.py @@ -26,5 +26,8 @@ package_dir={"": "python"}, 
packages=setuptools.find_packages(where="python"), python_requires=">=3.6", - install_requires=["numpy", "pandas"] + install_requires=["numpy", "pandas"], + extras_require={ + "testing": ["pytest", "pytest-benchmark"] + } ) From 93d9de39d156e665b54c3c43374d86b364e72209 Mon Sep 17 00:00:00 2001 From: sjoshistrats Date: Sat, 16 Apr 2022 18:05:44 -0400 Subject: [PATCH 2/4] Bump patch, update circleci build directives. --- .circleci/config.yml | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2cb8a33..4cbaff0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,6 +13,7 @@ jobs: - run: command: | # Run tests pipenv install pytest + pipenv install pytest-benchmark pipenv run pytest python/fastgrouper/test/ -s -vv workflows: build_test: diff --git a/setup.py b/setup.py index b9f984b..12912cf 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ setuptools.setup( name="fastgrouper", - version="0.1.1", + version="0.1.2", author="Shreyas Joshi", author_email="sjoshistrats@gmail.com", description="A package for applying efficient groupby operations.", From 83b1f13bec6ecaa61360ad8bdcb1b132d99858b2 Mon Sep 17 00:00:00 2001 From: sjoshistrats Date: Sat, 16 Apr 2022 18:07:17 -0400 Subject: [PATCH 3/4] Fix build. --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4cbaff0..7defdb8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,7 +13,7 @@ jobs: - run: command: | # Run tests pipenv install pytest - pipenv install pytest-benchmark + pipenv install pytest-benchmark pipenv run pytest python/fastgrouper/test/ -s -vv workflows: build_test: From dcb13e05faee83b2d652acf8ad8785b053432e00 Mon Sep 17 00:00:00 2001 From: sjoshistrats Date: Sat, 16 Apr 2022 18:09:41 -0400 Subject: [PATCH 4/4] Rename test. 
--- python/fastgrouper/test/{test_arr.py => test_grouped.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python/fastgrouper/test/{test_arr.py => test_grouped.py} (100%) diff --git a/python/fastgrouper/test/test_arr.py b/python/fastgrouper/test/test_grouped.py similarity index 100% rename from python/fastgrouper/test/test_arr.py rename to python/fastgrouper/test/test_grouped.py