Skip to content

compat: Spatialpandas with dask-expr #6503

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions holoviews/core/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1269,3 +1269,7 @@ def ndloc(self):
dataset.ndloc[[1, 2, 3], [0, 2, 3]]
"""
return ndloc(self)

def __dask_tokenize__(self):
from dask.base import normalize_token
return normalize_token(type(self)), self.data
40 changes: 3 additions & 37 deletions holoviews/tests/core/data/test_daskinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,18 @@
from holoviews.core.util import PANDAS_VERSION
from holoviews.util.transform import dim

from ...utils import DASK_VERSION, dask_setup, dask_switcher
from .test_pandasinterface import BasePandasInterfaceTests

classic, expr = dask_setup()


class _DaskDatasetTest(BasePandasInterfaceTests):
class DaskDatasetTest(BasePandasInterfaceTests):
"""
Test of the pandas DaskDataset interface.
"""

datatype = 'dask'
data_type = dd.DataFrame

__test__ = False
__test__ = True

# Disabled tests for NotImplemented methods
def test_dataset_add_dimensions_values_hm(self):
Expand Down Expand Up @@ -130,42 +128,10 @@
self.assertIsInstance(new_ds.data, dd.DataFrame)
self.assertEqual(new_ds.data.compute(), df[df.b == 10])


class DaskClassicDatasetTest(_DaskDatasetTest):

# No longer supported from Dask 2025.1

data_type = getattr(dd.core, "DataFrame", None)

__test__ = classic

@dask_switcher(query=False)
def setUp(self):
return super().setUp()


class DaskExprDatasetTest(_DaskDatasetTest):

__test__ = expr

@property
def data_type(self):
# Only available from 2025.1 and forward
if DASK_VERSION >= (2025, 1, 0):
return dd.DataFrame
else:
import dask_expr

return dask_expr.DataFrame

@dask_switcher(query=True)
def setUp(self):
return super().setUp()

def test_dataset_groupby(self):
# Dask-expr unique sort the order when running unique on column
super().test_dataset_groupby(sort=True)

Check failure on line 133 in holoviews/tests/core/data/test_daskinterface.py

View workflow job for this annotation

GitHub Actions / unit:test-39:ubuntu-latest

DaskDatasetTest.test_dataset_groupby AssertionError: Dataset not of matching length, 2 vs. 1.

Check failure on line 133 in holoviews/tests/core/data/test_daskinterface.py

View workflow job for this annotation

GitHub Actions / unit:test-39:macos-latest

DaskDatasetTest.test_dataset_groupby AssertionError: Dataset not of matching length, 2 vs. 1.

def test_dataset_groupby_alias(self):
# Dask-expr unique sort the order when running unique on column
super().test_dataset_groupby_alias(sort=True)

Check failure on line 137 in holoviews/tests/core/data/test_daskinterface.py

View workflow job for this annotation

GitHub Actions / unit:test-39:ubuntu-latest

DaskDatasetTest.test_dataset_groupby_alias AssertionError: Dataset not of matching length, 2 vs. 1.

Check failure on line 137 in holoviews/tests/core/data/test_daskinterface.py

View workflow job for this annotation

GitHub Actions / unit:test-39:macos-latest

DaskDatasetTest.test_dataset_groupby_alias AssertionError: Dataset not of matching length, 2 vs. 1.
2 changes: 0 additions & 2 deletions holoviews/tests/core/data/test_spatialpandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
from holoviews.element import Path, Points, Polygons
from holoviews.element.comparison import ComparisonTestCase

from ...utils import dask_switcher
from .test_multiinterface import GeomTests


Expand Down Expand Up @@ -260,7 +259,6 @@ class DaskSpatialPandasTest(GeomTests, RoundTripTests):

__test__ = True

@dask_switcher(query=False, extras=["spatialpandas.dask"])
def setUp(self):
if spatialpandas is None:
raise SkipTest('DaskSpatialPandasInterface requires spatialpandas, skipping tests')
Expand Down
7 changes: 2 additions & 5 deletions holoviews/tests/element/test_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@
from holoviews.element.selection import spatial_select_columnar
from holoviews.util.transform import dim

from ..utils import dask_switcher

try:
import datashader as ds
except ImportError:
Expand Down Expand Up @@ -684,10 +682,9 @@ def pandas_df(self):
}, dtype=float)


@pytest.fixture(scope="function", params=[pytest.param(True, id='dask-classic'), pytest.param(False, id='dask-expr')])
@pytest.fixture(scope="function")
def dask_df(self, pandas_df, request):
with dask_switcher(query=request.param):
return dd.from_pandas(pandas_df, npartitions=2)
return dd.from_pandas(pandas_df, npartitions=2)

@pytest.fixture(scope="function")
def _method(self):
Expand Down
67 changes: 0 additions & 67 deletions holoviews/tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
import logging
import os
import sys
from contextlib import contextmanager, suppress
from functools import lru_cache
from importlib import reload
from importlib.util import find_spec

import param
import pytest

from holoviews.core.util import _no_import_version
from holoviews.element.comparison import ComparisonTestCase

cwd = os.path.abspath(os.path.split(__file__)[0])
Expand Down Expand Up @@ -117,64 +111,3 @@ def tearDown(self):
for level, msgs in messages.items():
for msg in msgs:
log.log(LEVELS[level], msg)


DASK_UNAVAILABLE = find_spec("dask") is None
DASK_VERSION = _no_import_version("dask")


@lru_cache
def dask_setup():
"""
Set up both dask dataframes, using lru_cache to only do it once

"""
from datashader.data_libraries.dask import bypixel, dask_pipeline

classic, expr = False, False

# Removed in Dask 2025.1, and will raise AttributeError
if DASK_VERSION < (2025, 1, 0):
import dask.dataframe as dd

bypixel.pipeline.register(dd.core.DataFrame)(dask_pipeline)
classic = True
else:
# dask_expr import below will now fail with:
# cannot import name '_Frame' from 'dask.dataframe.core'
expr = True

with suppress(ImportError):
import dask_expr

bypixel.pipeline.register(dask_expr.DataFrame)(dask_pipeline)
expr = True

return classic, expr


@contextmanager
def dask_switcher(*, query=False, extras=None):
"""
Context manager to switch on/off dask-expr query planning.

Using a context manager as it is an easy way to
change the function to a decorator.
"""
if DASK_UNAVAILABLE:
pytest.skip("dask is not available")

classic, expr = dask_setup()

if not query and not classic:
pytest.skip("Classic DataFrame no longer supported by dask")
if query and not expr:
pytest.skip("dask-expr is not available")

import dask

dask.config.set(**{"dataframe.query-planning": query})
for module in ("dask.dataframe", *(extras or ())):
if module in sys.modules:
reload(sys.modules[module])
yield
3 changes: 1 addition & 2 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ COVERAGE_CORE = "sysmon"
cftime = "*"
contourpy = "*"
dask-core = "*"
dask-expr = "<2"
datashader = ">=0.11.1"
ffmpeg = "*"
ibis-sqlite = "*"
Expand All @@ -119,7 +118,7 @@ netcdf4 = "*"
networkx = "*"
notebook = "*"
pillow = "*"
plotly = ">=4.0"
plotly = ">=4.0,<6" # TODO: Remove upper pin
pooch = "*"
pyarrow = "*"
scikit-image = "*"
Expand Down
Loading