Skip to content

Commit 2ea4fd0

Browse files
committed
Fix indexing tests
1 parent b41d983 commit 2ea4fd0

File tree

2 files changed

+51
-12
lines changed

2 files changed

+51
-12
lines changed

holoviews/core/data/pandas.py

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import pandas as pd
33
from packaging.version import Version
44
from pandas.api.types import is_numeric_dtype
5+
from pandas.core.dtypes.common import ensure_int64
56

67
from .. import util
78
from ..dimension import Dimension, dimension_name
@@ -351,11 +352,36 @@ def sort(cls, dataset, by=None, reverse=False):
351352
return dataset.data.sort(columns=cols, ascending=not reverse)
352353
return dataset.data.sort_values(by=cols, ascending=not reverse)
353354

355+
@classmethod
def sorted_index(cls, df):
    """
    Report whether the index of ``df`` is sorted.

    When the index object exposes an ``is_lexsorted`` method its
    result is returned; otherwise the index's
    ``is_monotonic_increasing`` property is used.
    """
    index = df.index
    if not hasattr(index, 'is_lexsorted'):
        return index.is_monotonic_increasing
    return index.is_lexsorted()
360+
361+
@classmethod
def sort_depth(cls, df):
    """
    Return the number of leading index levels of ``df`` that are
    lexicographically sorted.

    The per-level integer codes of the index are checked with
    pandas' ``is_lexsorted`` helper, starting from all levels and
    dropping the last level until a sorted prefix is found.

    Returns 0 when no prefix is sorted, when the index does not
    expose per-level ``codes`` (i.e. it is not a MultiIndex), or
    when the pandas helper cannot be imported.
    """
    codes = getattr(df.index, 'codes', None)
    if codes is None:
        # A flat index has no level codes to inspect; report depth 0
        # instead of raising AttributeError.
        return 0
    try:
        # Private pandas helper, so its location may change between versions.
        from pandas._libs.algos import is_lexsorted
    except ImportError:
        return 0
    # is_lexsorted operates on int64 arrays, while codes may use a
    # narrower integer dtype.
    int64_codes = [ensure_int64(level_codes) for level_codes in codes]
    for k in range(df.index.nlevels, 0, -1):
        if is_lexsorted(int64_codes[:k]):
            return k
    return 0
372+
354373
@classmethod
355374
def index_selection(cls, df, selection):
375+
indexes = cls.indexes(df)
376+
nindex = len(indexes)
377+
sorted_index = cls.sorted_index(df)
378+
if sorted_index:
379+
depth = df.index.nlevels
380+
else:
381+
depth = cls.sort_depth(df)
356382
index_sel = {}
357383
skip_index = True
358-
for idx in cls.indexes(df):
384+
for level, idx in enumerate(indexes):
359385
if idx not in selection:
360386
index_sel[idx] = slice(None, None)
361387
continue
@@ -365,18 +391,26 @@ def index_selection(cls, df, selection):
365391
sel = slice(*sel)
366392
elif not isinstance(sel, (list, slice)):
367393
sel = [sel]
394+
if isinstance(sel, slice) and nindex > 1 and not sorted_index and level>depth:
395+
# If the index is not monotonic we cannot slice
396+
return {}
368397
index_sel[idx] = sel
398+
print(index_sel)
369399
return {} if skip_index else index_sel
370400

371401
@classmethod
372402
def select(cls, dataset, selection_mask=None, **selection):
373403
df = dataset.data
374404
if selection_mask is None:
375405
if index_sel:= cls.index_selection(df, selection):
376-
if len(index_sel) == 1:
377-
df = df[next(iter(index_sel.values()))]
378-
else:
379-
df = df.loc[tuple(index_sel.values()), :]
406+
try:
407+
if len(index_sel) == 1:
408+
df = df[next(iter(index_sel.values()))]
409+
else:
410+
df = df.loc[tuple(index_sel.values()), :]
411+
except KeyError:
412+
# If index lookup fails we fall back to boolean indexing
413+
index_sel = {}
380414
column_sel = {k: v for k, v in selection.items() if k not in index_sel}
381415
if column_sel:
382416
selection_mask = cls.select_mask(dataset, column_sel)

holoviews/tests/core/data/test_pandasinterface.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,13 @@ def test_index_aggregate(self):
222222
agg = ds.aggregate("number", function=np.mean, spreadfn=np.var)
223223
pd.testing.assert_frame_equal(agg.data, expected)
224224

225+
def test_index_select_monotonic(self):
    """Selecting ``number=1`` must keep a MultiIndex and match the expected frame."""
    expected = pd.DataFrame(
        {'color': ['red', 'blue'], 'values': [0, 1], 'number': [1, 1]}
    ).set_index(['number', 'color'])
    dataset = Dataset(self.df, kdims=["number", "color"])
    result = dataset.select(number=1)
    assert isinstance(result.data.index, pd.MultiIndex)
    pd.testing.assert_frame_equal(result.data, expected)
231+
225232
def test_index_select(self):
226233
ds = Dataset(self.df, kdims=["number", "color"])
227234
selected = ds.select(number=1)
@@ -307,31 +314,29 @@ def test_sort(self):
307314
np.testing.assert_array_equal(sorted_ds.dimension_values("values"), [1, 3, 0, 2])
308315
np.testing.assert_array_equal(sorted_ds.dimension_values("number"), [1, 2, 1, 2])
309316

310-
def test_select(self):
311-
ds = Dataset(self.df, kdims=["number", "color"])
317+
def test_select_monotonic(self):
318+
ds = Dataset(self.df.sort_index(), kdims=["number", "color"])
312319
selected = ds.select(color="red")
313320
pd.testing.assert_frame_equal(selected.data, self.df.iloc[[0, 2], :])
314321

315322
selected = ds.select(number=1, color='red')
316323
assert selected == 0
317324

318-
@pytest.mark.xfail(reason="Not working")
319325
def test_select_not_monotonic(self):
320326
frame = pd.DataFrame({"number": [1, 1, 2, 2], "color": [2, 1, 2, 1]})
321327
index = pd.MultiIndex.from_frame(frame, names=frame.columns)
322328
df = pd.DataFrame(range(4), index=index, columns=["values"])
323329
ds = Dataset(df, kdims=list(frame.columns))
324330

325331
data = ds.select(color=slice(2, 3)).data
326-
expected = pd.DataFrame({"number": [1, 2], "color": [2, 2], "values": [1, 3]}).set_index(['number', 'color'])
332+
expected = pd.DataFrame({"number": [1, 2], "color": [2, 2], "values": [0, 2]}).set_index(['number', 'color'])
327333
pd.testing.assert_frame_equal(data, expected)
328334

329-
@pytest.mark.xfail(reason="Not working")
330335
def test_select_not_in_index(self):
331336
ds = Dataset(self.df, kdims=["number", "color"])
332337
selected = ds.select(number=[2, 3])
333-
expected = ds.select(number=2)
334-
pd.testing.assert_frame_equal(selected.data, expected.data)
338+
expected = self.df.loc[[2]]
339+
pd.testing.assert_frame_equal(selected.data, expected)
335340

336341
def test_sample(self):
337342
ds = Dataset(self.df, kdims=["number", "color"])

0 commit comments

Comments
 (0)