Skip to content

Commit

Permalink
ENH: consider native chunking in hdf5
Browse files (browse the repository at this point in the history)
  • Loading branch information
genematx committed Feb 14, 2025
1 parent 1a904d9 commit 774c8ad
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions tiled/adapters/hdf5.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -329,15 +329,16 @@ def _get_hdf5_specs(
) -> Tuple[Tuple[int, ...], numpy.dtype]:
with h5py.File(fpath, "r", swmr=swmr, libver=libver) as f:
f = f[dataset] if dataset else f
- return f.shape, f.dtype
+ return f.shape, f.chunks, f.dtype

  # Need to know shapes/dtypes of constituent arrays to load them lazily
- shapes_dtypes = [_get_hdf5_specs(fpath) for fpath in file_paths]
+ shapes_chunks_dtypes = [_get_hdf5_specs(fpath) for fpath in file_paths]
  delayed = [dask.delayed(_read_hdf5_array)(fpath) for fpath in file_paths]
  arrs = [
  dask.array.from_delayed(val, shape=shape, dtype=dtype)
- for (val, (shape, dtype)) in zip(delayed, shapes_dtypes)
+ for (val, (shape, chunk_shape, dtype)) in zip(delayed, shapes_chunks_dtypes)
  ]
+ # TODO: Rechunk?
array = dask.array.concatenate(arrs, axis=0)

return array
Expand Down

0 comments on commit 774c8ad

Please sign in to comment.