Skip to content

Commit

Permalink
Allow (local) pathlib input to HDF
Browse files Browse the repository at this point in the history
  • Loading branch information
martindurant committed Oct 23, 2024
1 parent 09d7a92 commit 7106bf8
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
3 changes: 2 additions & 1 deletion kerchunk/hdf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import base64
import io
import logging
import pathlib
from typing import Union, BinaryIO

import fsspec.core
Expand Down Expand Up @@ -91,7 +92,7 @@ def __init__(

# Open HDF5 file in read mode...
lggr.debug(f"HDF5 file: {h5f}")
if isinstance(h5f, str):
if isinstance(h5f, (pathlib.Path, str)):
fs, path = fsspec.core.url_to_fs(h5f, **(storage_options or {}))
self.input_file = fs.open(path, "rb")
url = h5f
Expand Down
15 changes: 15 additions & 0 deletions kerchunk/tests/test_hdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,21 @@ def test_string_embed():
assert (z.vlen_str[1:] == "").all()


def test_string_pathlib():
    """Translate ``vlen.h5`` when the input is given as a ``pathlib.Path``.

    Runs the same assertions as the embed test that precedes it in this
    module; the only difference is that the file name is wrapped in a
    ``pathlib.Path`` rather than passed as a plain string.
    """
    import pathlib

    h5_path = pathlib.Path(osp.join(here, "vlen.h5"))
    translator = kerchunk.hdf.SingleHdf5ToZarr(h5_path, vlen_encode="embed")
    refs = translator.translate()

    # Load the generated references through fsspec's reference filesystem.
    ref_fs = fsspec.filesystem("reference", fo=refs)
    assert txt in ref_fs.references["vlen_str/0"]

    # Read the data back through zarr and check the string array contents.
    group = zarr.open(ref_fs.get_mapper())
    assert group.vlen_str.dtype == "O"
    assert group.vlen_str[0] == txt
    assert (group.vlen_str[1:] == "").all()


def test_string_null():
fn = osp.join(here, "vlen.h5")
h = kerchunk.hdf.SingleHdf5ToZarr(fn, fn, vlen_encode="null", inline_threshold=0)
Expand Down

0 comments on commit 7106bf8

Please sign in to comment.