Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor of LH5 I/O routines, deprecation of existing methods #24

Merged
merged 13 commits into from
Nov 24, 2023
2 changes: 1 addition & 1 deletion docs/source/extensions/numbadoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def import_object(self) -> bool:
"""
success = super().import_object()
if success:
# Store away numba wrapper
# store away numba wrapper
self.jitobj = self.object
# And bend references to underlying python function
if hasattr(self.object, "py_func"):
Expand Down
44 changes: 21 additions & 23 deletions docs/source/notebooks/DataCompression.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@
"metadata": {},
"outputs": [],
"source": [
"store = lgdo.LH5Store()\n",
"store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
"store = lgdo.lh5.LH5Store()\n",
"store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
"lgdo.show(\"data.lh5\")"
]
},
Expand Down Expand Up @@ -110,7 +110,7 @@
"metadata": {},
"outputs": [],
"source": [
"lgdo.lh5_store.DEFAULT_HDF5_SETTINGS"
"lgdo.lh5.store.DEFAULT_HDF5_SETTINGS"
]
},
{
Expand All @@ -131,18 +131,18 @@
"outputs": [],
"source": [
"# use another built-in filter\n",
"lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"lzf\"}\n",
"lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"lzf\"}\n",
"\n",
"# specify filter name and options\n",
"lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"gzip\", \"compression_opts\": 7}\n",
"lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"gzip\", \"compression_opts\": 7}\n",
"\n",
"# specify a registered filter provided by hdf5plugin\n",
"import hdf5plugin\n",
"\n",
"lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"compression\": hdf5plugin.Blosc()}\n",
"lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"compression\": hdf5plugin.Blosc()}\n",
"\n",
"# shuffle bytes before compressing (typically better compression ratio with no performance penalty)\n",
"lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"shuffle\": True, \"compression\": \"lzf\"}"
"lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"shuffle\": True, \"compression\": \"lzf\"}"
]
},
{
Expand All @@ -166,7 +166,7 @@
"metadata": {},
"outputs": [],
"source": [
"store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
"store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
"show_h5ds_opts(\"data/col1\")"
]
},
Expand All @@ -175,7 +175,7 @@
"id": "f597a9e2",
"metadata": {},
"source": [
"Nice. Shuffling bytes before compressing significantly reduced size on disk. Last but not least, `create_dataset()` keyword arguments can be passed to `write_object()`. They will be forwarded as is, overriding default settings."
"Nice. Shuffling bytes before compressing significantly reduced size on disk. Last but not least, `create_dataset()` keyword arguments can be passed to `write()`. They will be forwarded as is, overriding default settings."
]
},
{
Expand All @@ -185,9 +185,7 @@
"metadata": {},
"outputs": [],
"source": [
"store.write_object(\n",
" data, \"data\", \"data.lh5\", wo_mode=\"of\", shuffle=True, compression=\"gzip\"\n",
")\n",
"store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\", shuffle=True, compression=\"gzip\")\n",
"show_h5ds_opts(\"data/col1\")"
]
},
Expand All @@ -207,7 +205,7 @@
"outputs": [],
"source": [
"data[\"col2\"].attrs[\"hdf5_settings\"] = {\"compression\": \"gzip\"}\n",
"store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
"store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
"\n",
"show_h5ds_opts(\"data/col1\")\n",
"show_h5ds_opts(\"data/col2\")"
Expand All @@ -221,7 +219,7 @@
"We are now storing table columns with different compression settings.\n",
"\n",
"<div class=\"alert alert-info\">\n",
"**Note:** since any [h5py.Group.create_dataset()](https://docs.h5py.org/en/stable/high/group.html#h5py.Group.create_dataset) keyword argument can be used in `write_object()` or set in the `hdf5_settings` attribute, other HDF5 dataset settings can be configured, like the chunk size.\n",
"**Note:** since any [h5py.Group.create_dataset()](https://docs.h5py.org/en/stable/high/group.html#h5py.Group.create_dataset) keyword argument can be used in `write()` or set in the `hdf5_settings` attribute, other HDF5 dataset settings can be configured, like the chunk size.\n",
"</div>"
]
},
Expand All @@ -232,7 +230,7 @@
"metadata": {},
"outputs": [],
"source": [
"store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\", chunks=2)"
"store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\", chunks=2)"
]
},
{
Expand All @@ -257,7 +255,7 @@
"from legendtestdata import LegendTestData\n",
"\n",
"ldata = LegendTestData()\n",
"wfs, n_rows = store.read_object(\n",
"wfs, n_rows = store.read(\n",
" \"geds/raw/waveform\",\n",
" ldata.get_path(\"lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5\"),\n",
")\n",
Expand Down Expand Up @@ -347,7 +345,7 @@
" t0=wfs.t0,\n",
" dt=wfs.dt,\n",
")\n",
"store.write_object(enc_wfs, \"waveforms\", \"data.lh5\", wo_mode=\"o\")\n",
"store.write(enc_wfs, \"waveforms\", \"data.lh5\", wo_mode=\"o\")\n",
"lgdo.show(\"data.lh5\", attrs=True)"
]
},
Expand All @@ -372,7 +370,7 @@
"metadata": {},
"outputs": [],
"source": [
"obj, _ = store.read_object(\"waveforms\", \"data.lh5\")\n",
"obj, _ = store.read(\"waveforms\", \"data.lh5\")\n",
"obj.values"
]
},
Expand All @@ -391,7 +389,7 @@
"metadata": {},
"outputs": [],
"source": [
"obj, _ = store.read_object(\"waveforms\", \"data.lh5\", decompress=False)\n",
"obj, _ = store.read(\"waveforms\", \"data.lh5\", decompress=False)\n",
"obj.values"
]
},
Expand Down Expand Up @@ -433,9 +431,9 @@
"from lgdo.compression import ULEB128ZigZagDiff\n",
"\n",
"wfs.values.attrs[\"compression\"] = ULEB128ZigZagDiff()\n",
"store.write_object(wfs, \"waveforms\", \"data.lh5\", wo_mode=\"of\")\n",
"store.write(wfs, \"waveforms\", \"data.lh5\", wo_mode=\"of\")\n",
"\n",
"obj, _ = store.read_object(\"waveforms\", \"data.lh5\", decompress=False)\n",
"obj, _ = store.read(\"waveforms\", \"data.lh5\", decompress=False)\n",
"obj.values.attrs[\"codec\"]"
]
},
Expand All @@ -447,8 +445,8 @@
"Further reading:\n",
"\n",
"- [Available waveform compression algorithms](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.compression.html)\n",
"- [read_object() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.read_object)\n",
"- [write_object() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.write_object)"
"- [read() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.store.LH5Store.read)\n",
"- [write() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.store.LH5Store.write)"
]
}
],
Expand Down
22 changes: 9 additions & 13 deletions docs/source/notebooks/LH5Files.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"id": "c136b537",
"metadata": {},
"source": [
"We can use `lgdo.lh5_store.ls()` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.ls) to inspect the file contents:"
"We can use `lgdo.lh5.ls()` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.ls) to inspect the file contents:"
]
},
{
Expand Down Expand Up @@ -131,7 +131,7 @@
"metadata": {},
"outputs": [],
"source": [
"store.read_object(\"geds/raw\", lh5_file)"
"store.read(\"geds/raw\", lh5_file)"
]
},
{
Expand All @@ -149,7 +149,7 @@
"metadata": {},
"outputs": [],
"source": [
"obj, n_rows = store.read_object(\"geds/raw/timestamp\", lh5_file)\n",
"obj, n_rows = store.read(\"geds/raw/timestamp\", lh5_file)\n",
"obj"
]
},
Expand All @@ -170,7 +170,7 @@
"metadata": {},
"outputs": [],
"source": [
"obj, n_rows = store.read_object(\"geds/raw/timestamp\", lh5_file, start_row=15, n_rows=10)\n",
"obj, n_rows = store.read(\"geds/raw/timestamp\", lh5_file, start_row=15, n_rows=10)\n",
"print(obj)"
]
},
Expand All @@ -189,7 +189,7 @@
"metadata": {},
"outputs": [],
"source": [
"obj, n_rows = store.read_object(\n",
"obj, n_rows = store.read(\n",
" \"geds/raw\", lh5_file, field_mask=(\"timestamp\", \"energy\"), idx=[1, 3, 7, 9, 10, 15]\n",
")\n",
"print(obj)"
Expand All @@ -200,7 +200,7 @@
"id": "b3f52d77",
"metadata": {},
"source": [
"As you might have noticed, `read_object()` loads all the requested data in memory at once. This can be a problem when dealing with large datasets. `LH5Iterator` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Iterator) makes it possible to handle data one chunk at a time (sequentially) to avoid running out of memory:"
"As you might have noticed, `read()` loads all the requested data in memory at once. This can be a problem when dealing with large datasets. `LH5Iterator` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.iterator.LH5Iterator) makes it possible to handle data one chunk at a time (sequentially) to avoid running out of memory:"
]
},
{
Expand Down Expand Up @@ -260,9 +260,7 @@
"source": [
"store = LH5Store()\n",
"\n",
"store.write_object(\n",
" scalar, name=\"message\", lh5_file=\"my_objects.lh5\", wo_mode=\"overwrite_file\"\n",
")"
"store.write(scalar, name=\"message\", lh5_file=\"my_objects.lh5\", wo_mode=\"overwrite_file\")"
]
},
{
Expand Down Expand Up @@ -300,10 +298,8 @@
"metadata": {},
"outputs": [],
"source": [
"store.write_object(array, name=\"numbers\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n",
"store.write_object(\n",
" wf_table, name=\"waveforms\", group=\"closet\", lh5_file=\"my_objects.lh5\"\n",
")\n",
"store.write(array, name=\"numbers\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n",
"store.write(wf_table, name=\"waveforms\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n",
"show(\"my_objects.lh5\")"
]
},
Expand Down
4 changes: 2 additions & 2 deletions src/lgdo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,11 @@
"VectorOfVectors",
"VectorOfEncodedVectors",
"WaveformTable",
"LH5Iterator",
"LH5Store",
"load_dfs",
"load_nda",
"ls",
"show",
"LH5Iterator",
"LH5Store",
"__version__",
]
2 changes: 1 addition & 1 deletion src/lgdo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


def lh5ls():
""":func:`.show` command line interface."""
""":func:`.lh5.show` command line interface."""
parser = argparse.ArgumentParser(
prog="lh5ls", description="Inspect LEGEND HDF5 (LH5) file contents"
)
Expand Down
Loading
Loading